Changeset 18654

Show
Ignore:
Timestamp:
10.03.2009 17:22:40 (10 years ago)
Author:
mdewsnip
Message:

Fix to support the Hawaiian Glottal character, many thanks to John Thompson at DL Consulting Ltd.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • indexers/trunk/packages/unac/unac.c

    r16583 r18654  
    665665 * 02B8 MODIFIER LETTER SMALL Y 
    666666 *  0079 LATIN SMALL LETTER Y 
     667 * 02BB MODIFIER LETTER TURNED COMMA (GLOTTAL) 
     668 *      0000 NOTHING 
    667669 * 02D8 BREVE 
    668670 *  0020 SPACE 
     
    1216812170unsigned short unac_data11[] = { 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F, 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 0x0053, 0x0073, 0x0054, 0x0074, 0xFFFF, 0xFFFF, 0x0048, 0x0068 }; 
    1216912171unsigned short unac_data12[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0041, 0x0061, 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x0059, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 
    12170 unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 
     12172// Greenstone Customisation: Added mapping for Glottal to remove it from 
     12173// string. Glottal character is 5th from end and is set to 0x0000 which isn't 
     12174// a valid character anyway. 
     12175unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };  
    1217112176unsigned short unac_data14[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0xFFFF, 0xFFFF }; 
    1217212177unsigned short unac_data15[] = { 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 
     
    1266612671    } 
    1266712672    if(l > 0) { 
    12668       /* 
    12669        * If there is a decomposition, insert it in the output  
    12670        * string. 
    12671        */ 
    12672       for(k = 0; k < l; k++) { 
    12673     out[out_length++] = (p[k] >> 8) & 0xff; 
    12674     out[out_length++] = (p[k] & 0xff); 
     12673      // Greenstone customization: If the decomposition isn't a valid printable 
     12674      // character, then simply ignore the character to remove it from the 
     12675      // string. This is used to fold Glottals and other spacing modifiers into 
     12676      // non-existence. 
     12677      if (l == 1 && *(p) < 0x0020) 
     12678      { 
     12679      } 
     12680      else 
     12681      { 
     12682        /* 
     12683         * If there is a decomposition, insert it in the output  
     12684         * string. 
     12685         */ 
     12686        for(k = 0; k < l; k++) { 
     12687          out[out_length++] = (p[k] >> 8) & 0xff; 
     12688          out[out_length++] = (p[k] & 0xff); 
     12689        } 
    1267512690      } 
    1267612691    } else {