Ignore:
Timestamp:
2009-03-10T17:22:40+13:00 (15 years ago)
Author:
mdewsnip
Message:

Fix to support the Hawaiian Glottal character, many thanks to John Thompson at DL Consulting Ltd.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • indexers/trunk/packages/unac/unac.c

    r16583 r18654  
    665665 * 02B8 MODIFIER LETTER SMALL Y
    666666 *  0079 LATIN SMALL LETTER Y
     667 * 02BB MODIFIER LETTER TURNED COMMA (GLOTTAL)
     668 *      0000 NOTHING
    667669 * 02D8 BREVE
    668670 *  0020 SPACE
     
    1216812170unsigned short unac_data11[] = { 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F, 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 0x0053, 0x0073, 0x0054, 0x0074, 0xFFFF, 0xFFFF, 0x0048, 0x0068 };
    1216912171unsigned short unac_data12[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0041, 0x0061, 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x0059, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
    12170 unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
     12172// Greenstone Customisation: Added mapping for Glottal to remove it from
     12173// string. Glottal character is 5th from end and is set to 0x0000 which isn't
     12174// a valid character anyway.
     12175unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
    1217112176unsigned short unac_data14[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0xFFFF, 0xFFFF };
    1217212177unsigned short unac_data15[] = { 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
     
    1266612671    }
    1266712672    if(l > 0) {
    12668       /*
    12669        * If there is a decomposition, insert it in the output
    12670        * string.
    12671        */
    12672       for(k = 0; k < l; k++) {
    12673     out[out_length++] = (p[k] >> 8) & 0xff;
    12674     out[out_length++] = (p[k] & 0xff);
     12673      // Greenstone customization: If the decomposition isn't a valid printable
     12674      // character, then simply ignore the character to remove it from the
     12675      // string. This is used to fold Glottals and other spacing modifiers into
     12676      // non-existence.
     12677      if (l == 1 && *(p) < 0x0020)
     12678      {
     12679      }
     12680      else
     12681      {
     12682        /*
     12683         * If there is a decomposition, insert it in the output
     12684         * string.
     12685         */
     12686        for(k = 0; k < l; k++) {
     12687          out[out_length++] = (p[k] >> 8) & 0xff;
     12688          out[out_length++] = (p[k] & 0xff);
     12689        }
    1267512690      }
    1267612691    } else {
Note: See TracChangeset for help on using the changeset viewer.