Changeset 18654 for indexers/trunk/packages/unac
- Timestamp:
- 2009-03-10T17:22:40+13:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
indexers/trunk/packages/unac/unac.c
r16583 r18654 665 665 * 02B8 MODIFIER LETTER SMALL Y 666 666 * 0079 LATIN SMALL LETTER Y 667 * 02BB MODIFIER LETTER TURNED COMMA (GLOTTAL) 668 * 0000 NOTHING 667 669 * 02D8 BREVE 668 670 * 0020 SPACE … … 12168 12170 unsigned short unac_data11[] = { 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 0x0049, 0x0069, 0x0049, 0x0069, 0x004F, 0x006F, 0x004F, 0x006F, 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 0x0053, 0x0073, 0x0054, 0x0074, 0xFFFF, 0xFFFF, 0x0048, 0x0068 }; 12169 12171 unsigned short unac_data12[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0041, 0x0061, 0x0045, 0x0065, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x004F, 0x006F, 0x0059, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 12170 unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 12172 // Greenstone Customisation: Added mapping for Glottal to remove it from 12173 // string. Glottal character is 5th from end and is set to 0x0000 which isn't 12174 // a valid character anyway.
12175 unsigned short unac_data13[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0068, 0x0266, 0x006A, 0x0072, 0x0279, 0x027B, 0x0281, 0x0077, 0x0079, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; 12171 12176 unsigned short unac_data14[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0xFFFF, 0xFFFF }; 12172 12177 unsigned short unac_data15[] = { 0x0263, 0x006C, 0x0073, 0x0078, 0x0295, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF }; … … 12666 12671 } 12667 12672 if(l > 0) { 12668 /* 12669 * If there is a decomposition, insert it in the output 12670 * string. 12671 */ 12672 for(k = 0; k < l; k++) { 12673 out[out_length++] = (p[k] >> 8) & 0xff; 12674 out[out_length++] = (p[k] & 0xff); 12673 // Greenstone customization: If the decomposition isn't a valid printable 12674 // character, then simply ignore the character to remove it from the 12675 // string. This is used to fold Glottals and other spacing modifiers into 12676 // non-existence. 12677 if (l == 1 && *(p) < 0x0020) 12678 { 12679 } 12680 else 12681 { 12682 /* 12683 * If there is a decomposition, insert it in the output 12684 * string. 12685 */ 12686 for(k = 0; k < l; k++) { 12687 out[out_length++] = (p[k] >> 8) & 0xff; 12688 out[out_length++] = (p[k] & 0xff); 12689 } 12675 12690 } 12676 12691 } else {
Note:
See TracChangeset
for help on using the changeset viewer.