source: trunk/indexers/packages/unac/unac.h@ 13670

Last change on this file since 13670 was 13670, checked in by kjdon, 15 years ago

unac package needed for accent folding in mgpp, and possibly for mg in the future

  • Property svn:keywords set to Author Date Id Revision
File size: 12.0 KB
Line 
1/*
2 * Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18
/*
 * Provides functions to strip accents from a string in all the
 * charsets supported by iconv(3).
 *
 * See the unac(3) manual page for more information.
 *
 */
26
27#ifndef _unac_h
28#define _unac_h
29
30#ifdef __cplusplus
31extern "C" {
32#endif
33
/*
 * Geometry of the lookup tables declared at the end of this header.
 * A 16-bit character is split into a block number (the bits above
 * UNAC_BLOCK_SHIFT, used to index unac_indexes) and a position inside
 * that block (the low UNAC_BLOCK_SHIFT bits, selected by UNAC_BLOCK_MASK).
 * UNAC_BLOCK_COUNT is the first dimension of unac_positions and
 * unac_data_table.
 */
/* Generated by builder. Do not modify. Start defines */
#define UNAC_BLOCK_SHIFT 5
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
#define UNAC_BLOCK_COUNT 178
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
/* Generated by builder. Do not modify. End defines */
41
/*
 * Return the unaccented equivalent of the UTF-16 character <c>
 * in the pointer <p>. The length of the unsigned short array pointed
 * by <p> is returned in the <l> argument.
 * The C++ prototype of this macro would be:
 *
 * void unac_char_utf16(const unsigned short c, unsigned short*& p, int& l)
 *
 * <p> and <l> must be assignable lvalues. <c> is evaluated exactly once
 * and converted to unsigned short, so the lookup in unac_indexes always
 * stays inside its UNAC_INDEXES_SIZE entries.
 *
 * When the table entry for <c> is the single value 0xFFFF, <p> is set
 * to a null pointer and <l> to 0 (presumably meaning that <c> has no
 * unaccented form and must be kept as is -- confirm in unac(3)).
 *
 * The macro expands to one statement: terminate it with a semicolon.
 * It can be used as the body of an if/else without extra braces.
 *
 * See unac(3) in IMPLEMENTATION NOTES for more information about the
 * tables (unac_data_table, unac_positions) layout.
 */
#define unac_char_utf16(c,p,l) \
  do { \
    /* unac_-prefixed locals cannot capture a caller argument named index or position */ \
    const unsigned short unac_c_ = (unsigned short)(c); \
    const unsigned short unac_index_ = unac_indexes[unac_c_ >> UNAC_BLOCK_SHIFT]; \
    const unsigned char unac_position_ = (unsigned char)(unac_c_ & UNAC_BLOCK_MASK); \
    (p) = &(unac_data_table[unac_index_][unac_positions[unac_index_][unac_position_]]); \
    /* the next position marks the end of this entry, hence the + 1 */ \
    (l) = unac_positions[unac_index_][unac_position_ + 1] - unac_positions[unac_index_][unac_position_]; \
    if((l) == 1 && *(p) == 0xFFFF) { \
      (p) = 0; \
      (l) = 0; \
    } \
  } while(0)
64
/*
 * Return the unaccented equivalent of the UTF-16 string <in> of
 * length <in_length> in the pointer <out>. The length of the UTF-16
 * string returned in <out> is stored in <out_length>.
 *
 * Output buffer handling:
 *  - If the pointer *out is null, a new string is allocated using
 *    malloc(3).
 *  - If the pointer *out is not null, the available length must also
 *    be given in the *out_length argument. The pointer passed in *out
 *    must have been allocated by malloc(3) and may be reallocated by
 *    realloc(3) if need be.
 * In both cases it is the responsibility of the caller to free the
 * pointer returned in *out.
 *
 * The return value is 0 on success and -1 on error, in which case the
 * errno variable is set to the corresponding error code.
 *
 * NOTE(review): the strings are passed as char buffers, so the lengths
 * are presumably byte counts -- confirm in unac(3).
 * NOTE(review): size_t is used here but this header includes nothing;
 * include <stddef.h> (or <stdlib.h>) before this header.
 */
int unac_string_utf16(const char* in, size_t in_length,
                      char** out, size_t* out_length);
80
/*
 * The semantics of this function are strictly the same as those of
 * unac_string_utf16, except that the input and output strings are
 * encoded in <charset> instead of UTF-16. The input is converted to
 * UTF-16 using iconv(3) before calling unac_string_utf16, and the
 * result is converted from UTF-16 back to the specified <charset>
 * before returning it in the <out> pointer.
 * For efficiency it is recommended that the caller uses
 * unac_string_utf16 and iconv(3) directly to save buffer allocation
 * overhead.
 * The return value is 0 on success and -1 on error, in which case
 * the errno variable is set to the corresponding error code.
 *
 * NOTE(review): earlier wording named unac_string in both places where
 * unac_string_utf16 now appears; unac_string_utf16 is presumably what
 * was meant -- confirm against the implementation.
 * NOTE(review): size_t is used here but this header includes nothing;
 * include <stddef.h> (or <stdlib.h>) before this header.
 */
int unac_string(const char* charset,
                const char* in, size_t in_length,
                char** out, size_t* out_length);
95
/*
 * Return the unac version number as a string. The returned pointer is
 * const: the caller must not modify the string through it.
 */
const char* unac_version(void);
100
/*
 * Debug levels accepted by unac_debug and unac_debug_callback.
 */
#define UNAC_DEBUG_NONE 0x00
#define UNAC_DEBUG_LOW 0x01
#define UNAC_DEBUG_HIGH 0x02
104
105#ifdef HAVE_VSNPRINTF
106#define UNAC_DEBUG_AVAILABLE 1
/*
 * Set the unac debug level. <l> is one of:
 * UNAC_DEBUG_NONE for no debug messages at all
 * UNAC_DEBUG_LOW for minimal information
 * UNAC_DEBUG_HIGH for extremely verbose information,
 * only usable when translating a few short strings.
 *
 * Shorthand for unac_debug_callback with a null print function and
 * a null data pointer.
 *
 * The macro expands to a plain expression without a trailing
 * semicolon, so write "unac_debug(l);" like a function call. It can
 * be used as the body of an if/else without extra braces.
 *
 * unac_debug with anything but UNAC_DEBUG_NONE is not
 * thread safe.
 */
#define unac_debug(l) unac_debug_callback((l), 0, (void*)0)
118
/*
 * Set the debug level and define a printing function callback.
 *
 * <level>    debug level, same values as for unac_debug.
 * <function> is in charge of dealing with the debug messages,
 *            presumably to print them to the user. If <function>
 *            is NULL, messages are printed on the standard error
 *            output using fprintf(stderr...).
 * <data>     opaque pointer that is passed along to <function>,
 *            should it need to manage a persistent context.
 *
 * The prototype of <function> (unac_debug_print_t) takes two
 * arguments. The first is the debug message (const char*), the
 * second is the opaque pointer given as <data> argument to
 * unac_debug_callback.
 *
 * unac_debug_callback with anything but UNAC_DEBUG_NONE is not
 * thread safe.
 */
typedef void (*unac_debug_print_t)(const char* message, void* data);
void unac_debug_callback(int level, unac_debug_print_t function, void* data);
140#endif /* HAVE_VSNPRINTF */
141
142/* Generated by builder. Do not modify. Start declarations */
143extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
144extern unsigned char unac_positions[UNAC_BLOCK_COUNT][UNAC_BLOCK_SIZE + 1];
145extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
/*
 * One data array per block. NOTE(review): presumably these are the
 * arrays that the entries of unac_data_table point to -- confirm
 * against the generated definitions.
 */
extern unsigned short unac_data0[];
extern unsigned short unac_data1[];
extern unsigned short unac_data2[];
extern unsigned short unac_data3[];
extern unsigned short unac_data4[];
extern unsigned short unac_data5[];
extern unsigned short unac_data6[];
extern unsigned short unac_data7[];
extern unsigned short unac_data8[];
extern unsigned short unac_data9[];
extern unsigned short unac_data10[];
extern unsigned short unac_data11[];
extern unsigned short unac_data12[];
extern unsigned short unac_data13[];
extern unsigned short unac_data14[];
extern unsigned short unac_data15[];
extern unsigned short unac_data16[];
extern unsigned short unac_data17[];
extern unsigned short unac_data18[];
extern unsigned short unac_data19[];
extern unsigned short unac_data20[];
extern unsigned short unac_data21[];
extern unsigned short unac_data22[];
extern unsigned short unac_data23[];
extern unsigned short unac_data24[];
extern unsigned short unac_data25[];
extern unsigned short unac_data26[];
extern unsigned short unac_data27[];
extern unsigned short unac_data28[];
extern unsigned short unac_data29[];
extern unsigned short unac_data30[];
extern unsigned short unac_data31[];
extern unsigned short unac_data32[];
extern unsigned short unac_data33[];
extern unsigned short unac_data34[];
extern unsigned short unac_data35[];
extern unsigned short unac_data36[];
extern unsigned short unac_data37[];
extern unsigned short unac_data38[];
extern unsigned short unac_data39[];
extern unsigned short unac_data40[];
extern unsigned short unac_data41[];
extern unsigned short unac_data42[];
extern unsigned short unac_data43[];
extern unsigned short unac_data44[];
extern unsigned short unac_data45[];
extern unsigned short unac_data46[];
extern unsigned short unac_data47[];
extern unsigned short unac_data48[];
extern unsigned short unac_data49[];
extern unsigned short unac_data50[];
extern unsigned short unac_data51[];
extern unsigned short unac_data52[];
extern unsigned short unac_data53[];
extern unsigned short unac_data54[];
extern unsigned short unac_data55[];
extern unsigned short unac_data56[];
extern unsigned short unac_data57[];
extern unsigned short unac_data58[];
extern unsigned short unac_data59[];
extern unsigned short unac_data60[];
extern unsigned short unac_data61[];
extern unsigned short unac_data62[];
extern unsigned short unac_data63[];
extern unsigned short unac_data64[];
extern unsigned short unac_data65[];
extern unsigned short unac_data66[];
extern unsigned short unac_data67[];
extern unsigned short unac_data68[];
extern unsigned short unac_data69[];
extern unsigned short unac_data70[];
extern unsigned short unac_data71[];
extern unsigned short unac_data72[];
extern unsigned short unac_data73[];
extern unsigned short unac_data74[];
extern unsigned short unac_data75[];
extern unsigned short unac_data76[];
extern unsigned short unac_data77[];
extern unsigned short unac_data78[];
extern unsigned short unac_data79[];
extern unsigned short unac_data80[];
extern unsigned short unac_data81[];
extern unsigned short unac_data82[];
extern unsigned short unac_data83[];
extern unsigned short unac_data84[];
extern unsigned short unac_data85[];
extern unsigned short unac_data86[];
extern unsigned short unac_data87[];
extern unsigned short unac_data88[];
extern unsigned short unac_data89[];
extern unsigned short unac_data90[];
extern unsigned short unac_data91[];
extern unsigned short unac_data92[];
extern unsigned short unac_data93[];
extern unsigned short unac_data94[];
extern unsigned short unac_data95[];
extern unsigned short unac_data96[];
extern unsigned short unac_data97[];
extern unsigned short unac_data98[];
extern unsigned short unac_data99[];
extern unsigned short unac_data100[];
extern unsigned short unac_data101[];
extern unsigned short unac_data102[];
extern unsigned short unac_data103[];
extern unsigned short unac_data104[];
extern unsigned short unac_data105[];
extern unsigned short unac_data106[];
extern unsigned short unac_data107[];
extern unsigned short unac_data108[];
extern unsigned short unac_data109[];
extern unsigned short unac_data110[];
extern unsigned short unac_data111[];
extern unsigned short unac_data112[];
extern unsigned short unac_data113[];
extern unsigned short unac_data114[];
extern unsigned short unac_data115[];
extern unsigned short unac_data116[];
extern unsigned short unac_data117[];
extern unsigned short unac_data118[];
extern unsigned short unac_data119[];
extern unsigned short unac_data120[];
extern unsigned short unac_data121[];
extern unsigned short unac_data122[];
extern unsigned short unac_data123[];
extern unsigned short unac_data124[];
extern unsigned short unac_data125[];
extern unsigned short unac_data126[];
extern unsigned short unac_data127[];
extern unsigned short unac_data128[];
extern unsigned short unac_data129[];
extern unsigned short unac_data130[];
extern unsigned short unac_data131[];
extern unsigned short unac_data132[];
extern unsigned short unac_data133[];
extern unsigned short unac_data134[];
extern unsigned short unac_data135[];
extern unsigned short unac_data136[];
extern unsigned short unac_data137[];
extern unsigned short unac_data138[];
extern unsigned short unac_data139[];
extern unsigned short unac_data140[];
extern unsigned short unac_data141[];
extern unsigned short unac_data142[];
extern unsigned short unac_data143[];
extern unsigned short unac_data144[];
extern unsigned short unac_data145[];
extern unsigned short unac_data146[];
extern unsigned short unac_data147[];
extern unsigned short unac_data148[];
extern unsigned short unac_data149[];
extern unsigned short unac_data150[];
extern unsigned short unac_data151[];
extern unsigned short unac_data152[];
extern unsigned short unac_data153[];
extern unsigned short unac_data154[];
extern unsigned short unac_data155[];
extern unsigned short unac_data156[];
extern unsigned short unac_data157[];
extern unsigned short unac_data158[];
extern unsigned short unac_data159[];
extern unsigned short unac_data160[];
extern unsigned short unac_data161[];
extern unsigned short unac_data162[];
extern unsigned short unac_data163[];
extern unsigned short unac_data164[];
extern unsigned short unac_data165[];
extern unsigned short unac_data166[];
extern unsigned short unac_data167[];
extern unsigned short unac_data168[];
extern unsigned short unac_data169[];
extern unsigned short unac_data170[];
extern unsigned short unac_data171[];
extern unsigned short unac_data172[];
extern unsigned short unac_data173[];
extern unsigned short unac_data174[];
extern unsigned short unac_data175[];
extern unsigned short unac_data176[];
extern unsigned short unac_data177[];
/* Generated by builder. Do not modify. End declarations */
325
326#ifdef __cplusplus
327}
328#endif
329
330#endif /* _unac_h */
Note: See TracBrowser for help on using the repository browser.