Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gsdl/trunk/trunk/mg/lib/unitool.h@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago
Undoing change commited in r16582
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 3.0 KB

Line
1	#ifndef UNITOOL_H
2	#define UNITOOL_H
3
4	#ifdef __cplusplus
5	extern "C" {
6	#endif
7
8
9	/* This module is based on Unicode 2.1 */
10
11
12	/* parse_utf8_char parses the UTF-8 character starting at 'here',
13	* placing its Unicode equivalent in *value. The length of the UTF-8
14	* character is returned. 'end' is the address of the last character. */
15	int parse_utf8_char (const unsigned char *here,
16	const unsigned char *end,
17	unsigned short *value);
18
19	/* output_utf8_char encodes the Unicode character 'value' as a UTF-8
20	* character written at 'here'. The length of the encoding is returned;
21	* if the string was not long enough to encode the character, 0 is
22	* returned. 'end' is the address of the last character. */
23	int output_utf8_char (unsigned short value,
24	unsigned char *here,
25	unsigned char *end);
26
27	/* decompose_str decomposes a Unicode string, in place, into its
28	* canonical equivalents. NULL is returned if the input array was not
29	* large enough to contain the fully decomposed string (the array
30	* will be in a correct, but partially decomposed state). The input
31	* must be null-terminated. */
32	unsigned short *decompose_str (unsigned short *input,
33	int max_output_len);
34
35	/* Tests whether 'value' is a valid Unicode letter (presumably non-zero if so -- confirm). */
36	int is_unicode_letter (unsigned short value);
37
38	/* Tests whether 'value' is a valid Unicode digit (presumably non-zero if so -- confirm). */
39	int is_unicode_digit (unsigned short value);
40
41	/* Tests whether 'value' is a valid Unicode letter or a valid
42	* Unicode digit (presumably non-zero if so -- confirm). */
43	int is_unicode_letdig (unsigned short value);
44
45	/* Tests whether 'value' is a valid space character.
46	* The test covers both "C" spaces and "Unicode" spaces, i.e.
47	* form-feed, newline, carriage return, horizontal tab,
48	* vertical tab, and the Zs, Zl, and Zp Unicode categories. */
49	int is_unicode_space (unsigned short value);
50
51	/* Returns the length of the Unicode string 'str' (presumably null-terminated -- confirm). */
52	int unicode_strlen (const unsigned short *str);
53
54	/* Returns the length of the Unicode string 'str', up to a maximum of 'max_length'. */
55	int unicode_strnlen (const unsigned short *str, int max_length);
56
57	/* Returns the upper-case equivalent of the Unicode character 'value'. */
58	unsigned short unicode_toupper (unsigned short value);
59
60	/* Returns the lower-case equivalent of the Unicode character 'value'. */
61	unsigned short unicode_tolower (unsigned short value);
62
63	/* Returns the simplified Chinese character equivalent of
64	* another Chinese character, 'value'. */
65	unsigned short unicode_tosimplified (unsigned short value);
66
67
68	/* Converts a UTF-8 word (a string with its length stored in the first
69	* byte) to a Unicode array. To handle all situations the output buffer
70	* should be 256 unsigned shorts long. The output will also have the
71	* length as its first entry. NOTE(review): pointer return presumed; confirm in unitool.c. */
72	unsigned short *utf8_word_to_unicode (const unsigned char *input,
73	unsigned short *output,
74	int max_output_length);
75
76	/* Converts a Unicode word buffer (with the length stored in the
77	* first entry) to a UTF-8 encoded word output (with the length stored
78	* in the first byte). Only 255 bytes (not characters) can be stored
79	* in the output. NOTE(review): pointer return presumed; confirm in unitool.c. */
80	unsigned char *unicode_to_utf8_word (const unsigned short *input,
81	unsigned char *output,
82	int max_output_length);
83
84	#ifdef __cplusplus
85	}
86	#endif
87
88	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: