Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gsdl/trunk/trunk/mgpp/lib/unitool.h@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago
Undoing change committed in r16582
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 3.0 KB

Line
#ifndef UNITOOL_H
#define UNITOOL_H

/* This module is based on Unicode 2.1 */


/* parse_utf8_char parses the next utf-8 character, placing its
 * unicode equivalent in *value. The length of the utf-8 character
 * is returned. end is the address of the last character. */
int parse_utf8_char (const unsigned char *here,
                     const unsigned char *end,
                     unsigned short *value);

/* output_utf8_char encodes a unicode character as a UTF-8 character.
 * The length of the encoding is returned. If the string was not
 * long enough to encode the character 0 is returned. end is the
 * address of the last character. */
int output_utf8_char (unsigned short value,
                      unsigned char *here,
                      unsigned char *end);

/* decompose_str will decompose a unicode string into its canonical
 * equivalents. NULL is returned if the input array was not
 * large enough to contain the fully decomposed string (the array
 * will be in a correct, but partially decomposed state). The input
 * must be null-terminated.
 *
 * The string is passed and returned by pointer: the function reports
 * failure with NULL and works on the caller's array, so neither the
 * argument nor the result can be a single code unit. */
unsigned short *decompose_str (unsigned short *input,
                               int max_output_len);

/* tests to see whether 'value' is a valid Unicode letter */
int is_unicode_letter (unsigned short value);

/* tests to see whether 'value' is a valid Unicode digit */
int is_unicode_digit (unsigned short value);

/* tests to see whether 'value' is a valid Unicode letter or
 * digit */
int is_unicode_letdig (unsigned short value);

/* tests to see whether 'value' is a valid space.
 * The test includes both "C" spaces and "Unicode" spaces, i.e.
 * form-feed, newline, carriage return, horizontal tab,
 * vertical tab, and the Zs, Zl, and Zp Unicode categorizations */
int is_unicode_space (unsigned short value);

/* returns the length of the unicode string */
int unicode_strlen (const unsigned short *str);

/* returns the length of the unicode string, up to a maximum */
int unicode_strnlen (const unsigned short *str, int max_length);

/* returns the upper-case equivalent of value */
unsigned short unicode_toupper (unsigned short value);

/* returns the lower-case equivalent of value */
unsigned short unicode_tolower (unsigned short value);

/* returns the simplified Chinese character equivalent of
 * another Chinese character */
unsigned short unicode_tosimplified (unsigned short value);


/* converts a utf-8 word (string with length stored in the first byte)
 * to a Unicode array. To handle all situations the output buffer should
 * be 256 unsigned shorts long. The output will also have the length as
 * the first entry. */
unsigned short *utf8_word_to_unicode (const unsigned char *input,
                                      unsigned short *output,
                                      int max_output_length);

/* converts a unicode word buffer (with the length stored in the
 * first entry) to a utf8 encoded word output (with the length stored
 * in the first byte). Only 255 bytes (not characters) can be stored
 * in the output. */
unsigned char *unicode_to_utf8_word (const unsigned short *input,
                                     unsigned char *output,
                                     int max_output_length);


#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: