Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: gsdl/trunk/trunk/mgpp/lib/unitool.h@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago
Undoing change committed in r16582
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 3.0 KB

Rev	Line
#ifndef UNITOOL_H
#define UNITOOL_H

/* This module is based on Unicode 2.1 */

/* All unicode values in this interface are carried in an
 * unsigned short. */


/* parse_utf8_char parses the next utf-8 character, placing its
 * unicode equivalent in *value. The length of the utf-8 character
 * is returned. end is the address of the last character. */
int parse_utf8_char (const unsigned char *here,
                     const unsigned char *end,
                     unsigned short *value);

/* output_utf8_char encodes a unicode character as a utf-8 character,
 * writing the encoding at here. The length of the encoding is
 * returned. If the string was not long enough to encode the
 * character, 0 is returned. end is the address of the last
 * character. */
int output_utf8_char (unsigned short value,
                      unsigned char *here,
                      unsigned char *end);

/* decompose_str will decompose a unicode string into its canonical
 * equivalents. The input must be null-terminated. NULL is returned
 * if the input array was not large enough to contain the fully
 * decomposed string (the array will be in a correct, but partially
 * decomposed state).
 * NOTE(review): max_output_len is presumably the capacity of the
 * input array, counted in unsigned shorts, and the non-NULL return
 * value is presumably input itself - confirm against the
 * implementation. */
unsigned short *decompose_str (unsigned short *input,
                               int max_output_len);

/* tests to see whether 'value' is a valid Unicode letter */
int is_unicode_letter (unsigned short value);

/* tests to see whether 'value' is a valid Unicode digit */
int is_unicode_digit (unsigned short value);

/* tests to see whether 'value' is a valid Unicode letter or
 * digit */
int is_unicode_letdig (unsigned short value);

/* tests to see whether 'value' is a valid space.
 * The test includes both "C" spaces and "Unicode" spaces, i.e.
 * form-feed, newline, carriage return, horizontal tab,
 * vertical tab, and the Zs, Zl, and Zp Unicode categorizations */
int is_unicode_space (unsigned short value);

/* returns the length of the unicode string */
int unicode_strlen (const unsigned short *str);

/* returns the length of the unicode string, up to a maximum
 * of max_length */
int unicode_strnlen (const unsigned short *str, int max_length);

/* returns the upper-case equivalent of value */
unsigned short unicode_toupper (unsigned short value);

/* returns the lower-case equivalent of value */
unsigned short unicode_tolower (unsigned short value);

/* returns the simplified Chinese character equivalent of
 * another Chinese character */
unsigned short unicode_tosimplified (unsigned short value);


/* converts a utf-8 word (a string with its length stored in the
 * first byte) to a Unicode array. To handle all situations the
 * output buffer should be 256 unsigned shorts long. The output will
 * also have the length as the first entry.
 * NOTE(review): the return value is presumably output - confirm
 * against the implementation. */
unsigned short *utf8_word_to_unicode (const unsigned char *input,
                                      unsigned short *output,
                                      int max_output_length);

/* converts a unicode word buffer (with the length stored in the
 * first entry) to a utf-8 encoded word output (with the length
 * stored in the first byte). Only 255 bytes (not characters) can be
 * stored in the output.
 * NOTE(review): the return value is presumably output - confirm
 * against the implementation. */
unsigned char *unicode_to_utf8_word (const unsigned short *input,
                                     unsigned char *output,
                                     int max_output_length);


#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: