source: trunk/gsdl/lib/gsdlunicode.h@ 100

Last change on this file since 100 was 100, checked in by rjmcnab, 25 years ago

Added standard header to source files.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**********************************************************************
2 *
3 * gsdlunicode.h --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: gsdlunicode.h 100 1999-01-08 02:33:16Z rjmcnab $
9 *
10 *********************************************************************/
11
12
13#ifndef GSDLUNICODE_H
14#define GSDLUNICODE_H
15
16#include "text_t.h"
17
18
19// converts a unicode encode text_t string to a utf-8
20// encoded text_t string
21text_t to_utf8 (const text_t &in);
22
23// converts a utf-8 encoded text_t string to a unicode
24// encoded text_t string
25text_t to_uni (const text_t &in);
26
27
28
29#define MAXUTF8CHARLEN 3
30
31// convert from a utf-8 char stream to the text_t class
32class utf8inconvertclass : public inconvertclass {
33public:
34 utf8inconvertclass();
35 void reset ();
36 void convert (text_t &output, status_t &status);
37
38protected:
39 // buffer to hold unconverted characters in a stream
40 unsigned char utf8buf[MAXUTF8CHARLEN];
41 size_t utf8buflen;
42
43 // returns the length that the current contents of the
44 // utf8buf should be
45 size_t getutf8charlen ();
46};
47
48
49// This class provides the option of removing zero width
50// spaces (U+200B) during the output. By default this
51// option is turned off. The functionality is actually
52// implemented by the sub-classes, this class just provides
53// the framework for these classes.
54//
55// Note: by convention reset() should not reset the rzws flag.
56class rzwsoutconvertclass : public outconvertclass {
57public:
58 rzwsoutconvertclass () {rzws = 0;};
59 set_rzws (int new_rzws) {rzws = new_rzws;};
60
61protected:
62 int rzws;
63};
64
65
66// Convert from a text_t class to a utf-8 char stream
67class utf8outconvertclass : public rzwsoutconvertclass {
68public:
69 utf8outconvertclass () {utf8buflen=0; utf8bufhere=0;};
70 void reset ();
71 // note that convert does not null-terminate the
72 // output array of characters
73 void convert (char *output, size_t maxlen,
74 size_t &len, status_t &status);
75
76protected:
77 unsigned char utf8buf[MAXUTF8CHARLEN];
78 size_t utf8buflen;
79 size_t utf8bufhere;
80};
81
82
// Holder for the map file data used by mapconvert.
class mapdata_t {
public:
  mapdata_t();

  // NOTE(review): presumably true once a map file has been
  // read in -- confirm against the implementation.
  bool loaded;

  // 256 pointers to unsigned short data.
  // NOTE(review): looks like one table per high byte of a
  // character -- confirm against the implementation.
  unsigned short *ptrs[256];
};
90
91// mapconvert is used in situations where conversion is best
92// done using a map file. The mapfile should reside in
93// gsdlhome/unicode.
94class mapconvert {
95public:
96 mapconvert ();
97 ~mapconvert () {unloadmapfile();};
98
99 // loadmapfile should be called before any conversion is done
100 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
101 unsigned short theabsentc);
102 void unloadmapfile ();
103
104 unsigned short convert (unsigned short c);
105
106 // note that this version of convert has different semantics to
107 // the convertclass version.
108 text_t convert (const text_t &instr);
109
110protected:
111 text_t gsdlhome;
112 text_t encoding;
113 unsigned short absentc;
114 mapdata_t mapdata;
115};
116
117
118
119#define MAXMAPCHARLEN 2
120
121// convert from a gb char stream to the unicode text_t class
122class mapinconvertclass : public inconvertclass {
123public:
124 mapinconvertclass();
125
126 // loadmapfile should be called before any conversion takes
127 // place
128 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
129 unsigned short theabsentc) {
130 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc);
131 };
132
133 void reset ();
134 void convert (text_t &output, status_t &status);
135
136protected:
137 // buffer to hold unconverted characters in a stream
138 unsigned char mapbuf[MAXMAPCHARLEN];
139 size_t mapbuflen;
140
141 // note: multiple instances of mapinconvert class are expensive
142 // as each will have its own copy of the map file data. This
143 // could be reduced by making map2unimap static, but then it
144 // wouldn't be thread safe.
145 mapconvert converter;
146
147 // returns the length that the current contents of the
148 // mapbuf should be
149 inline size_t getmapcharlen () {
150 if (mapbuflen == 0) return 0;
151 if (mapbuf[0] < 0x80) return 1;
152 return 2;
153 }
154};
155
156
157// Convert from a text_t class to a map char stream
158class mapoutconvertclass : public rzwsoutconvertclass {
159public:
160 mapoutconvertclass ();
161
162 // loadmapfile should be called before any conversion takes
163 // place
164 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
165 unsigned short theabsentc) {
166 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc);
167 };
168
169 void reset ();
170 void convert (char *output, size_t maxlen,
171 size_t &len, status_t &status);
172
173protected:
174 unsigned char mapbuf[MAXMAPCHARLEN];
175 size_t mapbuflen;
176 size_t mapbufhere;
177
178 mapconvert converter;
179};
180
181#endif
Note: See TracBrowser for help on using the repository browser.