source: trunk/gsdl/lib/gsdlunicode.h@ 499

Last change on this file since 499 was 413, checked in by rjmcnab, 25 years ago

Added setmapfile function to map conversion utilities so the map file
does not need to be loaded when map conversion object is created.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1/**********************************************************************
2 *
3 * gsdlunicode.h --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: gsdlunicode.h 413 1999-07-21 07:23:18Z rjmcnab $
9 *
10 *********************************************************************/
11
12
13#ifndef GSDLUNICODE_H
14#define GSDLUNICODE_H
15
16#include "text_t.h"
17
18
19// converts a unicode encode text_t string to a utf-8
20// encoded text_t string
21text_t to_utf8 (text_t::const_iterator here, text_t::const_iterator end);
22inline text_t to_utf8 (const text_t &in) {return to_utf8 (in.begin(), in.end());}
23
24// converts a utf-8 encoded text_t string to a unicode
25// encoded text_t string
26text_t to_uni (const text_t &in);
27
28
29
30#define MAXUTF8CHARLEN 3
31
32// convert from a utf-8 char stream to the text_t class
33class utf8inconvertclass : public inconvertclass {
34public:
35 utf8inconvertclass();
36 void reset ();
37 void convert (text_t &output, status_t &status);
38
39protected:
40 // buffer to hold unconverted characters in a stream
41 unsigned char utf8buf[MAXUTF8CHARLEN];
42 size_t utf8buflen;
43
44 // returns the length that the current contents of the
45 // utf8buf should be
46 size_t getutf8charlen ();
47};
48
49
50// This class provides the option of removing zero width
51// spaces (U+200B) during the output. By default this
52// option is turned off. The functionality is actually
53// implemented by the sub-classes, this class just provides
54// the framework for these classes.
55//
56// Note: by convention reset() should not reset the rzws flag.
57class rzwsoutconvertclass : public outconvertclass {
58public:
59 rzwsoutconvertclass () {rzws = 0;};
60 void set_rzws (int new_rzws) {rzws = new_rzws;};
61
62protected:
63 int rzws;
64};
65
66
67// Convert from a text_t class to a utf-8 char stream
68class utf8outconvertclass : public rzwsoutconvertclass {
69public:
70 utf8outconvertclass () {utf8buflen=0; utf8bufhere=0;};
71 void reset ();
72 // note that convert does not null-terminate the
73 // output array of characters
74 void convert (char *output, size_t maxlen,
75 size_t &len, status_t &status);
76
77protected:
78 unsigned char utf8buf[MAXUTF8CHARLEN];
79 size_t utf8buflen;
80 size_t utf8bufhere;
81};
82
83
// Storage for the map file data; mapconvert keeps one of these as
// a member.
class mapdata_t {
public:
  mapdata_t();

  bool loaded;

  // table of 256 pointers to unsigned short data
  unsigned short *ptrs[256];
};
91
92// mapconvert is used in situations where conversion is best
93// done using a map file. The mapfile should reside in
94// gsdlhome/unicode.
95class mapconvert {
96public:
97 mapconvert ();
98 ~mapconvert () {unloadmapfile();};
99
100 // setmapfile will cause loadmapfile to be called when conversion is
101 // needed
102 bool setmapfile (const text_t &thegsdlhome, const text_t &theencoding,
103 unsigned short theabsentc);
104
105 // loadmapfile should be called before any conversion is done
106 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
107 unsigned short theabsentc);
108 void unloadmapfile ();
109
110 unsigned short convert (unsigned short c);
111
112 // note that this version of convert has different semantics to
113 // the convertclass version.
114 text_t convert (const text_t &instr);
115
116protected:
117 text_t gsdlhome;
118 text_t encoding;
119 unsigned short absentc;
120 mapdata_t mapdata;
121};
122
123
124
125#define MAXMAPCHARLEN 2
126
127// convert from a gb char stream to the unicode text_t class
128class mapinconvertclass : public inconvertclass {
129public:
130 mapinconvertclass();
131 virtual ~mapinconvertclass() {};
132
133 // setmapfile will cause loadmapfile to be called when conversion is needed
134 bool setmapfile (const text_t &thegsdlhome, const text_t &theencoding,
135 unsigned short theabsentc) {
136 return converter.setmapfile (thegsdlhome, theencoding, theabsentc);
137 };
138
139 // loadmapfile should be called before any conversion takes
140 // place
141 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
142 unsigned short theabsentc) {
143 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc);
144 };
145
146 void reset ();
147 void convert (text_t &output, status_t &status);
148
149protected:
150 // buffer to hold unconverted characters in a stream
151 unsigned char mapbuf[MAXMAPCHARLEN];
152 size_t mapbuflen;
153
154 // note: multiple instances of mapinconvert class are expensive
155 // as each will have its own copy of the map file data. This
156 // could be reduced by making map2unimap static, but then it
157 // wouldn't be thread safe.
158 mapconvert converter;
159
160 // returns the length that the current contents of the
161 // mapbuf should be
162 inline size_t getmapcharlen () {
163 if (mapbuflen == 0) return 0;
164 if (mapbuf[0] < 0x80) return 1;
165 return 2;
166 }
167};
168
169
170// Convert from a text_t class to a map char stream
171class mapoutconvertclass : public rzwsoutconvertclass {
172public:
173 mapoutconvertclass ();
174 virtual ~mapoutconvertclass() {};
175
176 // setmapfile will cause loadmapfile to be called when conversion is needed
177 bool setmapfile (const text_t &thegsdlhome, const text_t &theencoding,
178 unsigned short theabsentc) {
179 return converter.setmapfile (thegsdlhome, theencoding, theabsentc);
180 };
181
182 // loadmapfile should be called before any conversion takes
183 // place
184 bool loadmapfile (const text_t &thegsdlhome, const text_t &theencoding,
185 unsigned short theabsentc) {
186 return converter.loadmapfile (thegsdlhome, theencoding, theabsentc);
187 };
188
189 void reset ();
190 void convert (char *output, size_t maxlen,
191 size_t &len, status_t &status);
192
193protected:
194 unsigned char mapbuf[MAXMAPCHARLEN];
195 size_t mapbuflen;
196 size_t mapbufhere;
197
198 mapconvert converter;
199};
200
201#endif
Note: See TracBrowser for help on using the repository browser.