source: trunk/gsdl/lib/text_t.h@ 94

Last change on this file since 94 was 94, checked in by rjmcnab, 25 years ago

Wrote general map file based in and out converters. Fixed bugs related
to Chinese character searching. text_t now has an encoding attribute. Added
an encoding option to the preferences.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/**************************************************************************
2 *
3 * text_t.h -- a simple 16-bit character string class
4 * Copyright (c) 1998 -- Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: text_t.h 94 1999-01-04 03:32:21Z rjmcnab $
21 *
22 **************************************************************************/
23
24
25
26#ifndef TEXT_T_H
27#define TEXT_T_H
28
29#ifndef USE_OBJECTSPACE
30# include <vector>
31# include <list>
32# include <set>
33#else
34# include <ospace\std\vector>
35# include <ospace\std\list>
36# include <ospace\std\set>
37#endif
38
39// use the standard namespace
40#ifdef __GNUG__
41// namespaces are broken in current version of gcc
42#else
43# ifndef USE_OBJECTSPACE
44using namespace std;
45# else
46# ifndef OS_NO_NAMESPACE
47using namespace ospace::std;
48# endif
49# endif
50#endif
51
52
53// class prototypes
54class inconvertclass;
55class outconvertclass;
56
57
// storage used by text_t: a vector of 16-bit character values
typedef vector<unsigned short> usvector;

// The class text_t can handle long strings which may contain
// null characters. It uses unsigned shorts to represent up to
// 64K character values.
//
// Besides the characters themselves each text_t carries an
// "encoding" tag. The tag is copied by operator= but it is not
// looked at by the comparison operators (==, !=, <, >), which
// compare the character vectors only.
class text_t {
public:
  // type support for usvector
  typedef usvector::iterator iterator;
  typedef usvector::const_iterator const_iterator;
  typedef usvector::reference reference;
  typedef usvector::const_reference const_reference;
  typedef usvector::size_type size_type;
  typedef usvector::difference_type difference_type;
  typedef usvector::const_reverse_iterator const_reverse_iterator;
  typedef usvector::reverse_iterator reverse_iterator;

protected:
  usvector text;           // the characters
  unsigned short encoding; // 0 = unicode, 1 = other

public:
  // constructors
  text_t ();
  text_t (int i);
  text_t (char *s); // assumed to be a normal c string

  // access to the encoding tag (see the comment on "encoding" above).
  // getencoding is const so that the tag can be read through the
  // const references used by the operators and converters below.
  void setencoding (unsigned short theencoding) {encoding=theencoding;}
  unsigned short getencoding () const {return encoding;}

  // basic container support -- everything here forwards to the
  // underlying usvector
  iterator begin () {return text.begin();}
  const_iterator begin () const {return text.begin();}
  iterator end () {return text.end();}
  const_iterator end () const {return text.end();}

  void erase(iterator pos) {text.erase(pos);}
  void erase(iterator first, iterator last) {text.erase(first, last);}
  void push_back(unsigned short c) {text.push_back(c);}
  void pop_back() {text.pop_back();}
  // copies both the characters and the encoding tag
  text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
  // no bounds checking is done on n
  reference operator[](size_type n) {return text[n];}
  const_reference operator[](size_type n) const {return text[n];}

  bool empty () const {return text.empty();}
  size_type size() const {return text.size();}
  friend bool operator!=(const text_t& x, const text_t& y);
  friend bool operator==(const text_t& x, const text_t& y);
  friend bool operator<(const text_t& x, const text_t& y);
  friend bool operator>(const text_t& x, const text_t& y);

  // added functionality
  // clear removes all the characters; the encoding tag is left alone
  void clear () {text.erase(text.begin(),text.end());}
  void append (const text_t &t);
  void appendrange (iterator first, iterator last);
  void appendrange (const_iterator first, const_iterator last);
  text_t &operator+= (const text_t &t) {append(t);return *this;}

  // support for integers
  void appendint (int i);
  void setint (int i) {clear();appendint(i);}
  text_t &operator=(int i) {setint (i);return *this;}
  text_t &operator+= (int i) {appendint(i);return *this;}
  int getint ();

  // support for arrays of chars
  void appendcarr (char *s, size_type len);
  void setcarr (char *s, size_type len) {clear();appendcarr(s,len);}

  // support for null-terminated C strings
  void appendcstr (char *s);
  void setcstr (char *s) {clear();appendcstr(s);}
  text_t &operator= (char *s) {setcstr(s);return *this;}     // c string
  text_t &operator+= (char *s) {appendcstr(s);return *this;} // c string

  // strings returned from getcarr and getcstr become the callers
  // responsibility and should be deallocated with "delete"
  // NOTE(review): if the implementation allocates these buffers with
  // new[], callers need "delete []" -- confirm against text_t.cpp
  char *getcarr(size_type &len) const;
  char *getcstr() const;
};
138
139
140inline text_t operator+(const text_t &t1, const text_t &t2)
141{
142 text_t tnew = t1;
143 tnew.append(t2);
144 return tnew;
145}
146
147inline text_t operator+(const text_t &t1, int i1)
148{
149 text_t tnew = t1;
150 tnew.appendint(i1);
151 return tnew;
152}
153
154inline text_t operator+(const text_t &t1, char *s1)
155{
156 text_t tnew = t1;
157 tnew.appendcstr(s1);
158 return tnew;
159}
160
161
162inline bool operator!=(const text_t& x, const text_t& y)
163{
164 return (x.text != y.text);
165}
166
167inline bool operator==(const text_t& x, const text_t& y)
168{
169 return (x.text == y.text);
170}
171
172inline bool operator<(const text_t& x, const text_t& y)
173{
174 return (x.text < y.text);
175}
176
177inline bool operator>(const text_t& x, const text_t& y)
178{
179 return (x.text > y.text);
180}
181
182
183struct eqtext_t
184{
185 bool operator()(const text_t t1, const text_t t2) const
186 { return t1 == t2; }
187};
188
189struct lttext_t
190{
191 bool operator()(const text_t t1, const text_t t2) const
192 { return t1 < t2; }
193};
194
195
196// frequently used derived types
197typedef set<text_t,lttext_t> text_tset;
198typedef list<text_t> text_tlist; // more efficient for insertions/deletions
199typedef vector<text_t> text_tarray; // more space efficient than text_tlist
200
201
202// general functions which work on text_ts
203
204// find a character within a range
205text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
206 unsigned short c);
207text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
208 unsigned short c);
209
210// get a string up to the next delimiter (which is skipped)
211text_t::const_iterator getdelimitstr (text_t::const_iterator first,
212 text_t::const_iterator last,
213 unsigned short c, text_t &outstr);
214text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
215 unsigned short c, text_t &outstr);
216
217// split a string with a character
218void splitchar (text_t::const_iterator first, text_t::const_iterator last,
219 unsigned short c, text_tlist &outlist);
220void splitchar (text_t::const_iterator first, text_t::const_iterator last,
221 unsigned short c, text_tarray &outlist);
222
223// join a string using a character
224void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
225void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
226
227// count the occurances of a character within a range
228int countchar (text_t::const_iterator first, text_t::const_iterator last,
229 unsigned short c);
230
231
232
233
234
235
236
237
// conversion classes used for getting information in to and out of
// the text_t class.

// Base class shared by the input and the output converter.
class convertclass
{
public:
  // status codes handed back by the convert methods of the
  // derived classes through their "status" parameter
  enum status_t {finished, stopped, unfinished};

  convertclass ();

  // The class has virtual methods and is meant to be derived from,
  // the virtual destructor makes it safe to delete a derived
  // converter through a convertclass pointer.
  virtual ~convertclass () {}

  virtual void reset ();
};
249
250
251
252// convert from a char stream to the text_t class
253// the default version assumes the input is a ascii
254// character array
255class inconvertclass : public convertclass
256{
257public:
258 inconvertclass ();
259 void reset ();
260 void setinput (char *thestart, size_t thelen);
261
262 // output will be cleared before the conversion
263 virtual void convert (text_t &output, status_t &status);
264
265 // will treat the text_t as a 8-bit string and convert
266 // it to a 16-bit string using the about convert method.
267 text_t convert (const text_t &t);
268
269protected:
270 char *start;
271 size_t len;
272};
273
274// to get something which will do the conversion
275// to ascii declare a (non global!) instance like
276// this
277// inconvertclass ascii2text_t;
278
279
280// Convert from a text_t class to a char stream
281// This default version assumes the output is a ascii
282// character array. If you set the output stream you
283// can use this class to output to a stream using the
284// << operator. The << operator can also be conveniently
285// used to set the output stream by doing something like
286//
287// cout << text_t2ascii << textstr << anothertextstr;
288//
289// this class assumes that the input text doesn't change
290// while the conversion takes place
291class outconvertclass : public convertclass
292{
293public:
294 outconvertclass ();
295 void reset ();
296 void setinput (text_t *theinput);
297 // note that convert does not null-terminate the
298 // output array of characters
299 virtual void convert (char *output, size_t maxlen,
300 size_t &len, status_t &status);
301
302 // will convert the 16-bit string to a 8-bit stream
303 // and place the result in a text_t. This method uses
304 // the above convert function.
305 text_t convert (const text_t &t);
306
307 void setostream (ostream *theouts);
308 ostream *getostream ();
309
310protected:
311 text_t *input;
312 text_t::iterator texthere; // only valid if input is valid
313
314 ostream *outs;
315};
316
317// to get something which will do the conversion
318// to text_t declare a (non global!) instance like
319// this
320// outconvertclass text_t2ascii;
321
322
323// stream operators for the output class
324outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
325outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);
326
327
328#endif
Note: See TracBrowser for help on using the repository browser.