source: main/tags/2.35a/gsdl/lib/text_t.h@ 33178

Last change on this file since 33178 was 2487, checked in by sjboddie, 23 years ago

Changes to get phind working under windows

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.5 KB
Line 
1/**********************************************************************
2 *
3 * text_t.h -- a simple 16-bit character string class
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: text_t.h 2487 2001-06-01 02:51:29Z sjboddie $
25 *
26 *********************************************************************/
27
28
29#ifndef TEXT_T_H
30#define TEXT_T_H
31
32#include "gsdlconf.h"
33
34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\vector>
36# include <ospace\std\list>
37# include <ospace\std\set>
38# include <ospace\std\map>
39#elif defined(GSDL_USE_STL_H)
40# include <vector.h>
41# include <list.h>
42# include <set.h>
43# include <map.h>
44#else
45# include <vector>
46# include <list>
47# include <set>
48# include <map>
49#endif
50
51// use the standard namespace
52#if !defined (GSDL_NAMESPACE_BROKEN)
53#if defined(GSDL_USE_OBJECTSPACE)
54using namespace ospace::std;
55#else
56using namespace std;
57#endif
58#endif
59
60// class prototypes
61class inconvertclass;
62class outconvertclass;
63
64
65// for those stupid compilers which need it
66#if defined(GSDL_NEED_DESTROY_USHORT)
// No-op destroy() overloads for pointers to unsigned short and int.
// Both bodies are intentionally empty: the pointed-to value is left untouched.
inline void destroy(unsigned short * /* unused */) {}
inline void destroy(int * /* unused */) {}
69#endif
70
// underlying storage type: a vector of 16-bit character values
typedef vector<unsigned short> usvector;

// The class text_t can handle long strings which may contain
// null characters. It uses unsigned shorts to represent up to
// 64K character values.
//
// The characters are held in a usvector and most of the container
// interface below simply forwards to that vector. Members that are
// only declared here are defined outside this header.
//
// Note that the comparison operators look at the character data
// only; the encoding value does not take part in comparisons.
class text_t {
public:
  // type support for usvector
  typedef usvector::iterator iterator;
  typedef usvector::const_iterator const_iterator;
  typedef usvector::reference reference;
  typedef usvector::const_reference const_reference;
  typedef usvector::size_type size_type;
  typedef usvector::difference_type difference_type;
  typedef usvector::const_reverse_iterator const_reverse_iterator;
  typedef usvector::reverse_iterator reverse_iterator;

protected:
  usvector text;            // the character data
  unsigned short encoding;  // 0 = unicode, 1 = other

public:
  // constructors
  text_t ();
  text_t (int i);    // NOTE(review): presumably the decimal form of i (cf. setint) -- confirm
  text_t (char *s);  // assumed to be a normal c string

  // encoding accessors; getencoding is const so that it can be
  // called on a const text_t
  void setencoding (unsigned short theencoding) {encoding=theencoding;}
  unsigned short getencoding () const {return encoding;}

  // direct access to the underlying vector of characters
  usvector& text_as_usvector() { return text; }
  const usvector& text_as_usvector() const { return text; }

  // basic container support (forwarded to the underlying vector)
  iterator begin () {return text.begin();}
  const_iterator begin () const {return text.begin();}
  iterator end () {return text.end();}
  const_iterator end () const {return text.end();}

  void erase(iterator pos) {text.erase(pos);}
  void erase(iterator first, iterator last) {text.erase(first, last);}
  void push_back(unsigned short c) {text.push_back(c);}
  void pop_back() {text.pop_back();}
  // copies both the characters and the encoding
  text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
  // unchecked element access, as for the underlying vector
  reference operator[](size_type n) {return text[n];}
  const_reference operator[](size_type n) const {return text[n];}

  void reserve (size_type n) {text.reserve(n);}

  bool empty () const {return text.empty();}
  size_type size() const {return text.size();}

  // comparisons are made on the character data only
  friend inline bool operator!=(const text_t& x, const text_t& y)
  {return (x.text != y.text);}
  friend inline bool operator==(const text_t& x, const text_t& y)
  {return (x.text == y.text);}
  friend inline bool operator<(const text_t& x, const text_t& y)
  {return (x.text < y.text);}
  friend inline bool operator>(const text_t& x, const text_t& y)
  {return (x.text > y.text);}
  friend inline bool operator>=(const text_t& x, const text_t& y)
  {return (x.text >= y.text);}
  friend inline bool operator<=(const text_t& x, const text_t& y)
  {return (x.text <= y.text);}

  // added functionality
  // clear removes every character; the encoding is left unchanged
  void clear () {text.erase(text.begin(),text.end());}
  void append (const text_t &t);
  void appendrange (iterator first, iterator last);
  void appendrange (const_iterator first, const_iterator last);
  text_t &operator+= (const text_t &t) {append(t);return *this;}

  // support for integers
  void appendint (int i);
  void setint (int i) {clear();appendint(i);}
  text_t &operator=(int i) {setint (i);return *this;}
  text_t &operator+= (int i) {appendint(i);return *this;}
  int getint () const;

  // same as getint but returns an unsigned long
  unsigned long getulong () const;

  // support for arrays of chars
  void appendcarr (char *s, size_type len);
  void setcarr (char *s, size_type len) {clear();appendcarr(s,len);}

  // support for null-terminated C strings
  void appendcstr (char *s);
  void setcstr (char *s) {clear();appendcstr(s);}
  text_t &operator= (char *s) {setcstr(s);return *this;}      // c string
  text_t &operator+= (char *s) {appendcstr(s);return *this;}  // c string

  // support for const null-terminated C string
  // (these forward to the char * versions above by casting away const)
  void appendcstr (const char *s) {appendcstr((char *)s);}
  void setcstr (const char *s) {clear();appendcstr((char *) s);}
  text_t &operator= (const char *s) {setcstr((char *) s);return *this;}      // c string
  text_t &operator+= (const char *s) {appendcstr((char *) s);return *this;}  // c string

  // strings returned from getcarr and getcstr become the callers
  // responsibility and should be deallocated with "delete"
  // NOTE(review): if the implementation allocates these with new [],
  // callers need "delete []" instead -- confirm against the definitions.
  char *getcarr(size_type &len) const;
  char *getcstr() const;

};
174
175// new stream converter ...
176ostream& operator<< (ostream &o, const text_t text);
177
178inline text_t operator+(const text_t &t1, const text_t &t2)
179{
180 text_t tnew = t1;
181 tnew.append(t2);
182 return tnew;
183}
184
185inline text_t operator+(const text_t &t1, int i1)
186{
187 text_t tnew = t1;
188 tnew.appendint(i1);
189 return tnew;
190}
191
192inline text_t operator+(const text_t &t1, char *s1)
193{
194 text_t tnew = t1;
195 tnew.appendcstr(s1);
196 return tnew;
197}
198
199
200
201
202struct eqtext_t
203{
204 bool operator()(const text_t &t1, const text_t &t2) const
205 { return t1 == t2; }
206};
207
208struct lttext_t
209{
210 bool operator()(const text_t &t1, const text_t &t2) const
211 { return t1 < t2; }
212};
213
214
215// frequently used derived types
216typedef set<text_t,lttext_t> text_tset;
217typedef list<text_t> text_tlist; // more efficient for insertions/deletions
218typedef vector<text_t> text_tarray; // more space efficient than text_tlist
219typedef map<text_t, text_t, lttext_t> text_tmap;
220
221
222// general functions which work on text_ts
223
224// find a character within a range
225text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
226 unsigned short c);
227text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
228 unsigned short c);
229
230text_t::iterator findword (text_t::iterator first, text_t::iterator last,
231 const text_t &word);
232
233// get a string up to the next delimiter (which is skipped)
234text_t::const_iterator getdelimitstr (text_t::const_iterator first,
235 text_t::const_iterator last,
236 unsigned short c, text_t &outstr);
237text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
238 unsigned short c, text_t &outstr);
239
240// split a string with a character
241void splitchar (text_t::const_iterator first, text_t::const_iterator last,
242 unsigned short c, text_tset &outlist);
243void splitchar (text_t::const_iterator first, text_t::const_iterator last,
244 unsigned short c, text_tlist &outlist);
245void splitchar (text_t::const_iterator first, text_t::const_iterator last,
246 unsigned short c, text_tarray &outlist);
247
248// join a string using a character
249void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext);
250void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
251void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
252void joinchar (const text_tset &inlist, text_t c, text_t &outtext);
253void joinchar (const text_tlist &inlist, text_t c, text_t &outtext);
254void joinchar (const text_tarray &inlist, text_t c, text_t &outtext);
255
256// count the occurances of a character within a range
257int countchar (text_t::const_iterator first, text_t::const_iterator last,
258 unsigned short c);
259
260// return a substring of string from first up to but not including last
261text_t substr (text_t::const_iterator first, text_t::const_iterator last);
262
263// convert to lowercase
264void lc (text_t::iterator first, text_t::iterator last);
265inline void lc (text_t &t) {lc (t.begin(), t.end());}
266
267// convert to uppercase
268void uc (text_t::iterator first, text_t::iterator last);
269inline void uc (text_t &t) {uc (t.begin(), t.end());}
270
271// checks to see if it is a number (i.e. contains only 0-9)
272bool is_number (const text_t &text);
273
274// checks to see if the text has any letters or digits
275bool has_unicode_letdig (const text_t &text);
276
277
// Conversion classes used for getting information in to and out of
// the text_t class.
//
// convertclass is the common base of inconvertclass and
// outconvertclass. It supplies the status_t type used by their
// convert methods and a virtual reset method.

class convertclass
{
public:
  // conversion status passed back through the status argument of
  // the derived classes' convert methods
  enum status_t {finished, stopped, unfinished};

  convertclass ();

  // virtual, so that a converter which is deleted through a
  // convertclass pointer is destroyed correctly
  virtual ~convertclass () {}

  virtual void reset ();
};
289
290
291
292// convert from a char stream to the text_t class
293// the default version assumes the input is a ascii
294// character array
295class inconvertclass : public convertclass
296{
297public:
298 inconvertclass ();
299 void reset ();
300 void setinput (char *thestart, size_t thelen);
301
302 // output will be cleared before the conversion
303 virtual void convert (text_t &output, status_t &status);
304
305 // will treat the text_t as a 8-bit string and convert
306 // it to a 16-bit string using the about convert method.
307 text_t convert (const text_t &t);
308
309protected:
310 char *start;
311 size_t len;
312};
313
314// to get something which will do the conversion
315// from ascii to text_t, declare a (non global!) instance
316// like this:
317// inconvertclass ascii2text_t;
318
319#if defined(GSDL_USE_IOS_H)
320#include <iostream.h> // darwin doesn't have ostream.h...
321#else
322#include <ostream>
323#endif
324
325// Convert from a text_t class to a char stream
326// This default version assumes the output is a ascii
327// character array. If you set the output stream you
328// can use this class to output to a stream using the
329// << operator. The << operator can also be conveniently
330// used to set the output stream by doing something like
331//
332// cout << text_t2ascii << textstr << anothertextstr;
333//
334// this class assumes that the input text doesn't change
335// while the conversion takes place
336class outconvertclass : public convertclass
337{
338public:
339 outconvertclass ();
340 void reset ();
341 void setinput (text_t *theinput);
342 // note that convert does not null-terminate the
343 // output array of characters
344 virtual void convert (char *output, size_t maxlen,
345 size_t &len, status_t &status);
346
347 // will convert the 16-bit string to a 8-bit stream
348 // and place the result in a text_t. This method uses
349 // the above convert function.
350 text_t convert (const text_t &t);
351
352 void setostream (ostream *theouts);
353 ostream *getostream ();
354
355protected:
356 text_t *input;
357 text_t::iterator texthere; // only valid if input is valid
358
359 ostream *outs;
360};
361
362// to get something which will do the conversion
363// from text_t to ascii, declare a (non global!) instance
364// like this:
365// outconvertclass text_t2ascii;
366
367
368// stream operators for the output class
369outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
370outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);
371
372#endif
Note: See TracBrowser for help on using the repository browser.