source: main/trunk/greenstone2/common-src/src/lib/text_t.h@ 22141

Last change on this file since 22141 was 22141, checked in by davidb, 14 years ago

Was surprised to discover some classes that did not correctly specify virtual on their destructors, even though virtual was being used on other methods in the class, or else through inheritance. Now fixed up.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1/**********************************************************************
2 *
3 * text_t.h -- a simple 16-bit character string class
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: text_t.h 22141 2010-05-20 08:51:21Z davidb $
25 *
26 *********************************************************************/
27
28
29#ifndef TEXT_T_H
30#define TEXT_T_H
31
32#include "gsdlconf.h"
33
34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\vector>
36# include <ospace\std\list>
37# include <ospace\std\set>
38# include <ospace\std\map>
39#elif defined(GSDL_USE_STL_H)
40# include <vector.h>
41# include <list.h>
42# include <set.h>
43# include <map.h>
44#else
45# include <vector>
46# include <list>
47# include <set>
48# include <map>
49#endif
50#include <string>
51
52// use the standard namespace
53#if !defined (GSDL_NAMESPACE_BROKEN)
54#if defined(GSDL_USE_OBJECTSPACE)
55using namespace ospace::std;
56#else
57using namespace std;
58#endif
59#endif
60
61// class prototypes
62class text_t;
63class inconvertclass;
64class outconvertclass;
65
66
67// for those stupid compilers which need it
68#if defined(GSDL_NEED_DESTROY_USHORT)
// No-op destroy() overloads for unsigned short and int, compiled in only
// when GSDL_NEED_DESTROY_USHORT is defined. The pointer argument is ignored.
// Note: there is deliberately no ';' after the function bodies -- at
// namespace scope that would be an empty declaration, which pre-C++11
// compilers may reject or warn about in pedantic mode.
inline void destroy(unsigned short *) {}
inline void destroy(int *) {}
71#endif
72
73typedef vector<unsigned short> usvector;
74extern const text_t g_EmptyText;
75
76// The class text_t can handle long strings which may contain
77// null characters. It uses unsigned shorts to represent up to
78// 64K character values.
79class text_t {
80 public:
81 //type support for ucvector
82 typedef usvector::iterator iterator;
83 typedef usvector::const_iterator const_iterator;
84 typedef usvector::reference reference;
85 typedef usvector::const_reference const_reference;
86 typedef usvector::size_type size_type;
87 typedef usvector::difference_type difference_type;
88 typedef usvector::const_reverse_iterator const_reverse_iterator;
89 typedef usvector::reverse_iterator reverse_iterator;
90
91protected:
92 usvector text;
93 unsigned short encoding; // 0 = unicode, 1 = other
94
95public:
96 // constructors
97 text_t ();
98 text_t (int i);
99 text_t (const char *s); // assumed to be a normal c string
100 text_t (const char *s, size_type nLength); // support for arrays of chars
101 void setencoding (unsigned short theencoding) {encoding=theencoding;};
102 unsigned short getencoding () {return encoding;};
103
104 usvector& text_as_usvector() { return text ; };
105 const usvector& text_as_usvector() const { return text ; };
106
107 // basic container support
108 iterator begin () {return text.begin();}
109 const_iterator begin () const {return text.begin();}
110 iterator end () {return text.end();}
111 const_iterator end () const {return text.end();}
112
113 void erase(iterator pos) {text.erase(pos);}
114 void erase(iterator first, iterator last) {text.erase(first, last);}
115 void push_back(unsigned short c) {text.push_back(c);}
116 void pop_back() {text.pop_back();}
117 text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
118 reference operator[](size_type n) {return text[n];};
119 const_reference operator[](size_type n) const {return text[n];};
120
121 void reserve (size_type n) {text.reserve(n);}
122 size_type capacity() const { return text.capacity(); }
123
124 bool empty () const {return text.empty();}
125 size_type size() const {return text.size();}
126 friend inline bool operator!=(const text_t& x, const text_t& y)
127 {return (x.text != y.text);}
128 friend inline bool operator==(const text_t& x, const text_t& y)
129 {return (x.text == y.text);}
130 friend inline bool operator<(const text_t& x, const text_t& y)
131 {return (x.text < y.text);}
132 friend inline bool operator>(const text_t& x, const text_t& y)
133 {return (x.text > y.text);}
134 friend inline bool operator>=(const text_t& x, const text_t& y)
135 {return (x.text >= y.text);}
136 friend inline bool operator<=(const text_t& x, const text_t& y)
137 {return (x.text <= y.text);}
138
139 // added functionality
140 void clear () {text.erase(text.begin(),text.end());}
141 void append (const text_t &t);
142 void appendrange (iterator first, iterator last);
143 void appendrange (const_iterator first, const_iterator last);
144 text_t &operator+= (const text_t &t) {append(t);return *this;}
145
146 // support for integers
147 void appendint (int i);
148 void setint (int i) {clear();appendint(i);}
149 text_t &operator=(int i) {setint (i);return *this;}
150 text_t &operator+= (int i) {appendint(i);return *this;}
151 int getint () const;
152
153 // same as getint but returns an unsigned long
154 unsigned long getulong () const;
155
156 // support for arrays of chars
157 void appendcarr (const char *s, size_type len);
158 void setcarr (char *s, size_type len) {clear();appendcarr(s,len);}
159
160 // support for const null-terminated C strings
161 void appendcstr (const char *s);
162 void setcstr (const char *s) {clear();appendcstr(s);}
163 text_t &operator= (const char *s) {setcstr(s);return *this;} // c string
164 text_t &operator+= (const char *s) {appendcstr(s);return *this;} // c string
165
166 // strings returned from getcarr and getcstr become the callers
167 // responsibility and should be deallocated with "delete []"
168 char *getcarr(size_type &len) const;
169 char *getcstr() const;
170
171 int replace(text_t toreplace, text_t replacement);
172};
173
174// new stream converter ...
175ostream& operator<< (ostream &o, const text_t &text);
176
177inline text_t operator+(const text_t &t1, const text_t &t2)
178{
179 text_t tnew = t1;
180 tnew.append(t2);
181 return tnew;
182}
183
184inline text_t operator+(const text_t &t1, int i1)
185{
186 text_t tnew = t1;
187 tnew.appendint(i1);
188 return tnew;
189}
190
191inline text_t operator+(const text_t &t1, char *s1)
192{
193 text_t tnew = t1;
194 tnew.appendcstr(s1);
195 return tnew;
196}
197
198
199
200
201struct eqtext_t
202{
203 bool operator()(const text_t &t1, const text_t &t2) const
204 { return t1 == t2; }
205};
206
207struct lttext_t
208{
209 bool operator()(const text_t &t1, const text_t &t2) const
210 { return t1 < t2; }
211};
212
213
214// frequently used derived types
215typedef set<text_t,lttext_t> text_tset;
216typedef list<text_t> text_tlist; // more efficient for insertions/deletions
217typedef vector<text_t> text_tarray; // more space efficient than text_tlist
218typedef map<text_t, text_t, lttext_t> text_tmap;
219
220
221// general functions which work on text_ts
222
223// find a character within a range
224text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
225 unsigned short c);
226text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
227 unsigned short c);
228// Find the last occurrence of c between first and last_plus_one -1. Returns last_plus_one if not found.
229text_t::iterator findlastchar (text_t::iterator first, text_t::iterator last_plus_one,
230 unsigned short c);
231text_t::iterator findword (text_t::iterator first, text_t::iterator last,
232 const text_t &word);
233text_t::const_iterator findword (text_t::const_iterator first, text_t::const_iterator last,
234 const text_t& word);
235
236// get a string up to the next delimiter (which is skipped)
237text_t::const_iterator getdelimitstr (text_t::const_iterator first,
238 text_t::const_iterator last,
239 unsigned short c, text_t &outstr);
240text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
241 unsigned short c, text_t &outstr);
242
243text_t::const_iterator getdelimitstr (text_t::const_iterator first, text_t::const_iterator last,
244 text_t w, text_t &outstr);
245
246// split a string with a character
247void splitchar (text_t::const_iterator first, text_t::const_iterator last,
248 unsigned short c, text_tset &outlist);
249void splitchar (text_t::const_iterator first, text_t::const_iterator last,
250 unsigned short c, text_tlist &outlist);
251void splitchar (text_t::const_iterator first, text_t::const_iterator last,
252 unsigned short c, text_tarray &outlist);
253
254void splitword (text_t::const_iterator first, text_t::const_iterator last,
255 text_t w, text_tlist &outlist);
256
257// join a string using a character
258void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext);
259void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
260void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
261void joinchar (const text_tset &inlist, const text_t &c, text_t &outtext);
262void joinchar (const text_tlist &inlist, const text_t &c, text_t &outtext);
263void joinchar (const text_tarray &inlist, const text_t &c, text_t &outtext);
264
265// count the occurrences of a character within a range
266int countchar (text_t::const_iterator first, text_t::const_iterator last,
267 unsigned short c);
268
269// return a substring of string from first up to but not including last
270text_t substr (text_t::const_iterator first, text_t::const_iterator last);
271
272// convert to lowercase
273void lc (text_t::iterator first, text_t::iterator last);
274inline void lc (text_t &t) {lc (t.begin(), t.end());}
275
276// convert to uppercase
277void uc (text_t::iterator first, text_t::iterator last);
278inline void uc (text_t &t) {uc (t.begin(), t.end());}
279
280// checks to see if it is a number (i.e. contains only 0-9)
281bool is_number (const text_t &text);
282
283// checks to see if the text has any letters or digits
284bool has_unicode_letdig (const text_t &text);
285
286// checks to see if a text_t starts with the specified prefix
287bool starts_with(const text_t& text, const text_t& prefix);
288// checks to see if a text_t ends with the specified suffix
289bool ends_with(const text_t& text, const text_t& suffix);
290
291// conversion classes used for getting information in to and out of
292// the text_t class.
293
// Common base class of the input and output converters.
// It has virtual members (including the destructor), so
// derived converters can be used and deleted through a
// convertclass pointer.
class convertclass
{
public:
  // status values handed back by the convert methods of the
  // derived classes
  // NOTE(review): the exact meaning of each value is not
  // visible in this header -- confirm against text_t.cpp
  enum status_t {
    finished,
    stopped,
    unfinished
  };

  convertclass ();
  virtual ~convertclass ();

  virtual void reset ();
};
303
304
305
306// convert from a char stream to the text_t class
307// the default version assumes the input is a ascii
308// character array
309class inconvertclass : public convertclass
310{
311public:
312 inconvertclass ();
313 ~inconvertclass ();
314
315 virtual void reset ();
316 void setinput (char *thestart, size_t thelen);
317
318 // output will be cleared before the conversion
319 virtual void convert (text_t &output, status_t &status);
320
321 // will treat the text_t as a 8-bit string and convert
322 // it to a 16-bit string using the about convert method.
323 text_t convert (const text_t &t);
324
325protected:
326 char *start;
327 size_t len;
328};
329
330// to get something which will do the conversion
331// to ascii declare a (non global!) instance like
332// this
333// inconvertclass ascii2text_t;
334
335#if defined(GSDL_USE_IOS_H)
336#include <iostream.h> // darwin doesn't have ostream.h...
337#else
338#include <ostream>
339#endif
340
341// Convert from a text_t class to a char stream
342// This default version assumes the output is a ascii
343// character array. If you set the output stream you
344// can use this class to output to a stream using the
345// << operator. The << operator can also be conveniently
346// used to set the output stream by doing something like
347//
348// cout << text_t2ascii << textstr << anothertextstr;
349//
350// this class assumes that the input text doesn't change
351// while the conversion takes place
352class outconvertclass : public convertclass
353{
354public:
355 outconvertclass ();
356 ~outconvertclass ();
357
358 virtual void reset ();
359 virtual void setinput (text_t *theinput);
360 virtual void setdata(text_t *input, text_t::iterator texthere);
361 // note that convert does not null-terminate the
362 // output array of characters
363 virtual void convert (char *output, size_t maxlen,
364 size_t &len, status_t &status);
365
366 // will convert the 16-bit string to a 8-bit stream
367 // and place the result in a text_t. This method uses
368 // the above convert function.
369 text_t convert (const text_t &t);
370
371 virtual void setostream (ostream *theouts);
372 ostream *getostream ();
373
374protected:
375 text_t *input;
376 text_t::iterator texthere; // only valid if input is valid
377
378 ostream *outs;
379};
380
381// to get something which will do the conversion
382// to text_t declare a (non global!) instance like
383// this
384// outconvertclass text_t2ascii;
385
386
387// stream operators for the output class
388outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
389outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);
390
391
392#endif
Note: See TracBrowser for help on using the repository browser.