source: trunk/gsdl/lib/text_t.h@ 413

Last change on this file since 413 was 330, checked in by rjmcnab, 25 years ago

Optimised append functions slightly and added a reserve function.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1/**********************************************************************
2 *
3 * text_t.h -- a simple 16-bit character string class
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: text_t.h 330 1999-07-01 04:05:10Z rjmcnab $
9 *
10 *********************************************************************/
11
12
13#ifndef TEXT_T_H
14#define TEXT_T_H
15
16#include "gsdlconf.h"
17
18#if defined(GSDL_USE_OBJECTSPACE)
19# include <ospace\std\vector>
20# include <ospace\std\list>
21# include <ospace\std\set>
22# include <ospace\std\map>
23#elif defined(GSDL_USE_STL_H)
24# include <vector.h>
25# include <list.h>
26# include <set.h>
27# include <map.h>
28#else
29# include <vector>
30# include <list>
31# include <set>
32# include <map>
33#endif
34
35// use the standard namespace
36#if defined(GSDL_USE_OBJECTSPACE)
37using namespace ospace::std;
38#elif !defined(GSDL_NAMESPACE_BROKEN)
39using namespace std;
40#endif
41
42
43// class prototypes
44class inconvertclass;
45class outconvertclass;
46
47
48// for those stupid compilers which need it
49#if defined(GSDL_NEED_DESTROY_USHORT)
// Deliberately empty: only compiled in for compilers that define
// GSDL_NEED_DESTROY_USHORT. The pointer argument is ignored.
inline void destroy(unsigned short *) {}
51#endif
52
// Underlying storage used by text_t: a vector of 16-bit values.
typedef vector<unsigned short> usvector;

// The class text_t can handle long strings which may contain
// null characters. It uses unsigned shorts to represent up to
// 64K character values.
//
// Most of the container interface is forwarded straight to the
// underlying usvector. Member functions that are only declared here
// are defined outside this header.
class text_t {
public:
  // type support for usvector
  typedef usvector::iterator iterator;
  typedef usvector::const_iterator const_iterator;
  typedef usvector::reference reference;
  typedef usvector::const_reference const_reference;
  typedef usvector::size_type size_type;
  typedef usvector::difference_type difference_type;
  typedef usvector::const_reverse_iterator const_reverse_iterator;
  typedef usvector::reverse_iterator reverse_iterator;

protected:
  usvector text;           // the character data
  unsigned short encoding; // 0 = unicode, 1 = other

public:
  // constructors
  text_t ();
  text_t (int i);
  text_t (char *s); // assumed to be a normal c string

  // The encoding tag is simply stored and returned; nothing in this
  // header interprets it. getencoding is const so that it can be
  // called through a const text_t reference.
  void setencoding (unsigned short theencoding) {encoding=theencoding;}
  unsigned short getencoding () const {return encoding;}

  // basic container support (forwards to the underlying usvector)
  iterator begin () {return text.begin();}
  const_iterator begin () const {return text.begin();}
  iterator end () {return text.end();}
  const_iterator end () const {return text.end();}

  void erase(iterator pos) {text.erase(pos);}
  void erase(iterator first, iterator last) {text.erase(first, last);}
  void push_back(unsigned short c) {text.push_back(c);}
  void pop_back() {text.pop_back();}
  // copies both the character data and the encoding tag
  text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
  // unchecked element access, as for vector::operator[]
  reference operator[](size_type n) {return text[n];}
  const_reference operator[](size_type n) const {return text[n];}

  void reserve (size_type n) {text.reserve(n);}

  bool empty () const {return text.empty();}
  size_type size() const {return text.size();}

  // The comparison operators are defined later in this header; they
  // compare the character data only and ignore the encoding tag.
  friend bool operator!=(const text_t& x, const text_t& y);
  friend bool operator==(const text_t& x, const text_t& y);
  friend bool operator<(const text_t& x, const text_t& y);
  friend bool operator>(const text_t& x, const text_t& y);

  // added functionality
  void clear () {text.erase(text.begin(),text.end());} // removes every character
  void append (const text_t &t);
  void appendrange (iterator first, iterator last);
  void appendrange (const_iterator first, const_iterator last);
  text_t &operator+= (const text_t &t) {append(t);return *this;}

  // support for integers
  void appendint (int i);
  void setint (int i) {clear();appendint(i);}
  text_t &operator=(int i) {setint (i);return *this;}
  text_t &operator+= (int i) {appendint(i);return *this;}
  int getint () const;

  // support for arrays of chars (explicit length, so the data may
  // contain null characters)
  void appendcarr (char *s, size_type len);
  void setcarr (char *s, size_type len) {clear();appendcarr(s,len);}

  // support for null-terminated C strings
  void appendcstr (char *s);
  void setcstr (char *s) {clear();appendcstr(s);}
  text_t &operator= (char *s) {setcstr(s);return *this;}     // c string
  text_t &operator+= (char *s) {appendcstr(s);return *this;} // c string

  // strings returned from getcarr and getcstr become the caller's
  // responsibility and should be deallocated with "delete"
  // NOTE(review): if the implementation allocates these with new[],
  // the matching form is "delete []" -- confirm against the definitions.
  char *getcarr(size_type &len) const;
  char *getcstr() const;
};
135
136
137inline text_t operator+(const text_t &t1, const text_t &t2)
138{
139 text_t tnew = t1;
140 tnew.append(t2);
141 return tnew;
142}
143
144inline text_t operator+(const text_t &t1, int i1)
145{
146 text_t tnew = t1;
147 tnew.appendint(i1);
148 return tnew;
149}
150
151inline text_t operator+(const text_t &t1, char *s1)
152{
153 text_t tnew = t1;
154 tnew.appendcstr(s1);
155 return tnew;
156}
157
158
159inline bool operator!=(const text_t& x, const text_t& y)
160{
161 return (x.text != y.text);
162}
163
164inline bool operator==(const text_t& x, const text_t& y)
165{
166 return (x.text == y.text);
167}
168
169inline bool operator<(const text_t& x, const text_t& y)
170{
171 return (x.text < y.text);
172}
173
174inline bool operator>(const text_t& x, const text_t& y)
175{
176 return (x.text > y.text);
177}
178
179
180struct eqtext_t
181{
182 bool operator()(const text_t &t1, const text_t &t2) const
183 { return t1 == t2; }
184};
185
186struct lttext_t
187{
188 bool operator()(const text_t &t1, const text_t &t2) const
189 { return t1 < t2; }
190};
191
192
193// frequently used derived types
194typedef set<text_t,lttext_t> text_tset;
195typedef list<text_t> text_tlist; // more efficient for insertions/deletions
196typedef vector<text_t> text_tarray; // more space efficient than text_tlist
197typedef map<text_t, text_t, lttext_t> text_tmap;
198
199
200// general functions which work on text_ts
201
202// find a character within a range
203text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
204 unsigned short c);
205text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
206 unsigned short c);
207
208// get a string up to the next delimiter (which is skipped)
209text_t::const_iterator getdelimitstr (text_t::const_iterator first,
210 text_t::const_iterator last,
211 unsigned short c, text_t &outstr);
212text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
213 unsigned short c, text_t &outstr);
214
215// split a string with a character
216void splitchar (text_t::const_iterator first, text_t::const_iterator last,
217 unsigned short c, text_tset &outlist);
218void splitchar (text_t::const_iterator first, text_t::const_iterator last,
219 unsigned short c, text_tlist &outlist);
220void splitchar (text_t::const_iterator first, text_t::const_iterator last,
221 unsigned short c, text_tarray &outlist);
222
223// join a string using a character
224void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext);
225void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
226void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
227
228// count the occurances of a character within a range
229int countchar (text_t::const_iterator first, text_t::const_iterator last,
230 unsigned short c);
231
232// return a substring of string from first up to but not including last
233text_t substr (text_t::const_iterator first, text_t::const_iterator last);
234
235// convert to lowercase
236void lc (text_t::iterator first, text_t::iterator last);
237inline void lc (text_t &t) {lc (t.begin(), t.end());}
238
239// convert to uppercase
240void uc (text_t::iterator first, text_t::iterator last);
241inline void uc (text_t &t) {uc (t.begin(), t.end());}
242
243// checks to see if it is a number (i.e. contains only 0-9)
244bool is_number (const text_t &text);
245
246
247// conversion classes used for getting information in to and out of
248// the text_t class.
249
// Common base of the conversion classes inconvertclass and
// outconvertclass.
class convertclass
{
public:
  // Status value reported through the status argument of the derived
  // classes' convert methods.
  enum status_t {finished, stopped, unfinished};

  convertclass ();

  // This class is used polymorphically (reset is virtual), so the
  // destructor must be virtual too: otherwise deleting a derived
  // converter through a convertclass pointer is undefined behaviour.
  virtual ~convertclass () {}

  virtual void reset ();
};
258
259
260
261// convert from a char stream to the text_t class
262// the default version assumes the input is a ascii
263// character array
264class inconvertclass : public convertclass
265{
266public:
267 inconvertclass ();
268 void reset ();
269 void setinput (char *thestart, size_t thelen);
270
271 // output will be cleared before the conversion
272 virtual void convert (text_t &output, status_t &status);
273
274 // will treat the text_t as a 8-bit string and convert
275 // it to a 16-bit string using the about convert method.
276 text_t convert (const text_t &t);
277
278protected:
279 char *start;
280 size_t len;
281};
282
283// to get something which will do the conversion
284// from ascii to text_t declare a (non global!) instance like
285// this
286// inconvertclass ascii2text_t;
287
288
289// Convert from a text_t class to a char stream
290// This default version assumes the output is a ascii
291// character array. If you set the output stream you
292// can use this class to output to a stream using the
293// << operator. The << operator can also be conveniently
294// used to set the output stream by doing something like
295//
296// cout << text_t2ascii << textstr << anothertextstr;
297//
298// this class assumes that the input text doesn't change
299// while the conversion takes place
300class outconvertclass : public convertclass
301{
302public:
303 outconvertclass ();
304 void reset ();
305 void setinput (text_t *theinput);
306 // note that convert does not null-terminate the
307 // output array of characters
308 virtual void convert (char *output, size_t maxlen,
309 size_t &len, status_t &status);
310
311 // will convert the 16-bit string to a 8-bit stream
312 // and place the result in a text_t. This method uses
313 // the above convert function.
314 text_t convert (const text_t &t);
315
316 void setostream (ostream *theouts);
317 ostream *getostream ();
318
319protected:
320 text_t *input;
321 text_t::iterator texthere; // only valid if input is valid
322
323 ostream *outs;
324};
325
326// to get something which will do the conversion
327// from text_t to ascii declare a (non global!) instance like
328// this
329// outconvertclass text_t2ascii;
330
331
332// stream operators for the output class
333outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
334outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);
335
336#endif
Note: See TracBrowser for help on using the repository browser.