source: trunk/gsdl/lib/text_t.h@ 146

Last change on this file since 146 was 138, checked in by sjboddie, 25 years ago

Got interface to handle subcollections and language subcollections -
committed changes made to some of the collections

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
/**********************************************************************
 *
 * text_t.h -- a simple 16-bit character string class
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * PUT COPYRIGHT NOTICE HERE
 *
 * $Id: text_t.h 138 1999-02-03 01:13:30Z sjboddie $
 *
 *********************************************************************/
11
12
13#ifndef TEXT_T_H
14#define TEXT_T_H
15
16#include "gsdlconf.h"
17
18#if defined(GSDL_USE_OBJECTSPACE)
19# include <ospace\std\vector>
20# include <ospace\std\list>
21# include <ospace\std\set>
22#elif defined(GSDL_USE_STL_H)
23# include <vector.h>
24# include <list.h>
25# include <set.h>
26#else
27# include <vector>
28# include <list>
29# include <set>
30#endif
31
32// use the standard namespace
33#if defined(GSDL_USE_OBJECTSPACE)
34using namespace ospace::std;
35#elif !defined(GSDL_NAMESPACE_BROKEN)
36using namespace std;
37#endif
38
39
40// class prototypes
41class inconvertclass;
42class outconvertclass;
43
44
45// for those stupid compilers which need it
46#if defined(GSDL_NEED_DESTROY_USHORT)
// No-op destroy() overload for unsigned short elements. It is only
// compiled when GSDL_NEED_DESTROY_USHORT is defined.
inline void destroy(unsigned short *) {}
48#endif
49
// storage type used by text_t: a vector of 16-bit values
typedef vector<unsigned short> usvector;

// The class text_t can handle long strings which may contain
// null characters. It uses unsigned shorts to represent up to
// 64K character values.
class text_t {
public:
  // type support for usvector
  typedef usvector::iterator iterator;
  typedef usvector::const_iterator const_iterator;
  typedef usvector::reference reference;
  typedef usvector::const_reference const_reference;
  typedef usvector::size_type size_type;
  typedef usvector::difference_type difference_type;
  typedef usvector::const_reverse_iterator const_reverse_iterator;
  typedef usvector::reverse_iterator reverse_iterator;

protected:
  usvector text;
  unsigned short encoding; // 0 = unicode, 1 = other

public:
  // constructors
  text_t ();
  text_t (int i);
  text_t (char *s); // assumed to be a normal c string

  void setencoding (unsigned short theencoding) {encoding=theencoding;}
  // const-qualified so the encoding can be read through a const text_t
  unsigned short getencoding () const {return encoding;}

  // basic container support (forwarded to the underlying vector)
  iterator begin () {return text.begin();}
  const_iterator begin () const {return text.begin();}
  iterator end () {return text.end();}
  const_iterator end () const {return text.end();}

  void erase(iterator pos) {text.erase(pos);}
  void erase(iterator first, iterator last) {text.erase(first, last);}
  void push_back(unsigned short c) {text.push_back(c);}
  void pop_back() {text.pop_back();}
  // assignment copies both the characters and the encoding
  text_t &operator=(const text_t &x) {text=x.text; encoding=x.encoding; return *this;}
  reference operator[](size_type n) {return text[n];}
  const_reference operator[](size_type n) const {return text[n];}

  bool empty () const {return text.empty();}
  size_type size() const {return text.size();}
  // the comparison operators are friends so that they can read
  // the protected text member
  friend bool operator!=(const text_t& x, const text_t& y);
  friend bool operator==(const text_t& x, const text_t& y);
  friend bool operator<(const text_t& x, const text_t& y);
  friend bool operator>(const text_t& x, const text_t& y);

  // added functionality
  // clear removes every character; the encoding is left as it was
  void clear () {text.erase(text.begin(),text.end());}
  void append (const text_t &t);
  void appendrange (iterator first, iterator last);
  void appendrange (const_iterator first, const_iterator last);
  text_t &operator+= (const text_t &t) {append(t);return *this;}

  // support for integers
  void appendint (int i);
  void setint (int i) {clear();appendint(i);}
  text_t &operator=(int i) {setint (i);return *this;}
  text_t &operator+= (int i) {appendint(i);return *this;}
  int getint ();

  // support for arrays of chars
  void appendcarr (char *s, size_type len);
  void setcarr (char *s, size_type len) {clear();appendcarr(s,len);}

  // support for null-terminated C strings
  void appendcstr (char *s);
  void setcstr (char *s) {clear();appendcstr(s);}
  text_t &operator= (char *s) {setcstr(s);return *this;} // c string
  text_t &operator+= (char *s) {appendcstr(s);return *this;} // c string

  // strings returned from getcarr and getcstr become the callers
  // responsibility and should be deallocated with "delete"
  char *getcarr(size_type &len) const;
  char *getcstr() const;
};
130
131
132inline text_t operator+(const text_t &t1, const text_t &t2)
133{
134 text_t tnew = t1;
135 tnew.append(t2);
136 return tnew;
137}
138
139inline text_t operator+(const text_t &t1, int i1)
140{
141 text_t tnew = t1;
142 tnew.appendint(i1);
143 return tnew;
144}
145
146inline text_t operator+(const text_t &t1, char *s1)
147{
148 text_t tnew = t1;
149 tnew.appendcstr(s1);
150 return tnew;
151}
152
153
154inline bool operator!=(const text_t& x, const text_t& y)
155{
156 return (x.text != y.text);
157}
158
159inline bool operator==(const text_t& x, const text_t& y)
160{
161 return (x.text == y.text);
162}
163
164inline bool operator<(const text_t& x, const text_t& y)
165{
166 return (x.text < y.text);
167}
168
169inline bool operator>(const text_t& x, const text_t& y)
170{
171 return (x.text > y.text);
172}
173
174
175struct eqtext_t
176{
177 bool operator()(const text_t t1, const text_t t2) const
178 { return t1 == t2; }
179};
180
181struct lttext_t
182{
183 bool operator()(const text_t t1, const text_t t2) const
184 { return t1 < t2; }
185};
186
187
188// frequently used derived types
189typedef set<text_t,lttext_t> text_tset;
190typedef list<text_t> text_tlist; // more efficient for insertions/deletions
191typedef vector<text_t> text_tarray; // more space efficient than text_tlist
192
193
194// general functions which work on text_ts
195
196// find a character within a range
197text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
198 unsigned short c);
199text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
200 unsigned short c);
201
202// get a string up to the next delimiter (which is skipped)
203text_t::const_iterator getdelimitstr (text_t::const_iterator first,
204 text_t::const_iterator last,
205 unsigned short c, text_t &outstr);
206text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
207 unsigned short c, text_t &outstr);
208
209// split a string with a character
210void splitchar (text_t::const_iterator first, text_t::const_iterator last,
211 unsigned short c, text_tlist &outlist);
212void splitchar (text_t::const_iterator first, text_t::const_iterator last,
213 unsigned short c, text_tarray &outlist);
214
215// join a string using a character
216void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
217void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
218
219// count the occurrences of a character within a range
220int countchar (text_t::const_iterator first, text_t::const_iterator last,
221 unsigned short c);
222
223// return a substring of string from first up to but not including last
224text_t substr (text_t::const_iterator first, text_t::const_iterator last);
225
226
227
228// conversion classes used for getting information in to and out of
229// the text_t class.
230
// Common base of the conversion classes. It supplies the status_t
// codes used by the convert methods of the derived classes.
class convertclass
{
public:
  enum status_t {finished, stopped, unfinished};

  convertclass ();
  // virtual so that deleting a converter through a convertclass
  // pointer runs the derived class's destructor
  virtual ~convertclass () {}
  virtual void reset ();
};
239
240
241
242// convert from a char stream to the text_t class
243// the default version assumes the input is a ascii
244// character array
245class inconvertclass : public convertclass
246{
247public:
248 inconvertclass ();
249 void reset ();
250 void setinput (char *thestart, size_t thelen);
251
252 // output will be cleared before the conversion
253 virtual void convert (text_t &output, status_t &status);
254
255 // will treat the text_t as a 8-bit string and convert
256 // it to a 16-bit string using the about convert method.
257 text_t convert (const text_t &t);
258
259protected:
260 char *start;
261 size_t len;
262};
263
264// to get something which will do the conversion
265// to ascii declare a (non global!) instance like
266// this
267// inconvertclass ascii2text_t;
268
269
270// Convert from a text_t class to a char stream
271// This default version assumes the output is a ascii
272// character array. If you set the output stream you
273// can use this class to output to a stream using the
274// << operator. The << operator can also be conveniently
275// used to set the output stream by doing something like
276//
277// cout << text_t2ascii << textstr << anothertextstr;
278//
279// this class assumes that the input text doesn't change
280// while the conversion takes place
281class outconvertclass : public convertclass
282{
283public:
284 outconvertclass ();
285 void reset ();
286 void setinput (text_t *theinput);
287 // note that convert does not null-terminate the
288 // output array of characters
289 virtual void convert (char *output, size_t maxlen,
290 size_t &len, status_t &status);
291
292 // will convert the 16-bit string to a 8-bit stream
293 // and place the result in a text_t. This method uses
294 // the above convert function.
295 text_t convert (const text_t &t);
296
297 void setostream (ostream *theouts);
298 ostream *getostream ();
299
300protected:
301 text_t *input;
302 text_t::iterator texthere; // only valid if input is valid
303
304 ostream *outs;
305};
306
307// to get something which will do the conversion
308// to text_t declare a (non global!) instance like
309// this
310// outconvertclass text_t2ascii;
311
312
313// stream operators for the output class
314outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter);
315outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t);
316
317#endif
Note: See TracBrowser for help on using the repository browser.