source: trunk/gsdl/lib/text_t.cpp@ 114

Last change on this file since 114 was 114, checked in by rjmcnab, 25 years ago

Made the source more portable.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1/**********************************************************************
2 *
 * text_t.cpp -- a simple 16-bit character string class
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: text_t.cpp 114 1999-01-19 01:38:20Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.5 1999/01/19 01:38:14 rjmcnab
15
16 Made the source more portable.
17
18 Revision 1.4 1999/01/12 01:51:00 rjmcnab
19
20 Standard header.
21
22 Revision 1.3 1999/01/08 02:33:16 rjmcnab
23
24 Added standard header to source files.
25
26 */
27
28
29#include "text_t.h"
30
31#if defined(GSDL_USE_OBJECTSPACE)
32# include <ospace\std\algorithm>
33#elif defined(GSDL_USE_STL_H)
34# if defined(GSDL_USE_ALGO_H)
35# include <algo.h>
36# else
37# include <algorithm.h>
38# endif
39#else
40# include <algorithm>
41#endif
42
43
44#include "unitool.h"
45
46////////////////////////////////////
47// text_t methods
48////////////////////////////////////
49
50text_t::text_t ()
51{
52 setencoding(0);
53 clear ();
54}
55
56text_t::text_t (int i)
57{
58 setencoding(0);
59 clear ();
60 appendint (i);
61}
62
63text_t::text_t (char *s)
64{
65 setencoding(0);
66 clear ();
67 appendcstr (s);
68}
69
70void text_t::append (const text_t &t)
71{
72 const_iterator here, end=t.end();
73 for (here=t.begin(); here!=end;here++)
74 {
75 text.push_back(*here);
76 }
77}
78
79void text_t::appendrange (iterator first, iterator last)
80{
81 while (first != last)
82 {
83 text.push_back (*first);
84 first++;
85 }
86}
87
88void text_t::appendrange (const_iterator first, const_iterator last)
89{
90 while (first != last)
91 {
92 text.push_back (*first);
93 first++;
94 }
95}
96
97void text_t::appendint (int i)
98{
99 // deal with zeros and negatives
100 if (i == 0)
101 {
102 text.push_back('0');
103 return;
104 }
105 else if (i < 0)
106 {
107 text.push_back('-');
108 i *= -1;
109 }
110
111 // get a buffer for the conversion
112 int maxbuflen = sizeof(int)*3;
113 char *buf = new char[maxbuflen];
114 int len = 0;
115
116 // get the number in reverse
117 while (i > 0)
118 {
119 buf[len++] = '0'+ (i%10);
120 i = i/10;
121 }
122
123 // reverse the number
124 while (len > 0)
125 {
126 text.push_back(buf[--len]);
127 }
128
129 delete buf;
130}
131
132int text_t::getint ()
133{
134 int i = 0;
135 int mult = 1; // become -1 for negative numbers
136
137 iterator here = text.begin();
138 iterator end = text.end();
139
140 // do plus and minus signs
141 if (here != end)
142 {
143 if (*here == '-')
144 {
145 mult = -1;
146 here++;
147 }
148 else if (*here == '+')
149 {
150 mult = 1;
151 here++;
152 }
153 }
154
155 // deal with the number
156 while ((here != end) && (*here >= '0') && (*here <= '9'))
157 {
158 i = 10*i + (*here - '0');
159 here++;
160 }
161
162 i *= mult;
163 return i;
164}
165
166
167
168void text_t::appendcarr (char *s, size_type len)
169{
170 unsigned char *us = (unsigned char *)s;
171 while (len > 0)
172 {
173 text.push_back (*us); // append this character
174 us++;
175 len--;
176 }
177}
178
179void text_t::appendcstr (char *s)
180{
181 unsigned char *us = (unsigned char *)s;
182 while (*us != '\0')
183 {
184 text.push_back (*us); // append this character
185 us++;
186 }
187}
188
189
190// strings returned from getcarr and getcstr become the callers
191// responsibility and should be deallocated with "delete"
192
193char *text_t::getcarr(size_type &len) const
194{
195 unsigned char *cstr = new unsigned char[size()];
196 const_iterator ithere = begin();
197 const_iterator itend = end();
198
199 while (ithere != itend)
200 {
201 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere);
202 else {
203 // put a space or a question mark depending on what
204 // the character is. Question marks tell the user that
205 // they are missing some information.
206 if (is_unicode_space (*ithere)) cstr[len] = ' ';
207 else cstr[len] = '?';
208 }
209 len++;
210 ithere++;
211 }
212
213 return (char *)cstr;
214}
215
216char *text_t::getcstr() const
217{
218 unsigned char *cstr = new unsigned char[size() + 1];
219 const_iterator ithere = begin();
220 const_iterator itend = end();
221 int len = 0;
222
223 while (ithere != itend)
224 {
225 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere);
226 else {
227 // put a space or a question mark depending on what
228 // the character is. Question marks tell the user that
229 // they are missing some information.
230 if (is_unicode_space (*ithere)) cstr[len] = ' ';
231 else cstr[len] = '?';
232 }
233 len++;
234 ithere++;
235 }
236
237 cstr[len] = '\0';
238
239 return (char *)cstr;
240}
241
242
243// general functions which work on text_ts
244
245// find a character within a range
246text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
247 unsigned short c)
248{
249 while (first != last)
250 {
251 if (*first == c) break;
252 first++;
253 }
254 return first;
255}
256
257text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
258 unsigned short c)
259{
260 while (first != last)
261 {
262 if (*first == c) break;
263 first++;
264 }
265 return first;
266}
267
268// get a string up to the next delimiter (which is skipped)
269text_t::const_iterator getdelimitstr (text_t::const_iterator first,
270 text_t::const_iterator last,
271 unsigned short c, text_t &outstr)
272{
273 text_t::const_iterator here = first;
274 here = findchar (first, last, c);
275 outstr.clear();
276 outstr.appendrange (first, here);
277 if (here != last) here++; // skip c
278 return here;
279}
280
281text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
282 unsigned short c, text_t &outstr)
283{
284 text_t::iterator here = first;
285 here = findchar (first, last, c);
286 outstr.clear();
287 outstr.appendrange (first, here);
288 if (here != last) here++; // skip c
289 return here;
290}
291
292// split a string with a character
293void splitchar (text_t::const_iterator first, text_t::const_iterator last,
294 unsigned short c, text_tlist &outlist)
295{
296 outlist.erase(outlist.begin(), outlist.end());
297
298 text_t t;
299
300 while (first != last)
301 {
302 first = getdelimitstr (first, last, c, t);
303 outlist.push_back (t);
304 }
305}
306
307void splitchar (text_t::const_iterator first, text_t::const_iterator last,
308 unsigned short c, text_tarray &outlist)
309{
310 outlist.erase(outlist.begin(), outlist.end());
311
312 text_t t;
313
314 while (first != last)
315 {
316 first = getdelimitstr (first, last, c, t);
317 outlist.push_back (t);
318 }
319}
320
321// join a string using a character
322void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
323{
324 outtext.clear ();
325
326 text_tlist::const_iterator here = inlist.begin ();
327 text_tlist::const_iterator end = inlist.end ();
328 bool first = true;
329 while (here != end)
330 {
331 if (!first) outtext.push_back (c);
332 first = false;
333 outtext += *here;
334 here++;
335 }
336}
337
338void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
339{
340 outtext.clear ();
341
342 text_tarray::const_iterator here = inlist.begin ();
343 text_tarray::const_iterator end = inlist.end ();
344 bool first = true;
345 while (here != end)
346 {
347 if (!first) outtext.push_back (c);
348 first = false;
349 outtext += *here;
350 here++;
351 }
352}
353
354// count the occurances of a character within a range
355int countchar (text_t::const_iterator first, text_t::const_iterator last,
356 unsigned short c)
357{
358 int count = 0;
359 while (first != last) {
360 if (*first == c) count ++;
361 first ++;
362 }
363 return count;
364}
365
366
367
368////////////////////////////////////
369// convertclass methods
370////////////////////////////////////
371
372// conversion classes used for getting information in to and out of
373// the text_t class.
374
375convertclass::convertclass ()
376{
377 // nothing to do
378}
379
380void convertclass::reset ()
381{
382 // nothing to do
383}
384
385
386////////////////////////////////////
387// inconvertclass methods
388////////////////////////////////////
389
390// convert from a char stream to the text_t class
391// the default version assumes the input is a ascii
392// character array
393
394inconvertclass::inconvertclass ()
395{
396 start = NULL;
397 len = 0;
398}
399
400
401void inconvertclass::reset ()
402{
403 start = NULL;
404 len = 0;
405}
406
407void inconvertclass::setinput (char *thestart, size_t thelen)
408{
409 start = thestart;
410 len = thelen;
411}
412
413void inconvertclass::convert (text_t &output, status_t &status)
414{
415 output.clear();
416
417 if (start == NULL || len == 0)
418 {
419 status = finished;
420 return;
421 }
422
423 // don't want any funny sign conversions happening
424 unsigned char *here = (unsigned char *)start;
425 while (len > 0)
426 {
427 output.push_back (*here); // append this character
428 ++here;
429 --len;
430 }
431
432 start = (char *)here; // save current position
433 status = finished;
434}
435
436// will treat the text_t as a 8-bit string and convert
437// it to a 16-bit string using the about convert method.
438text_t inconvertclass::convert (const text_t &t) {
439 text_t out;
440 text_t tmpout;
441 status_t status;
442 text_t::const_iterator here = t.begin();
443 text_t::const_iterator end = t.end();
444 unsigned char cbuf[256];
445 size_t cbuflen = 0;
446
447 while (here != end) {
448 while (here != end && cbuflen < 256) {
449 cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
450 here++;
451 }
452
453 if (cbuflen > 0) {
454 setinput ((char *)cbuf, cbuflen);
455 status = unfinished;
456 while (status == unfinished) {
457 convert (tmpout, status);
458 out += tmpout;
459 }
460 cbuflen = 0;
461 }
462 }
463
464 out.setencoding (0); // unicode
465
466 return out;
467}
468
469// an instance of the default inconvertclass to do simple
470// conversions. Note that any functions that use this are
471// not reentrant. If a function needs to be reentrant it
472// should declare its own instance.
473inconvertclass ascii2text_t;
474
475
476////////////////////////////////////
477// outconvertclass methods
478////////////////////////////////////
479
480// Convert from a text_t class to a char stream
481// This default version assumes the output is a ascii
482// character array. If you set the output stream you
483// can use this class to output to a stream using the
484// << operator. The << operator can also be conveniently
485// used to set the output stream by doing something like
486//
487// cout << text_t2ascii << text_tstr << anothertext_tstr;
488//
489outconvertclass::outconvertclass ()
490{
491 input = NULL;
492 outs = NULL;
493}
494
495void outconvertclass::reset ()
496{
497 input = NULL;
498 outs = NULL;
499}
500
501void outconvertclass::setinput (text_t *theinput)
502{
503 input = theinput;
504 if (input != NULL) texthere = input->begin();
505}
506
507void outconvertclass::convert (char *output, size_t maxlen,
508 size_t &len, status_t &status)
509{
510 if (input == NULL || output == NULL)
511 {
512 status = finished;
513 return;
514 }
515
516 // don't want any funny sign conversions happening
517 unsigned char *uoutput = (unsigned char *)output;
518 text_t::iterator textend = input->end();
519 len = 0;
520 while ((len < maxlen) && (texthere != textend))
521 {
522 if (*texthere < 256) *uoutput = (unsigned char)(*texthere);
523 else {
524 // put a space or a question mark depending on what
525 // the character is. Question marks tell the user that
526 // they are missing some information.
527 if (is_unicode_space (*texthere)) *uoutput = ' ';
528 else *uoutput = '?';
529 }
530 ++uoutput;
531 ++len;
532 ++texthere;
533 }
534
535 if (texthere == textend) status = finished;
536 else status = unfinished;
537}
538
539// will convert the 16-bit string to a 8-bit stream
540// and place the result in a text_t. This method uses
541// the above convert function.
542text_t outconvertclass::convert (const text_t &t) {
543 text_t out;
544 unsigned char cbuf[256];
545 size_t cbuflen = 0;
546 status_t status = unfinished;
547
548 setinput ((text_t *)&t); // discard constant
549 while (status == unfinished) {
550 convert ((char *)cbuf, 256, cbuflen, status);
551 out.appendcarr ((char *)cbuf, cbuflen);
552 }
553
554 out.setencoding (1); // other encoding
555
556 return out;
557}
558
559
560void outconvertclass::setostream (ostream *theouts)
561{
562 outs = theouts;
563}
564
565ostream *outconvertclass::getostream ()
566{
567 return outs;
568}
569
570
571
572
573// an instance of the default outconvertclass to do simple
574// conversions
575outconvertclass text_t2ascii;
576
577
578
579// stream operators for the output class
580
581outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
582{
583 outconverter.setostream(&theouts);
584 return outconverter;
585}
586
587
588#define STREAMBUFSIZE 256
589outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
590{
591 ostream *outstream = outconverter.getostream();
592
593 if (outstream == NULL) return outconverter;
594
595 char outbuf[STREAMBUFSIZE];
596 size_t len;
597 outconvertclass::status_t status = outconvertclass::unfinished;
598
599 // assume that there is no data needing converting
600 // left in the converter
601 outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
602
603 while (status == outconvertclass::unfinished)
604 {
605 outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
606 if (len > 0) outstream->write(outbuf, len);
607 }
608
609 return outconverter;
610}
Note: See TracBrowser for help on using the repository browser.