source: main/trunk/greenstone2/common-src/src/lib/text_t.cpp@ 24112

Last change on this file since 24112 was 24112, checked in by ak19, 13 years ago

Sam fixed a vector iteration bug

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
RevLine 
[1076]1/**********************************************************************
2 *
3 * text_t.cpp -- a simple 16-bit character string class
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[1860]24 * $Id: text_t.cpp 24112 2011-06-03 02:12:23Z ak19 $
25 *
[1076]26 *********************************************************************/
27
28#include "text_t.h"
29
30#if defined(GSDL_USE_OBJECTSPACE)
31# include <ospace\std\algorithm>
32#elif defined(GSDL_USE_STL_H)
33# if defined(GSDL_USE_ALGO_H)
34# include <algo.h>
35# else
36# include <algorithm.h>
37# endif
38#else
39# include <algorithm>
40#endif
41
[1860]42#ifdef HAVE_CONFIG_H
43# ifdef __WIN32__
[14909]44# include "win32cfg.h"
[1860]45# else
46# include "config.h"
47# endif
48#endif
[1076]49
[18880]50#include <cstring>
[1860]51
[1076]52#include "unitool.h"
[24110]53#include <iostream>
[1076]54
[7382]55const text_t g_EmptyText("");
56
[1076]57////////////////////////////////////
58// text_t methods
59////////////////////////////////////
60
[1860]61// new stream converter ...
[8727]62ostream& operator<< (ostream &o, const text_t &text)
[1860]63{
64 text_t::const_iterator ithere = text.begin();
65 text_t::const_iterator itend = text.end();
66
67 while (ithere != itend)
68 {
69 if (*ithere < 256)
70 {
71 o << (unsigned char)(*ithere);
72 }
73 else
74 {
75 // put a space or a question mark depending on what
76 // the character is. Question marks tell the user that
77 // they are missing some information.
78 if (is_unicode_space (*ithere))
79 o << ' ';
80 else
81 o << '?';
82 }
[8727]83 ++ithere;
[1860]84 }
85
86 return o;
87}
88
[1076]89text_t::text_t ()
90{
91 setencoding(0);
92 clear ();
93}
94
95text_t::text_t (int i)
96{
97 setencoding(0);
98 clear ();
99 appendint (i);
100}
101
[8727]102text_t::text_t (const char *s)
[1076]103{
104 setencoding(0);
105 clear ();
106 appendcstr (s);
107}
108
[8727]109text_t::text_t (const char *s, size_type nLength)
110{
111 setencoding(0);
112 clear ();
113 appendcarr(s, nLength);
114}
[1860]115
[8727]116
[1076]117void text_t::append (const text_t &t)
118{
119 text.insert(text.end(), t.begin(), t.end());
120}
121
122void text_t::appendrange (iterator first, iterator last)
123{
124 text.insert(text.end(), first, last);
125}
126
127void text_t::appendrange (const_iterator first, const_iterator last)
128{
129 text.insert(text.end(), first, last);
130}
131
132void text_t::appendint (int i)
133{
134 // deal with zeros and negatives
135 if (i == 0)
136 {
137 text.push_back('0');
138 return;
139 }
140 else if (i < 0)
141 {
142 text.push_back('-');
143 i *= -1;
144 }
145
146 // get a buffer for the conversion
147 int maxbuflen = sizeof(int)*3;
148 char *buf = new char[maxbuflen];
149 int len = 0;
150
151 // get the number in reverse
152 while (i > 0)
153 {
154 buf[len++] = '0'+ (i%10);
155 i = i/10;
156 }
157
158 // reverse the number
159 while (len > 0)
160 {
161 text.push_back(buf[--len]);
162 }
163
[8727]164 delete []buf;
[1076]165}
166
167int text_t::getint () const
168{
169 int i = 0;
170 int mult = 1; // become -1 for negative numbers
171
172 const_iterator here = text.begin();
173 const_iterator end = text.end();
174
175 // do plus and minus signs
176 if (here != end)
177 {
178 if (*here == '-')
179 {
180 mult = -1;
[9593]181 ++here;
[1076]182 }
183 else if (*here == '+')
184 {
185 mult = 1;
[8727]186 ++here;
[1076]187 }
188 }
189
190 // deal with the number
191 while ((here != end) && (*here >= '0') && (*here <= '9'))
192 {
193 i = 10*i + (*here - '0');
[8727]194 ++here;
[1076]195 }
196
197 i *= mult;
198 return i;
199}
200
[2487]201unsigned long text_t::getulong () const
202{
203 unsigned long i = 0;
[1076]204
[2487]205 const_iterator here = text.begin();
206 const_iterator end = text.end();
[1076]207
[2487]208 while ((here != end) && (*here >= '0') && (*here <= '9'))
209 {
210 i = 10*i + (*here - '0');
[8727]211 ++here;
[2487]212 }
213
214 return i;
215}
216
[8727]217void text_t::appendcarr (const char *s, size_type len)
[1076]218{
219 unsigned char *us = (unsigned char *)s;
[8727]220 if (text.capacity() < (text.size() + len + 2)) {
221 text.reserve(text.size() + len + 2);
222 }
223
[1076]224 while (len > 0)
225 {
226 text.push_back (*us); // append this character
[8727]227 ++us;
228 --len;
[1076]229 }
230}
231
[8727]232void text_t::appendcstr (const char *s)
[1076]233{
[8727]234 size_t len = strlen(s);
235 if (text.capacity() < (text.size() + len + 2)) {
236 text.reserve(text.size() + len + 2);
237 }
238
[1076]239 unsigned char *us = (unsigned char *)s;
240 while (*us != '\0')
241 {
242 text.push_back (*us); // append this character
[8727]243 ++us;
[1076]244 }
245}
246
247
248// strings returned from getcarr and getcstr become the callers
[8727]249// responsibility and should be deallocated with "delete []"
[1076]250
251char *text_t::getcarr(size_type &len) const
252{
253 unsigned char *cstr = new unsigned char[size()];
254 len = 0;
255
256 const_iterator ithere = begin();
257 const_iterator itend = end();
258 while (ithere != itend)
259 {
260 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere);
261 else {
262 // put a space or a question mark depending on what
263 // the character is. Question marks tell the user that
264 // they are missing some information.
265 if (is_unicode_space (*ithere)) cstr[len] = ' ';
266 else cstr[len] = '?';
267 }
[8727]268 ++len;
269 ++ithere;
[1076]270 }
271
272 return (char *)cstr;
273}
274
275char *text_t::getcstr() const
276{
277 unsigned char *cstr = new unsigned char[size() + 1];
278 const_iterator ithere = begin();
279 const_iterator itend = end();
280 int len = 0;
281
282 while (ithere != itend)
283 {
284 if (*ithere < 256) cstr[len] = (unsigned char)(*ithere);
285 else {
286 // put a space or a question mark depending on what
287 // the character is. Question marks tell the user that
288 // they are missing some information.
289 if (is_unicode_space (*ithere)) cstr[len] = ' ';
290 else cstr[len] = '?';
291 }
[8727]292 ++len;
293 ++ithere;
[1076]294 }
295
296 cstr[len] = '\0';
297
298 return (char *)cstr;
299}
300
301
[14342]302int text_t::replace(text_t toreplace, text_t replacement)
303{
304 // Get the beginning and end of the current text
305 text_t::iterator text_begin = text.begin(), text_end = text.end();
306 int count = 0;
307 text_t new_text, temp_text;
308
309 // Loop through and grab the text off the end
310 while (text_begin < text_end)
311 {
312 // Find where the next toreplace is
313 text_t::iterator next_toreplace = findword(text_begin, text_end, toreplace);
314
[15077]315 // We've found a match
316 if (next_toreplace != text_end)
[14342]317 {
[15077]318 new_text.append(substr(text_begin, next_toreplace));
319 new_text.append(replacement);
320 count++;
321 text_begin = next_toreplace + toreplace.size();
[14342]322 }
[15077]323 // We haven't found a match
[14342]324 else
325 {
[15077]326 new_text.append(substr(text_begin, text_end));
327 text_begin = text_end;
[14342]328 }
329 }
330
331 text.clear();
332 text = new_text.text_as_usvector();
333 return count;
334}
335
336
[1076]337// general functions which work on text_ts
338
339// find a character within a range
340text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
341 unsigned short c)
342{
343 while (first != last)
344 {
345 if (*first == c) break;
[8727]346 ++first;
[1076]347 }
348 return first;
349}
350
351text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
352 unsigned short c)
353{
354 while (first != last)
355 {
356 if (*first == c) break;
[8727]357 ++first;
[1076]358 }
359 return first;
360}
361
[18700]362text_t::iterator findlastchar (text_t::iterator first, text_t::iterator last_plus_one,
[12504]363 unsigned short c)
364{
[18821]365 text_t::iterator current = (last_plus_one != first) ? last_plus_one - 1 : first;
[12504]366 while (current != first) {
367 if (*current == c) break;
368 --current;
369 }
370 if (current == first) {
371 if (*current == c) return current;
[18700]372 return last_plus_one;
[12504]373 }
374
375 return current;
376}
377
[16066]378text_t::const_iterator findword (text_t::const_iterator first,
379 text_t::const_iterator last,
380 const text_t& word)
381{
382 text_t::const_iterator word_begin = word.begin();
383 text_t::const_iterator word_end = word.end();
384
385 while (first != last)
386 {
387 text_t::const_iterator char_match = first;
388 text_t::const_iterator word_here = word_begin;
[20762]389 while (word_here != word_end && char_match != last)
[16066]390 {
391 if (*char_match != *word_here)
392 {
393 break;
394 }
395 ++char_match;
396 ++word_here;
397 }
398 if (word_here==word_end)
399 {
400 return first;
401 }
402 ++first;
403 }
404 return last; // get to here only if there is no match
405}
406
[8727]407text_t::iterator findword (text_t::iterator first,
408 text_t::iterator last,
[1860]409 const text_t& word)
410{
411 text_t::const_iterator word_begin = word.begin();
412 text_t::const_iterator word_end = word.end();
413
414 while (first != last)
415 {
416 text_t::iterator char_match = first;
417 text_t::const_iterator word_here = word_begin;
[20762]418 while (word_here != word_end && char_match != last)
[1860]419 {
420 if (*char_match != *word_here)
421 {
422 break;
423 }
[8727]424 ++char_match;
425 ++word_here;
[1860]426 }
427 if (word_here==word_end)
428 {
429 return first;
430 }
[8727]431 ++first;
[1860]432 }
433 return last; // get to here only if there is no match
434}
435
[1076]436// get a string up to the next delimiter (which is skipped)
437text_t::const_iterator getdelimitstr (text_t::const_iterator first,
438 text_t::const_iterator last,
439 unsigned short c, text_t &outstr)
440{
441 text_t::const_iterator here = first;
442 here = findchar (first, last, c);
443 outstr.clear();
444 outstr.appendrange (first, here);
[8727]445 if (here != last) ++here; // skip c
[1076]446 return here;
447}
448
449text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
450 unsigned short c, text_t &outstr)
451{
452 text_t::iterator here = first;
453 here = findchar (first, last, c);
454 outstr.clear();
455 outstr.appendrange (first, here);
[8727]456 if (here != last) ++here; // skip c
[1076]457 return here;
458}
459
[16066]460text_t::const_iterator getdelimitstr (text_t::const_iterator first, text_t::const_iterator last,
461 text_t w, text_t &outstr)
462{
463 text_t::const_iterator here = first;
464 here = findword (first, last, w);
465 outstr.clear();
466 outstr.appendrange (first, here);
467 if (here != last) here += w.size(); // skip w
468 return here;
469}
470
[1076]471// split a string with a character
472void splitchar (text_t::const_iterator first, text_t::const_iterator last,
473 unsigned short c, text_tset &outlist)
474{
475 outlist.erase(outlist.begin(), outlist.end());
476
477 text_t t;
478
479 while (first != last)
480 {
481 first = getdelimitstr (first, last, c, t);
482 outlist.insert (t);
483 }
484}
485
486void splitchar (text_t::const_iterator first, text_t::const_iterator last,
487 unsigned short c, text_tlist &outlist)
488{
489 outlist.erase(outlist.begin(), outlist.end());
490
491 text_t t;
492
493 while (first != last)
494 {
495 first = getdelimitstr (first, last, c, t);
496 outlist.push_back (t);
497 }
498}
499
500void splitchar (text_t::const_iterator first, text_t::const_iterator last,
501 unsigned short c, text_tarray &outlist)
502{
503 outlist.erase(outlist.begin(), outlist.end());
504
505 text_t t;
506
507 while (first != last)
508 {
509 first = getdelimitstr (first, last, c, t);
510 outlist.push_back (t);
511 }
512}
513
[16066]514void splitword (text_t::const_iterator first, text_t::const_iterator last,
515 text_t w, text_tlist &outlist)
516{
517 outlist.erase(outlist.begin(), outlist.end());
518
519 text_t t;
520
521 while (first != last)
522 {
523 first = getdelimitstr (first, last, w, t);
524 outlist.push_back (t);
525 }
526}
527
[1076]528// join a string using a character
529void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext)
530{
531 outtext.clear ();
532
533 text_tset::const_iterator here = inlist.begin ();
534 text_tset::const_iterator end = inlist.end ();
[8727]535
536 if (here != end) {
537 outtext += *here; ++here;
538 while (here != end) {
539 outtext.push_back (c);
[1076]540 outtext += *here;
[8727]541 ++here;
[1076]542 }
[8727]543 }
[1076]544}
545
546void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
547{
548 outtext.clear ();
549
550 text_tlist::const_iterator here = inlist.begin ();
551 text_tlist::const_iterator end = inlist.end ();
[8727]552 if (here != end) {
553 outtext += *here; ++here;
554 while (here != end) {
555 outtext.push_back (c);
[1076]556 outtext += *here;
[8727]557 ++here;
[1076]558 }
[8727]559 }
[1076]560}
561
562void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
563{
564 outtext.clear ();
565
566 text_tarray::const_iterator here = inlist.begin ();
567 text_tarray::const_iterator end = inlist.end ();
[8727]568 if (here != end) {
569 outtext += *here; ++here;
570 while (here != end) {
571 outtext.push_back (c);
[1076]572 outtext += *here;
[8727]573 ++here;
[1076]574 }
[8727]575 }
[1076]576}
577
[8727]578void joinchar (const text_tlist &inlist, const text_t &c, text_t &outtext)
[1088]579{
580 outtext.clear ();
581
582 text_tlist::const_iterator here = inlist.begin ();
583 text_tlist::const_iterator end = inlist.end ();
[8727]584 if (here != end) {
585 outtext += *here; ++here;
586 while (here != end) {
587 outtext += c;
[1088]588 outtext += *here;
[8727]589 ++here;
[1088]590 }
[8727]591 }
[1088]592}
593
[8727]594void joinchar (const text_tset &inlist, const text_t &c, text_t &outtext)
[1088]595{
596 outtext.clear ();
597
598 text_tset::const_iterator here = inlist.begin ();
599 text_tset::const_iterator end = inlist.end ();
[8727]600 if (here != end) {
601 outtext += *here; ++here;
602 while (here != end) {
603 outtext += c;
[1088]604 outtext += *here;
[8727]605 ++here;
[1088]606 }
[8727]607 }
[1088]608}
609
[8727]610void joinchar (const text_tarray &inlist, const text_t &c, text_t &outtext)
[1076]611{
612 outtext.clear ();
613
614 text_tarray::const_iterator here = inlist.begin ();
615 text_tarray::const_iterator end = inlist.end ();
[8727]616 if (here != end) {
617 outtext += *here; ++here;
618 while (here != end) {
619 outtext += c;
[1076]620 outtext += *here;
[8727]621 ++here;
[1076]622 }
[8727]623 }
[1076]624}
625
626// count the occurances of a character within a range
627int countchar (text_t::const_iterator first, text_t::const_iterator last,
628 unsigned short c)
629{
630 int count = 0;
631 while (first != last) {
[9593]632 if (*first == c) ++count;
633 ++first;
[1076]634 }
635 return count;
636}
637
638// return a substring of string from first up to but not including last
639text_t substr (text_t::const_iterator first, text_t::const_iterator last) {
640
[8727]641 text_t substr; substr.reserve(last - first + 2);
[1076]642 while (first != last) {
643 substr.push_back(*first);
[8727]644 ++first;
[1076]645 }
646 return substr;
647}
648
649
650// convert to lowercase
651void lc (text_t::iterator first, text_t::iterator last) {
652 while (first != last) {
653 *first = unicode_tolower(*first);
[8727]654 ++first;
[1076]655 }
656}
657
658// convert to uppercase
659void uc (text_t::iterator first, text_t::iterator last) {
660 while (first != last) {
661 *first = unicode_toupper(*first);
[8727]662 ++first;
[1076]663 }
664}
665
666
667// checks to see if it is a number (i.e. contains only 0-9)
668bool is_number (const text_t &text) {
669
670 text_t::const_iterator here = text.begin();
671 text_t::const_iterator end = text.end();
672
673 while (here != end) {
674 if ((*here!='0') && (*here!='1') && (*here!='2') &&
675 (*here!='3') && (*here!='4') && (*here!='5') &&
676 (*here!='6') && (*here!='7') && (*here!='8') &&
677 (*here!='9')) return false;
[8727]678 ++here;
[1076]679 }
680 return true;
681}
682
683
684// checks to see if the text has any letters or digits
685bool has_unicode_letdig (const text_t &text) {
686 if (text.empty()) return false;
687
688 text_t::const_iterator here = text.begin();
689 text_t::const_iterator end = text.end();
690 while (here != end) {
691 if (is_unicode_letdig (*here)) return true;
[8727]692 ++here;
[1076]693 }
694
695 return false;
696}
697
[10140]698// checks to see if a text_t starts with the specified prefix
699bool starts_with(const text_t& text, const text_t& prefix) {
700 if (prefix.empty()) return true;
701 if (text.empty() || text.size()<prefix.size()) return false;
702 text_t substring = substr(text.begin(), text.begin()+prefix.size());
703 return substring == prefix;
704}
705// checks to see if a text_t ends with the specified suffix
706bool ends_with(const text_t& text, const text_t& suffix) {
707 if (suffix.empty()) return true;
708 if (text.empty() || text.size() < suffix.size()) return false;
709 text_t substring = substr(text.end()-suffix.size(),text.end());
710 return substring == suffix;
[1076]711
[10140]712}
[1076]713
[24110]714//Trims the whitespace off the beginning and end of a given string
715text_t trim (const text_t& text) {
[10140]716
[24110]717 if(text.size() == 0) {
718 return text;
719 }
720
721 text_t::const_iterator firstLetter = text.begin();
722 text_t::const_iterator lastLetter = text.end();
723
724 //Find the start
725 while (firstLetter != lastLetter) {
726 if(!is_unicode_space(*firstLetter)) {
727 break;
728 }
729 firstLetter++;
730 }
731
732 //Find the end
[24112]733 lastLetter-=1;
[24110]734 while (lastLetter != firstLetter) {
735 if(!is_unicode_space(*lastLetter)) {
736 break;
737 }
738 lastLetter--;
739 }
740
741 return substr(firstLetter, lastLetter+1);
742}
743
[1076]744////////////////////////////////////
745// convertclass methods
746////////////////////////////////////
747
748// conversion classes used for getting information in to and out of
749// the text_t class.
750
751convertclass::convertclass ()
752{
753 // nothing to do
754}
755
[22141]756convertclass::~convertclass ()
757{
758 // nothing to do
759}
760
[1076]761void convertclass::reset ()
762{
763 // nothing to do
764}
765
766
767////////////////////////////////////
768// inconvertclass methods
769////////////////////////////////////
770
// Convert from a char stream to the text_t class.
// The default version assumes the input is an ASCII
// character array.
774
775inconvertclass::inconvertclass ()
776{
777 start = NULL;
778 len = 0;
779}
780
[22141]781inconvertclass::~inconvertclass ()
782{
783 // nothing to do
784}
[1076]785
[22141]786
[1076]787void inconvertclass::reset ()
788{
789 start = NULL;
790 len = 0;
791}
792
793void inconvertclass::setinput (char *thestart, size_t thelen)
794{
795 start = thestart;
796 len = thelen;
797}
798
799void inconvertclass::convert (text_t &output, status_t &status)
800{
801 output.clear();
802
803 if (start == NULL || len == 0)
804 {
805 status = finished;
806 return;
807 }
808
[8727]809 if (output.capacity() < len + 2)
810 output.reserve(len + 2);
811
[1076]812 // don't want any funny sign conversions happening
813 unsigned char *here = (unsigned char *)start;
814 while (len > 0)
815 {
816 output.push_back (*here); // append this character
817 ++here;
818 --len;
819 }
820
821 start = (char *)here; // save current position
822 status = finished;
823}
824
825// will treat the text_t as a 8-bit string and convert
826// it to a 16-bit string using the about convert method.
827text_t inconvertclass::convert (const text_t &t) {
828 text_t out;
829 text_t tmpout;
830 status_t status;
831 text_t::const_iterator here = t.begin();
832 text_t::const_iterator end = t.end();
833 unsigned char cbuf[256];
834 size_t cbuflen = 0;
835
[8727]836 out.clear();
837 if (out.capacity() < t.size() + 2)
838 out.reserve(t.size() + 2);
[1076]839 while (here != end) {
840 while (here != end && cbuflen < 256) {
841 cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
[8727]842 ++here;
[1076]843 }
844
845 if (cbuflen > 0) {
846 setinput ((char *)cbuf, cbuflen);
847 status = unfinished;
848 while (status == unfinished) {
849 convert (tmpout, status);
850 out += tmpout;
851 }
852 cbuflen = 0;
853 }
854 }
855
856 out.setencoding (0); // unicode
857
858 return out;
859}
860
861// an instance of the default inconvertclass to do simple
862// conversions. Note that any functions that use this are
863// not reentrant. If a function needs to be reentrant it
864// should declare its own instance.
865inconvertclass ascii2text_t;
866
867
868////////////////////////////////////
869// outconvertclass methods
870////////////////////////////////////
871
// Convert from a text_t class to a char stream.
// This default version assumes the output is an ASCII
// character array. If you set the output stream you
875// can use this class to output to a stream using the
876// << operator. The << operator can also be conveniently
877// used to set the output stream by doing something like
878//
879// cout << text_t2ascii << text_tstr << anothertext_tstr;
880//
881outconvertclass::outconvertclass ()
882{
883 input = NULL;
884 outs = NULL;
885}
886
[22141]887outconvertclass::~outconvertclass ()
888{
889 // nothing to do
890}
891
892
[1076]893void outconvertclass::reset ()
894{
895 input = NULL;
896 outs = NULL;
897}
898
899void outconvertclass::setinput (text_t *theinput)
900{
901 input = theinput;
902 if (input != NULL) texthere = input->begin();
903}
904
[8727]905void outconvertclass::setdata(text_t *theinput, text_t::iterator thetexthere)
906{
907 input = theinput;
908 texthere = thetexthere;
909}
910
[1076]911void outconvertclass::convert (char *output, size_t maxlen,
912 size_t &len, status_t &status)
913{
914 if (input == NULL || output == NULL)
915 {
916 status = finished;
917 return;
918 }
919
920 // don't want any funny sign conversions happening
921 unsigned char *uoutput = (unsigned char *)output;
922 text_t::iterator textend = input->end();
923 len = 0;
924 while ((len < maxlen) && (texthere != textend))
925 {
926 if (*texthere < 256) *uoutput = (unsigned char)(*texthere);
927 else {
928 // put a space or a question mark depending on what
929 // the character is. Question marks tell the user that
930 // they are missing some information.
931 if (is_unicode_space (*texthere)) *uoutput = ' ';
932 else *uoutput = '?';
933 }
934 ++uoutput;
935 ++len;
936 ++texthere;
937 }
938
939 if (texthere == textend) status = finished;
940 else status = unfinished;
941}
942
943// will convert the 16-bit string to a 8-bit stream
944// and place the result in a text_t. This method uses
945// the above convert function.
946text_t outconvertclass::convert (const text_t &t) {
947 text_t out;
948 unsigned char cbuf[256];
949 size_t cbuflen = 0;
950 status_t status = unfinished;
[8727]951
952 out.clear();
953 if (out.capacity() < t.size() + 2)
954 out.reserve(t.size() + 2);
[1076]955 setinput ((text_t *)&t); // discard constant
956 while (status == unfinished) {
957 convert ((char *)cbuf, 256, cbuflen, status);
958 out.appendcarr ((char *)cbuf, cbuflen);
959 }
960
961 out.setencoding (1); // other encoding
962
963 return out;
964}
965
966
967void outconvertclass::setostream (ostream *theouts)
968{
969 outs = theouts;
970}
971
972ostream *outconvertclass::getostream ()
973{
974 return outs;
975}
976
977
978
979
980// an instance of the default outconvertclass to do simple
981// conversions
982outconvertclass text_t2ascii;
983
984
985
986// stream operators for the output class
987
988outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
989{
990 outconverter.setostream(&theouts);
991 return outconverter;
992}
993
994
995#define STREAMBUFSIZE 256
996outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
997{
998 ostream *outstream = outconverter.getostream();
999
1000 if (outstream == NULL) return outconverter;
1001
1002 char outbuf[STREAMBUFSIZE];
1003 size_t len;
1004 outconvertclass::status_t status = outconvertclass::unfinished;
1005
1006 // assume that there is no data needing converting
1007 // left in the converter
1008 outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
1009
1010 while (status == outconvertclass::unfinished)
1011 {
1012 outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
1013 if (len > 0) outstream->write(outbuf, len);
1014 }
1015
1016 return outconverter;
1017}
Note: See TracBrowser for help on using the repository browser.