Context Navigation

source: trunk/gsdl/lib/text_t.cpp@ 114

Last change on this file since 114 was 114, checked in by rjmcnab, 25 years ago
Made the source more portable.
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 12.3 KB

Line
1	/**********************************************************************
2	*
3	* text_t.cpp -- a simple 16-bit character string class
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* PUT COPYRIGHT NOTICE HERE
7	*
8	* $Id: text_t.cpp 114 1999-01-19 01:38:20Z rjmcnab $
9	*
10	*********************************************************************/
11
12	/*
13	$Log$
14	Revision 1.5 1999/01/19 01:38:14 rjmcnab
15
16	Made the source more portable.
17
18	Revision 1.4 1999/01/12 01:51:00 rjmcnab
19
20	Standard header.
21
22	Revision 1.3 1999/01/08 02:33:16 rjmcnab
23
24	Added standard header to source files.
25
26	*/
27
28
29	#include "text_t.h"
30
31	#if defined(GSDL_USE_OBJECTSPACE)
32	# include <ospace\std\algorithm>
33	#elif defined(GSDL_USE_STL_H)
34	# if defined(GSDL_USE_ALGO_H)
35	# include <algo.h>
36	# else
37	# include <algorithm.h>
38	# endif
39	#else
40	# include <algorithm>
41	#endif
42
43
44	#include "unitool.h"
45
46	////////////////////////////////////
47	// text_t methods
48	////////////////////////////////////
49
50	text_t::text_t ()
51	{
52	setencoding(0);
53	clear ();
54	}
55
56	text_t::text_t (int i)
57	{
58	setencoding(0);
59	clear ();
60	appendint (i);
61	}
62
63	text_t::text_t (char *s)
64	{
65	setencoding(0);
66	clear ();
67	appendcstr (s);
68	}
69
70	void text_t::append (const text_t &t)
71	{
72	const_iterator here, end=t.end();
73	for (here=t.begin(); here!=end;here++)
74	{
75	text.push_back(*here);
76	}
77	}
78
79	void text_t::appendrange (iterator first, iterator last)
80	{
81	while (first != last)
82	{
83	text.push_back (*first);
84	first++;
85	}
86	}
87
88	void text_t::appendrange (const_iterator first, const_iterator last)
89	{
90	while (first != last)
91	{
92	text.push_back (*first);
93	first++;
94	}
95	}
96
97	void text_t::appendint (int i)
98	{
99	// deal with zeros and negatives
100	if (i == 0)
101	{
102	text.push_back('0');
103	return;
104	}
105	else if (i < 0)
106	{
107	text.push_back('-');
108	i *= -1;
109	}
110
111	// get a buffer for the conversion
112	int maxbuflen = sizeof(int)*3;
113	char *buf = new char[maxbuflen];
114	int len = 0;
115
116	// get the number in reverse
117	while (i > 0)
118	{
119	buf[len++] = '0'+ (i%10);
120	i = i/10;
121	}
122
123	// reverse the number
124	while (len > 0)
125	{
126	text.push_back(buf[--len]);
127	}
128
129	delete buf;
130	}
131
132	int text_t::getint ()
133	{
134	int i = 0;
135	int mult = 1; // become -1 for negative numbers
136
137	iterator here = text.begin();
138	iterator end = text.end();
139
140	// do plus and minus signs
141	if (here != end)
142	{
143	if (*here == '-')
144	{
145	mult = -1;
146	here++;
147	}
148	else if (*here == '+')
149	{
150	mult = 1;
151	here++;
152	}
153	}
154
155	// deal with the number
156	while ((here != end) && (here >= '0') && (here <= '9'))
157	{
158	i = 10i + (here - '0');
159	here++;
160	}
161
162	i *= mult;
163	return i;
164	}
165
166
167
168	void text_t::appendcarr (char *s, size_type len)
169	{
170	unsigned char us = (unsigned char )s;
171	while (len > 0)
172	{
173	text.push_back (*us); // append this character
174	us++;
175	len--;
176	}
177	}
178
179	void text_t::appendcstr (char *s)
180	{
181	unsigned char us = (unsigned char )s;
182	while (*us != '\0')
183	{
184	text.push_back (*us); // append this character
185	us++;
186	}
187	}
188
189
190	// strings returned from getcarr and getcstr become the callers
191	// responsibility and should be deallocated with "delete"
192
193	char *text_t::getcarr(size_type &len) const
194	{
195	unsigned char *cstr = new unsigned char[size()];
196	const_iterator ithere = begin();
197	const_iterator itend = end();
198
199	while (ithere != itend)
200	{
201	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
202	else {
203	// put a space or a question mark depending on what
204	// the character is. Question marks tell the user that
205	// they are missing some information.
206	if (is_unicode_space (*ithere)) cstr[len] = ' ';
207	else cstr[len] = '?';
208	}
209	len++;
210	ithere++;
211	}
212
213	return (char *)cstr;
214	}
215
216	char *text_t::getcstr() const
217	{
218	unsigned char *cstr = new unsigned char[size() + 1];
219	const_iterator ithere = begin();
220	const_iterator itend = end();
221	int len = 0;
222
223	while (ithere != itend)
224	{
225	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
226	else {
227	// put a space or a question mark depending on what
228	// the character is. Question marks tell the user that
229	// they are missing some information.
230	if (is_unicode_space (*ithere)) cstr[len] = ' ';
231	else cstr[len] = '?';
232	}
233	len++;
234	ithere++;
235	}
236
237	cstr[len] = '\0';
238
239	return (char *)cstr;
240	}
241
242
243	// general functions which work on text_ts
244
245	// find a character within a range
246	text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
247	unsigned short c)
248	{
249	while (first != last)
250	{
251	if (*first == c) break;
252	first++;
253	}
254	return first;
255	}
256
257	text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
258	unsigned short c)
259	{
260	while (first != last)
261	{
262	if (*first == c) break;
263	first++;
264	}
265	return first;
266	}
267
268	// get a string up to the next delimiter (which is skipped)
269	text_t::const_iterator getdelimitstr (text_t::const_iterator first,
270	text_t::const_iterator last,
271	unsigned short c, text_t &outstr)
272	{
273	text_t::const_iterator here = first;
274	here = findchar (first, last, c);
275	outstr.clear();
276	outstr.appendrange (first, here);
277	if (here != last) here++; // skip c
278	return here;
279	}
280
281	text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
282	unsigned short c, text_t &outstr)
283	{
284	text_t::iterator here = first;
285	here = findchar (first, last, c);
286	outstr.clear();
287	outstr.appendrange (first, here);
288	if (here != last) here++; // skip c
289	return here;
290	}
291
292	// split a string with a character
293	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
294	unsigned short c, text_tlist &outlist)
295	{
296	outlist.erase(outlist.begin(), outlist.end());
297
298	text_t t;
299
300	while (first != last)
301	{
302	first = getdelimitstr (first, last, c, t);
303	outlist.push_back (t);
304	}
305	}
306
307	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
308	unsigned short c, text_tarray &outlist)
309	{
310	outlist.erase(outlist.begin(), outlist.end());
311
312	text_t t;
313
314	while (first != last)
315	{
316	first = getdelimitstr (first, last, c, t);
317	outlist.push_back (t);
318	}
319	}
320
321	// join a string using a character
322	void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
323	{
324	outtext.clear ();
325
326	text_tlist::const_iterator here = inlist.begin ();
327	text_tlist::const_iterator end = inlist.end ();
328	bool first = true;
329	while (here != end)
330	{
331	if (!first) outtext.push_back (c);
332	first = false;
333	outtext += *here;
334	here++;
335	}
336	}
337
338	void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
339	{
340	outtext.clear ();
341
342	text_tarray::const_iterator here = inlist.begin ();
343	text_tarray::const_iterator end = inlist.end ();
344	bool first = true;
345	while (here != end)
346	{
347	if (!first) outtext.push_back (c);
348	first = false;
349	outtext += *here;
350	here++;
351	}
352	}
353
354	// count the occurances of a character within a range
355	int countchar (text_t::const_iterator first, text_t::const_iterator last,
356	unsigned short c)
357	{
358	int count = 0;
359	while (first != last) {
360	if (*first == c) count ++;
361	first ++;
362	}
363	return count;
364	}
365
366
367
368	////////////////////////////////////
369	// convertclass methods
370	////////////////////////////////////
371
372	// conversion classes used for getting information in to and out of
373	// the text_t class.
374
375	convertclass::convertclass ()
376	{
377	// nothing to do
378	}
379
380	void convertclass::reset ()
381	{
382	// nothing to do
383	}
384
385
386	////////////////////////////////////
387	// inconvertclass methods
388	////////////////////////////////////
389
390	// convert from a char stream to the text_t class
391	// the default version assumes the input is a ascii
392	// character array
393
394	inconvertclass::inconvertclass ()
395	{
396	start = NULL;
397	len = 0;
398	}
399
400
401	void inconvertclass::reset ()
402	{
403	start = NULL;
404	len = 0;
405	}
406
407	void inconvertclass::setinput (char *thestart, size_t thelen)
408	{
409	start = thestart;
410	len = thelen;
411	}
412
413	void inconvertclass::convert (text_t &output, status_t &status)
414	{
415	output.clear();
416
417	if (start == NULL \|\| len == 0)
418	{
419	status = finished;
420	return;
421	}
422
423	// don't want any funny sign conversions happening
424	unsigned char here = (unsigned char )start;
425	while (len > 0)
426	{
427	output.push_back (*here); // append this character
428	++here;
429	--len;
430	}
431
432	start = (char *)here; // save current position
433	status = finished;
434	}
435
436	// will treat the text_t as a 8-bit string and convert
437	// it to a 16-bit string using the about convert method.
438	text_t inconvertclass::convert (const text_t &t) {
439	text_t out;
440	text_t tmpout;
441	status_t status;
442	text_t::const_iterator here = t.begin();
443	text_t::const_iterator end = t.end();
444	unsigned char cbuf[256];
445	size_t cbuflen = 0;
446
447	while (here != end) {
448	while (here != end && cbuflen < 256) {
449	cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
450	here++;
451	}
452
453	if (cbuflen > 0) {
454	setinput ((char *)cbuf, cbuflen);
455	status = unfinished;
456	while (status == unfinished) {
457	convert (tmpout, status);
458	out += tmpout;
459	}
460	cbuflen = 0;
461	}
462	}
463
464	out.setencoding (0); // unicode
465
466	return out;
467	}
468
469	// an instance of the default inconvertclass to do simple
470	// conversions. Note that any functions that use this are
471	// not reentrant. If a function needs to be reentrant it
472	// should declare its own instance.
473	inconvertclass ascii2text_t;
474
475
476	////////////////////////////////////
477	// outconvertclass methods
478	////////////////////////////////////
479
480	// Convert from a text_t class to a char stream
481	// This default version assumes the output is a ascii
482	// character array. If you set the output stream you
483	// can use this class to output to a stream using the
484	// << operator. The << operator can also be conveniently
485	// used to set the output stream by doing something like
486	//
487	// cout << text_t2ascii << text_tstr << anothertext_tstr;
488	//
489	outconvertclass::outconvertclass ()
490	{
491	input = NULL;
492	outs = NULL;
493	}
494
495	void outconvertclass::reset ()
496	{
497	input = NULL;
498	outs = NULL;
499	}
500
501	void outconvertclass::setinput (text_t *theinput)
502	{
503	input = theinput;
504	if (input != NULL) texthere = input->begin();
505	}
506
507	void outconvertclass::convert (char *output, size_t maxlen,
508	size_t &len, status_t &status)
509	{
510	if (input == NULL \|\| output == NULL)
511	{
512	status = finished;
513	return;
514	}
515
516	// don't want any funny sign conversions happening
517	unsigned char uoutput = (unsigned char )output;
518	text_t::iterator textend = input->end();
519	len = 0;
520	while ((len < maxlen) && (texthere != textend))
521	{
522	if (texthere < 256) uoutput = (unsigned char)(*texthere);
523	else {
524	// put a space or a question mark depending on what
525	// the character is. Question marks tell the user that
526	// they are missing some information.
527	if (is_unicode_space (texthere)) uoutput = ' ';
528	else *uoutput = '?';
529	}
530	++uoutput;
531	++len;
532	++texthere;
533	}
534
535	if (texthere == textend) status = finished;
536	else status = unfinished;
537	}
538
539	// will convert the 16-bit string to a 8-bit stream
540	// and place the result in a text_t. This method uses
541	// the above convert function.
542	text_t outconvertclass::convert (const text_t &t) {
543	text_t out;
544	unsigned char cbuf[256];
545	size_t cbuflen = 0;
546	status_t status = unfinished;
547
548	setinput ((text_t *)&t); // discard constant
549	while (status == unfinished) {
550	convert ((char *)cbuf, 256, cbuflen, status);
551	out.appendcarr ((char *)cbuf, cbuflen);
552	}
553
554	out.setencoding (1); // other encoding
555
556	return out;
557	}
558
559
560	void outconvertclass::setostream (ostream *theouts)
561	{
562	outs = theouts;
563	}
564
565	ostream *outconvertclass::getostream ()
566	{
567	return outs;
568	}
569
570
571
572
573	// an instance of the default outconvertclass to do simple
574	// conversions
575	outconvertclass text_t2ascii;
576
577
578
579	// stream operators for the output class
580
581	outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
582	{
583	outconverter.setostream(&theouts);
584	return outconverter;
585	}
586
587
588	#define STREAMBUFSIZE 256
589	outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
590	{
591	ostream *outstream = outconverter.getostream();
592
593	if (outstream == NULL) return outconverter;
594
595	char outbuf[STREAMBUFSIZE];
596	size_t len;
597	outconvertclass::status_t status = outconvertclass::unfinished;
598
599	// assume that there is no data needing converting
600	// left in the converter
601	outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
602
603	while (status == outconvertclass::unfinished)
604	{
605	outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
606	if (len > 0) outstream->write(outbuf, len);
607	}
608
609	return outconverter;
610	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: