Context Navigation

source: trunk/gsdl/lib/text_t.cpp@ 100

Last change on this file since 100 was 100, checked in by rjmcnab, 25 years ago
Added standard header to source files.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 12.1 KB

Line
1	/**********************************************************************
2	*
3	* text_t.cpp -- a simple 16-bit character string class
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* PUT COPYRIGHT NOTICE HERE
7	*
8	* $Id: text_t.cpp 100 1999-01-08 02:33:16Z rjmcnab $
9	*
10	*********************************************************************/
11
12	/*
13	$Log$
14	Revision 1.3 1999/01/08 02:33:16 rjmcnab
15
16	Added standard header to source files.
17
18	*/
19
20	static char *RCSID = "$Id: text_t.cpp 100 1999-01-08 02:33:16Z rjmcnab $";
21
22
23	#include "text_t.h"
24
25	#ifndef USE_OBJECTSPACE
26	# include <algorithm>
27	#else
28	# include <ospace\std\algorithm>
29	#endif
30
31
32	#include "unitool.h"
33
34	////////////////////////////////////
35	// text_t methods
36	////////////////////////////////////
37
38	text_t::text_t ()
39	{
40	setencoding(0);
41	clear ();
42	}
43
44	text_t::text_t (int i)
45	{
46	setencoding(0);
47	clear ();
48	appendint (i);
49	}
50
51	text_t::text_t (char *s)
52	{
53	setencoding(0);
54	clear ();
55	appendcstr (s);
56	}
57
58	void text_t::append (const text_t &t)
59	{
60	const_iterator here, end=t.end();
61	for (here=t.begin(); here!=end;here++)
62	{
63	text.push_back(*here);
64	}
65	}
66
67	void text_t::appendrange (iterator first, iterator last)
68	{
69	while (first != last)
70	{
71	text.push_back (*first);
72	first++;
73	}
74	}
75
76	void text_t::appendrange (const_iterator first, const_iterator last)
77	{
78	while (first != last)
79	{
80	text.push_back (*first);
81	first++;
82	}
83	}
84
85	void text_t::appendint (int i)
86	{
87	// deal with zeros and negatives
88	if (i == 0)
89	{
90	text.push_back('0');
91	return;
92	}
93	else if (i < 0)
94	{
95	text.push_back('-');
96	i *= -1;
97	}
98
99	// get a buffer for the conversion
100	int maxbuflen = sizeof(int)*3;
101	char *buf = new char[maxbuflen];
102	int len = 0;
103
104	// get the number in reverse
105	while (i > 0)
106	{
107	buf[len++] = '0'+ (i%10);
108	i = i/10;
109	}
110
111	// reverse the number
112	while (len > 0)
113	{
114	text.push_back(buf[--len]);
115	}
116
117	delete buf;
118	}
119
120	int text_t::getint ()
121	{
122	int i = 0;
123	int mult = 1; // become -1 for negative numbers
124
125	iterator here = text.begin();
126	iterator end = text.end();
127
128	// do plus and minus signs
129	if (here != end)
130	{
131	if (*here == '-')
132	{
133	mult = -1;
134	here++;
135	}
136	else if (*here == '+')
137	{
138	mult = 1;
139	here++;
140	}
141	}
142
143	// deal with the number
144	while ((here != end) && (here >= '0') && (here <= '9'))
145	{
146	i = 10i + (here - '0');
147	here++;
148	}
149
150	i *= mult;
151	return i;
152	}
153
154
155
156	void text_t::appendcarr (char *s, size_type len)
157	{
158	unsigned char us = (unsigned char )s;
159	while (len > 0)
160	{
161	text.push_back (*us); // append this character
162	us++;
163	len--;
164	}
165	}
166
167	void text_t::appendcstr (char *s)
168	{
169	unsigned char us = (unsigned char )s;
170	while (*us != '\0')
171	{
172	text.push_back (*us); // append this character
173	us++;
174	}
175	}
176
177
178	// strings returned from getcarr and getcstr become the callers
179	// responsibility and should be deallocated with "delete"
180
181	char *text_t::getcarr(size_type &len) const
182	{
183	unsigned char *cstr = new unsigned char[size()];
184	const_iterator ithere = begin();
185	const_iterator itend = end();
186
187	while (ithere != itend)
188	{
189	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
190	else {
191	// put a space or a question mark depending on what
192	// the character is. Question marks tell the user that
193	// they are missing some information.
194	if (is_unicode_space (*ithere)) cstr[len] = ' ';
195	else cstr[len] = '?';
196	}
197	len++;
198	ithere++;
199	}
200
201	return (char *)cstr;
202	}
203
204	char *text_t::getcstr() const
205	{
206	unsigned char *cstr = new unsigned char[size() + 1];
207	const_iterator ithere = begin();
208	const_iterator itend = end();
209	int len = 0;
210
211	while (ithere != itend)
212	{
213	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
214	else {
215	// put a space or a question mark depending on what
216	// the character is. Question marks tell the user that
217	// they are missing some information.
218	if (is_unicode_space (*ithere)) cstr[len] = ' ';
219	else cstr[len] = '?';
220	}
221	len++;
222	ithere++;
223	}
224
225	cstr[len] = '\0';
226
227	return (char *)cstr;
228	}
229
230
231	// general functions which work on text_ts
232
233	// find a character within a range
234	text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
235	unsigned short c)
236	{
237	while (first != last)
238	{
239	if (*first == c) break;
240	first++;
241	}
242	return first;
243	}
244
245	text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
246	unsigned short c)
247	{
248	while (first != last)
249	{
250	if (*first == c) break;
251	first++;
252	}
253	return first;
254	}
255
256	// get a string up to the next delimiter (which is skipped)
257	text_t::const_iterator getdelimitstr (text_t::const_iterator first,
258	text_t::const_iterator last,
259	unsigned short c, text_t &outstr)
260	{
261	text_t::const_iterator here = first;
262	here = findchar (first, last, c);
263	outstr.clear();
264	outstr.appendrange (first, here);
265	if (here != last) here++; // skip c
266	return here;
267	}
268
269	text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
270	unsigned short c, text_t &outstr)
271	{
272	text_t::iterator here = first;
273	here = findchar (first, last, c);
274	outstr.clear();
275	outstr.appendrange (first, here);
276	if (here != last) here++; // skip c
277	return here;
278	}
279
280	// split a string with a character
281	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
282	unsigned short c, text_tlist &outlist)
283	{
284	outlist.erase(outlist.begin(), outlist.end());
285
286	text_t t;
287
288	while (first != last)
289	{
290	first = getdelimitstr (first, last, c, t);
291	outlist.push_back (t);
292	}
293	}
294
295	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
296	unsigned short c, text_tarray &outlist)
297	{
298	outlist.erase(outlist.begin(), outlist.end());
299
300	text_t t;
301
302	while (first != last)
303	{
304	first = getdelimitstr (first, last, c, t);
305	outlist.push_back (t);
306	}
307	}
308
309	// join a string using a character
310	void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
311	{
312	outtext.clear ();
313
314	text_tlist::const_iterator here = inlist.begin ();
315	text_tlist::const_iterator end = inlist.end ();
316	bool first = true;
317	while (here != end)
318	{
319	if (!first) outtext.push_back (c);
320	first = false;
321	outtext += *here;
322	here++;
323	}
324	}
325
326	void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
327	{
328	outtext.clear ();
329
330	text_tarray::const_iterator here = inlist.begin ();
331	text_tarray::const_iterator end = inlist.end ();
332	bool first = true;
333	while (here != end)
334	{
335	if (!first) outtext.push_back (c);
336	first = false;
337	outtext += *here;
338	here++;
339	}
340	}
341
342	// count the occurances of a character within a range
343	int countchar (text_t::const_iterator first, text_t::const_iterator last,
344	unsigned short c)
345	{
346	int count = 0;
347	while (first != last) {
348	if (*first == c) count ++;
349	first ++;
350	}
351	return count;
352	}
353
354
355
356	////////////////////////////////////
357	// convertclass methods
358	////////////////////////////////////
359
360	// conversion classes used for getting information in to and out of
361	// the text_t class.
362
363	convertclass::convertclass ()
364	{
365	// nothing to do
366	}
367
368	void convertclass::reset ()
369	{
370	// nothing to do
371	}
372
373
374	////////////////////////////////////
375	// inconvertclass methods
376	////////////////////////////////////
377
378	// convert from a char stream to the text_t class
379	// the default version assumes the input is a ascii
380	// character array
381
382	inconvertclass::inconvertclass ()
383	{
384	start = NULL;
385	len = 0;
386	}
387
388
389	void inconvertclass::reset ()
390	{
391	start = NULL;
392	len = 0;
393	}
394
395	void inconvertclass::setinput (char *thestart, size_t thelen)
396	{
397	start = thestart;
398	len = thelen;
399	}
400
401	void inconvertclass::convert (text_t &output, status_t &status)
402	{
403	output.clear();
404
405	if (start == NULL \|\| len == 0)
406	{
407	status = finished;
408	return;
409	}
410
411	// don't want any funny sign conversions happening
412	unsigned char here = (unsigned char )start;
413	while (len > 0)
414	{
415	output.push_back (*here); // append this character
416	++here;
417	--len;
418	}
419
420	start = (char *)here; // save current position
421	status = finished;
422	}
423
424	// will treat the text_t as a 8-bit string and convert
425	// it to a 16-bit string using the about convert method.
426	text_t inconvertclass::convert (const text_t &t) {
427	text_t out;
428	text_t tmpout;
429	status_t status;
430	text_t::const_iterator here = t.begin();
431	text_t::const_iterator end = t.end();
432	unsigned char cbuf[256];
433	size_t cbuflen = 0;
434
435	while (here != end) {
436	while (here != end && cbuflen < 256) {
437	cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
438	here++;
439	}
440
441	if (cbuflen > 0) {
442	setinput ((char *)cbuf, cbuflen);
443	status = unfinished;
444	while (status == unfinished) {
445	convert (tmpout, status);
446	out += tmpout;
447	}
448	cbuflen = 0;
449	}
450	}
451
452	out.setencoding (0); // unicode
453
454	return out;
455	}
456
457	// A shared instance of the default inconvertclass for simple
458	// conversions. Any function that uses this shared instance is
459	// not reentrant; a function that needs to be reentrant
460	// should declare its own instance.
461	inconvertclass ascii2text_t;
462
463
464	////////////////////////////////////
465	// outconvertclass methods
466	////////////////////////////////////
467
468	// Convert from a text_t class to a char stream
469	// This default version assumes the output is a ascii
470	// character array. If you set the output stream you
471	// can use this class to output to a stream using the
472	// << operator. The << operator can also be conveniently
473	// used to set the output stream by doing something like
474	//
475	// cout << text_t2ascii << text_tstr << anothertext_tstr;
476	//
477	outconvertclass::outconvertclass ()
478	{
479	input = NULL;
480	outs = NULL;
481	}
482
483	void outconvertclass::reset ()
484	{
485	input = NULL;
486	outs = NULL;
487	}
488
489	void outconvertclass::setinput (text_t *theinput)
490	{
491	input = theinput;
492	if (input != NULL) texthere = input->begin();
493	}
494
495	void outconvertclass::convert (char *output, size_t maxlen,
496	size_t &len, status_t &status)
497	{
498	if (input == NULL \|\| output == NULL)
499	{
500	status = finished;
501	return;
502	}
503
504	// don't want any funny sign conversions happening
505	unsigned char uoutput = (unsigned char )output;
506	text_t::iterator textend = input->end();
507	len = 0;
508	while ((len < maxlen) && (texthere != textend))
509	{
510	if (texthere < 256) uoutput = (unsigned char)(*texthere);
511	else {
512	// put a space or a question mark depending on what
513	// the character is. Question marks tell the user that
514	// they are missing some information.
515	if (is_unicode_space (texthere)) uoutput = ' ';
516	else *uoutput = '?';
517	}
518	++uoutput;
519	++len;
520	++texthere;
521	}
522
523	if (texthere == textend) status = finished;
524	else status = unfinished;
525	}
526
527	// will convert the 16-bit string to a 8-bit stream
528	// and place the result in a text_t. This method uses
529	// the above convert function.
530	text_t outconvertclass::convert (const text_t &t) {
531	text_t out;
532	unsigned char cbuf[256];
533	size_t cbuflen = 0;
534	status_t status = unfinished;
535
536	setinput ((text_t *)&t); // discard constant
537	while (status == unfinished) {
538	convert ((char *)cbuf, 256, cbuflen, status);
539	out.appendcarr ((char *)cbuf, cbuflen);
540	}
541
542	out.setencoding (1); // other encoding
543
544	return out;
545	}
546
547
548	void outconvertclass::setostream (ostream *theouts)
549	{
550	outs = theouts;
551	}
552
553	ostream *outconvertclass::getostream ()
554	{
555	return outs;
556	}
557
558
559
560
561	// A shared instance of the default outconvertclass for simple
562	// conversions (e.g. "cout << text_t2ascii << sometext").
563	outconvertclass text_t2ascii;
564
565
566
567	// stream operators for the output class
568
569	outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
570	{
571	outconverter.setostream(&theouts);
572	return outconverter;
573	}
574
575
576	#define STREAMBUFSIZE 256
577	outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
578	{
579	ostream *outstream = outconverter.getostream();
580
581	if (outstream == NULL) return outconverter;
582
583	char outbuf[STREAMBUFSIZE];
584	size_t len;
585	outconvertclass::status_t status = outconvertclass::unfinished;
586
587	// assume that there is no data needing converting
588	// left in the converter
589	outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
590
591	while (status == outconvertclass::unfinished)
592	{
593	outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
594	if (len > 0) outstream->write(outbuf, len);
595	}
596
597	return outconverter;
598	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: