Context Navigation

source: gsdl/trunk/common-src/src/lib/text_t.cpp@ 20762

Last change on this file since 20762 was 20762, checked in by mdewsnip, 15 years ago
Fixed memory bugs (causing crashes on some Windows machines) in findword(), identified by Valgrind.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 21.2 KB

Rev	Line
[1076]	1	/**********************************************************************
	2	*
	3	* text_t.cpp -- a simple 16-bit character string class
	4	* Copyright (C) 1999 The New Zealand Digital Library Project
	5	*
	6	* A component of the Greenstone digital library software
	7	* from the New Zealand Digital Library Project at the
	8	* University of Waikato, New Zealand.
	9	*
	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License as published by
	12	* the Free Software Foundation; either version 2 of the License, or
	13	* (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	* GNU General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public License
	21	* along with this program; if not, write to the Free Software
	22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	*
[1860]	24	* $Id: text_t.cpp 20762 2009-09-29 22:54:21Z mdewsnip $
	25	*
[1076]	26	*********************************************************************/
	27
	28	#include "text_t.h"
	29
	30	#if defined(GSDL_USE_OBJECTSPACE)
	31	# include <ospace\std\algorithm>
	32	#elif defined(GSDL_USE_STL_H)
	33	# if defined(GSDL_USE_ALGO_H)
	34	# include <algo.h>
	35	# else
	36	# include <algorithm.h>
	37	# endif
	38	#else
	39	# include <algorithm>
	40	#endif
	41
[1860]	42	#ifdef HAVE_CONFIG_H
	43	# ifdef __WIN32__
[14909]	44	# include "win32cfg.h"
[1860]	45	# else
	46	# include "config.h"
	47	# endif
	48	#endif
[1076]	49
[18880]	50	#include <cstring>
[1860]	51
[1076]	52	#include "unitool.h"
	53
[7382]	54	const text_t g_EmptyText("");
	55
[1076]	56	////////////////////////////////////
	57	// text_t methods
	58	////////////////////////////////////
	59
[1860]	60	// new stream converter ...
[8727]	61	ostream& operator<< (ostream &o, const text_t &text)
[1860]	62	{
	63	text_t::const_iterator ithere = text.begin();
	64	text_t::const_iterator itend = text.end();
	65
	66	while (ithere != itend)
	67	{
	68	if (*ithere < 256)
	69	{
	70	o << (unsigned char)(*ithere);
	71	}
	72	else
	73	{
	74	// put a space or a question mark depending on what
	75	// the character is. Question marks tell the user that
	76	// they are missing some information.
	77	if (is_unicode_space (*ithere))
	78	o << ' ';
	79	else
	80	o << '?';
	81	}
[8727]	82	++ithere;
[1860]	83	}
	84
	85	return o;
	86	}
	87
[1076]	88	text_t::text_t ()
	89	{
	90	setencoding(0);
	91	clear ();
	92	}
	93
	94	text_t::text_t (int i)
	95	{
	96	setencoding(0);
	97	clear ();
	98	appendint (i);
	99	}
	100
[8727]	101	text_t::text_t (const char *s)
[1076]	102	{
	103	setencoding(0);
	104	clear ();
	105	appendcstr (s);
	106	}
	107
[8727]	108	text_t::text_t (const char *s, size_type nLength)
	109	{
	110	setencoding(0);
	111	clear ();
	112	appendcarr(s, nLength);
	113	}
[1860]	114
[8727]	115
[1076]	116	void text_t::append (const text_t &t)
	117	{
	118	text.insert(text.end(), t.begin(), t.end());
	119	}
	120
	121	void text_t::appendrange (iterator first, iterator last)
	122	{
	123	text.insert(text.end(), first, last);
	124	}
	125
	126	void text_t::appendrange (const_iterator first, const_iterator last)
	127	{
	128	text.insert(text.end(), first, last);
	129	}
	130
	131	void text_t::appendint (int i)
	132	{
	133	// deal with zeros and negatives
	134	if (i == 0)
	135	{
	136	text.push_back('0');
	137	return;
	138	}
	139	else if (i < 0)
	140	{
	141	text.push_back('-');
	142	i *= -1;
	143	}
	144
	145	// get a buffer for the conversion
	146	int maxbuflen = sizeof(int)*3;
	147	char *buf = new char[maxbuflen];
	148	int len = 0;
	149
	150	// get the number in reverse
	151	while (i > 0)
	152	{
	153	buf[len++] = '0'+ (i%10);
	154	i = i/10;
	155	}
	156
	157	// reverse the number
	158	while (len > 0)
	159	{
	160	text.push_back(buf[--len]);
	161	}
	162
[8727]	163	delete []buf;
[1076]	164	}
	165
	166	int text_t::getint () const
	167	{
	168	int i = 0;
	169	int mult = 1; // become -1 for negative numbers
	170
	171	const_iterator here = text.begin();
	172	const_iterator end = text.end();
	173
	174	// do plus and minus signs
	175	if (here != end)
	176	{
	177	if (*here == '-')
	178	{
	179	mult = -1;
[9593]	180	++here;
[1076]	181	}
	182	else if (*here == '+')
	183	{
	184	mult = 1;
[8727]	185	++here;
[1076]	186	}
	187	}
	188
	189	// deal with the number
	190	while ((here != end) && (here >= '0') && (here <= '9'))
	191	{
	192	i = 10i + (here - '0');
[8727]	193	++here;
[1076]	194	}
	195
	196	i *= mult;
	197	return i;
	198	}
	199
[2487]	200	unsigned long text_t::getulong () const
	201	{
	202	unsigned long i = 0;
[1076]	203
[2487]	204	const_iterator here = text.begin();
	205	const_iterator end = text.end();
[1076]	206
[2487]	207	while ((here != end) && (here >= '0') && (here <= '9'))
	208	{
	209	i = 10i + (here - '0');
[8727]	210	++here;
[2487]	211	}
	212
	213	return i;
	214	}
	215
[8727]	216	void text_t::appendcarr (const char *s, size_type len)
[1076]	217	{
	218	unsigned char us = (unsigned char )s;
[8727]	219	if (text.capacity() < (text.size() + len + 2)) {
	220	text.reserve(text.size() + len + 2);
	221	}
	222
[1076]	223	while (len > 0)
	224	{
	225	text.push_back (*us); // append this character
[8727]	226	++us;
	227	--len;
[1076]	228	}
	229	}
	230
[8727]	231	void text_t::appendcstr (const char *s)
[1076]	232	{
[8727]	233	size_t len = strlen(s);
	234	if (text.capacity() < (text.size() + len + 2)) {
	235	text.reserve(text.size() + len + 2);
	236	}
	237
[1076]	238	unsigned char us = (unsigned char )s;
	239	while (*us != '\0')
	240	{
	241	text.push_back (*us); // append this character
[8727]	242	++us;
[1076]	243	}
	244	}
	245
	246
	247	// strings returned from getcarr and getcstr become the caller's
[8727]	248	// responsibility and should be deallocated with "delete []"
[1076]	249
	250	char *text_t::getcarr(size_type &len) const
	251	{
	252	unsigned char *cstr = new unsigned char[size()];
	253	len = 0;
	254
	255	const_iterator ithere = begin();
	256	const_iterator itend = end();
	257	while (ithere != itend)
	258	{
	259	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
	260	else {
	261	// put a space or a question mark depending on what
	262	// the character is. Question marks tell the user that
	263	// they are missing some information.
	264	if (is_unicode_space (*ithere)) cstr[len] = ' ';
	265	else cstr[len] = '?';
	266	}
[8727]	267	++len;
	268	++ithere;
[1076]	269	}
	270
	271	return (char *)cstr;
	272	}
	273
	274	char *text_t::getcstr() const
	275	{
	276	unsigned char *cstr = new unsigned char[size() + 1];
	277	const_iterator ithere = begin();
	278	const_iterator itend = end();
	279	int len = 0;
	280
	281	while (ithere != itend)
	282	{
	283	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
	284	else {
	285	// put a space or a question mark depending on what
	286	// the character is. Question marks tell the user that
	287	// they are missing some information.
	288	if (is_unicode_space (*ithere)) cstr[len] = ' ';
	289	else cstr[len] = '?';
	290	}
[8727]	291	++len;
	292	++ithere;
[1076]	293	}
	294
	295	cstr[len] = '\0';
	296
	297	return (char *)cstr;
	298	}
	299
	300
[14342]	301	int text_t::replace(text_t toreplace, text_t replacement)
	302	{
	303	// Get the beginning and end of the current text
	304	text_t::iterator text_begin = text.begin(), text_end = text.end();
	305	int count = 0;
	306	text_t new_text, temp_text;
	307
	308	// Loop through and grab the text off the end
	309	while (text_begin < text_end)
	310	{
	311	// Find where the next toreplace is
	312	text_t::iterator next_toreplace = findword(text_begin, text_end, toreplace);
	313
[15077]	314	// We've found a match
	315	if (next_toreplace != text_end)
[14342]	316	{
[15077]	317	new_text.append(substr(text_begin, next_toreplace));
	318	new_text.append(replacement);
	319	count++;
	320	text_begin = next_toreplace + toreplace.size();
[14342]	321	}
[15077]	322	// We haven't found a match
[14342]	323	else
	324	{
[15077]	325	new_text.append(substr(text_begin, text_end));
	326	text_begin = text_end;
[14342]	327	}
	328	}
	329
	330	text.clear();
	331	text = new_text.text_as_usvector();
	332	return count;
	333	}
	334
	335
[1076]	336	// general functions which work on text_ts
	337
	338	// find a character within a range
	339	text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
	340	unsigned short c)
	341	{
	342	while (first != last)
	343	{
	344	if (*first == c) break;
[8727]	345	++first;
[1076]	346	}
	347	return first;
	348	}
	349
	350	text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
	351	unsigned short c)
	352	{
	353	while (first != last)
	354	{
	355	if (*first == c) break;
[8727]	356	++first;
[1076]	357	}
	358	return first;
	359	}
	360
[18700]	361	text_t::iterator findlastchar (text_t::iterator first, text_t::iterator last_plus_one,
[12504]	362	unsigned short c)
	363	{
[18821]	364	text_t::iterator current = (last_plus_one != first) ? last_plus_one - 1 : first;
[12504]	365	while (current != first) {
	366	if (*current == c) break;
	367	--current;
	368	}
	369	if (current == first) {
	370	if (*current == c) return current;
[18700]	371	return last_plus_one;
[12504]	372	}
	373
	374	return current;
	375	}
	376
[16066]	377	text_t::const_iterator findword (text_t::const_iterator first,
	378	text_t::const_iterator last,
	379	const text_t& word)
	380	{
	381	text_t::const_iterator word_begin = word.begin();
	382	text_t::const_iterator word_end = word.end();
	383
	384	while (first != last)
	385	{
	386	text_t::const_iterator char_match = first;
	387	text_t::const_iterator word_here = word_begin;
[20762]	388	while (word_here != word_end && char_match != last)
[16066]	389	{
	390	if (char_match != word_here)
	391	{
	392	break;
	393	}
	394	++char_match;
	395	++word_here;
	396	}
	397	if (word_here==word_end)
	398	{
	399	return first;
	400	}
	401	++first;
	402	}
	403	return last; // get to here only if there is no match
	404	}
	405
[8727]	406	text_t::iterator findword (text_t::iterator first,
	407	text_t::iterator last,
[1860]	408	const text_t& word)
	409	{
	410	text_t::const_iterator word_begin = word.begin();
	411	text_t::const_iterator word_end = word.end();
	412
	413	while (first != last)
	414	{
	415	text_t::iterator char_match = first;
	416	text_t::const_iterator word_here = word_begin;
[20762]	417	while (word_here != word_end && char_match != last)
[1860]	418	{
	419	if (char_match != word_here)
	420	{
	421	break;
	422	}
[8727]	423	++char_match;
	424	++word_here;
[1860]	425	}
	426	if (word_here==word_end)
	427	{
	428	return first;
	429	}
[8727]	430	++first;
[1860]	431	}
	432	return last; // get to here only if there is no match
	433	}
	434
[1076]	435	// get a string up to the next delimiter (which is skipped)
	436	text_t::const_iterator getdelimitstr (text_t::const_iterator first,
	437	text_t::const_iterator last,
	438	unsigned short c, text_t &outstr)
	439	{
	440	text_t::const_iterator here = first;
	441	here = findchar (first, last, c);
	442	outstr.clear();
	443	outstr.appendrange (first, here);
[8727]	444	if (here != last) ++here; // skip c
[1076]	445	return here;
	446	}
	447
	448	text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
	449	unsigned short c, text_t &outstr)
	450	{
	451	text_t::iterator here = first;
	452	here = findchar (first, last, c);
	453	outstr.clear();
	454	outstr.appendrange (first, here);
[8727]	455	if (here != last) ++here; // skip c
[1076]	456	return here;
	457	}
	458
[16066]	459	text_t::const_iterator getdelimitstr (text_t::const_iterator first, text_t::const_iterator last,
	460	text_t w, text_t &outstr)
	461	{
	462	text_t::const_iterator here = first;
	463	here = findword (first, last, w);
	464	outstr.clear();
	465	outstr.appendrange (first, here);
	466	if (here != last) here += w.size(); // skip w
	467	return here;
	468	}
	469
[1076]	470	// split a string with a character
	471	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	472	unsigned short c, text_tset &outlist)
	473	{
	474	outlist.erase(outlist.begin(), outlist.end());
	475
	476	text_t t;
	477
	478	while (first != last)
	479	{
	480	first = getdelimitstr (first, last, c, t);
	481	outlist.insert (t);
	482	}
	483	}
	484
	485	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	486	unsigned short c, text_tlist &outlist)
	487	{
	488	outlist.erase(outlist.begin(), outlist.end());
	489
	490	text_t t;
	491
	492	while (first != last)
	493	{
	494	first = getdelimitstr (first, last, c, t);
	495	outlist.push_back (t);
	496	}
	497	}
	498
	499	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	500	unsigned short c, text_tarray &outlist)
	501	{
	502	outlist.erase(outlist.begin(), outlist.end());
	503
	504	text_t t;
	505
	506	while (first != last)
	507	{
	508	first = getdelimitstr (first, last, c, t);
	509	outlist.push_back (t);
	510	}
	511	}
	512
[16066]	513	void splitword (text_t::const_iterator first, text_t::const_iterator last,
	514	text_t w, text_tlist &outlist)
	515	{
	516	outlist.erase(outlist.begin(), outlist.end());
	517
	518	text_t t;
	519
	520	while (first != last)
	521	{
	522	first = getdelimitstr (first, last, w, t);
	523	outlist.push_back (t);
	524	}
	525	}
	526
[1076]	527	// join a string using a character
	528	void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext)
	529	{
	530	outtext.clear ();
	531
	532	text_tset::const_iterator here = inlist.begin ();
	533	text_tset::const_iterator end = inlist.end ();
[8727]	534
	535	if (here != end) {
	536	outtext += *here; ++here;
	537	while (here != end) {
	538	outtext.push_back (c);
[1076]	539	outtext += *here;
[8727]	540	++here;
[1076]	541	}
[8727]	542	}
[1076]	543	}
	544
	545	void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
	546	{
	547	outtext.clear ();
	548
	549	text_tlist::const_iterator here = inlist.begin ();
	550	text_tlist::const_iterator end = inlist.end ();
[8727]	551	if (here != end) {
	552	outtext += *here; ++here;
	553	while (here != end) {
	554	outtext.push_back (c);
[1076]	555	outtext += *here;
[8727]	556	++here;
[1076]	557	}
[8727]	558	}
[1076]	559	}
	560
	561	void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
	562	{
	563	outtext.clear ();
	564
	565	text_tarray::const_iterator here = inlist.begin ();
	566	text_tarray::const_iterator end = inlist.end ();
[8727]	567	if (here != end) {
	568	outtext += *here; ++here;
	569	while (here != end) {
	570	outtext.push_back (c);
[1076]	571	outtext += *here;
[8727]	572	++here;
[1076]	573	}
[8727]	574	}
[1076]	575	}
	576
[8727]	577	void joinchar (const text_tlist &inlist, const text_t &c, text_t &outtext)
[1088]	578	{
	579	outtext.clear ();
	580
	581	text_tlist::const_iterator here = inlist.begin ();
	582	text_tlist::const_iterator end = inlist.end ();
[8727]	583	if (here != end) {
	584	outtext += *here; ++here;
	585	while (here != end) {
	586	outtext += c;
[1088]	587	outtext += *here;
[8727]	588	++here;
[1088]	589	}
[8727]	590	}
[1088]	591	}
	592
[8727]	593	void joinchar (const text_tset &inlist, const text_t &c, text_t &outtext)
[1088]	594	{
	595	outtext.clear ();
	596
	597	text_tset::const_iterator here = inlist.begin ();
	598	text_tset::const_iterator end = inlist.end ();
[8727]	599	if (here != end) {
	600	outtext += *here; ++here;
	601	while (here != end) {
	602	outtext += c;
[1088]	603	outtext += *here;
[8727]	604	++here;
[1088]	605	}
[8727]	606	}
[1088]	607	}
	608
[8727]	609	void joinchar (const text_tarray &inlist, const text_t &c, text_t &outtext)
[1076]	610	{
	611	outtext.clear ();
	612
	613	text_tarray::const_iterator here = inlist.begin ();
	614	text_tarray::const_iterator end = inlist.end ();
[8727]	615	if (here != end) {
	616	outtext += *here; ++here;
	617	while (here != end) {
	618	outtext += c;
[1076]	619	outtext += *here;
[8727]	620	++here;
[1076]	621	}
[8727]	622	}
[1076]	623	}
	624
	625	// count the occurrences of a character within a range
	626	int countchar (text_t::const_iterator first, text_t::const_iterator last,
	627	unsigned short c)
	628	{
	629	int count = 0;
	630	while (first != last) {
[9593]	631	if (*first == c) ++count;
	632	++first;
[1076]	633	}
	634	return count;
	635	}
	636
	637	// return a substring of string from first up to but not including last
	638	text_t substr (text_t::const_iterator first, text_t::const_iterator last) {
	639
[8727]	640	text_t substr; substr.reserve(last - first + 2);
[1076]	641	while (first != last) {
	642	substr.push_back(*first);
[8727]	643	++first;
[1076]	644	}
	645	return substr;
	646	}
	647
	648
	649	// convert to lowercase
	650	void lc (text_t::iterator first, text_t::iterator last) {
	651	while (first != last) {
	652	first = unicode_tolower(first);
[8727]	653	++first;
[1076]	654	}
	655	}
	656
	657	// convert to uppercase
	658	void uc (text_t::iterator first, text_t::iterator last) {
	659	while (first != last) {
	660	first = unicode_toupper(first);
[8727]	661	++first;
[1076]	662	}
	663	}
	664
	665
	666	// checks to see if it is a number (i.e. contains only 0-9)
	667	bool is_number (const text_t &text) {
	668
	669	text_t::const_iterator here = text.begin();
	670	text_t::const_iterator end = text.end();
	671
	672	while (here != end) {
	673	if ((here!='0') && (here!='1') && (*here!='2') &&
	674	(here!='3') && (here!='4') && (*here!='5') &&
	675	(here!='6') && (here!='7') && (*here!='8') &&
	676	(*here!='9')) return false;
[8727]	677	++here;
[1076]	678	}
	679	return true;
	680	}
	681
	682
	683	// checks to see if the text has any letters or digits
	684	bool has_unicode_letdig (const text_t &text) {
	685	if (text.empty()) return false;
	686
	687	text_t::const_iterator here = text.begin();
	688	text_t::const_iterator end = text.end();
	689	while (here != end) {
	690	if (is_unicode_letdig (*here)) return true;
[8727]	691	++here;
[1076]	692	}
	693
	694	return false;
	695	}
	696
[10140]	697	// checks to see if a text_t starts with the specified prefix
	698	bool starts_with(const text_t& text, const text_t& prefix) {
	699	if (prefix.empty()) return true;
	700	if (text.empty() \|\| text.size()<prefix.size()) return false;
	701	text_t substring = substr(text.begin(), text.begin()+prefix.size());
	702	return substring == prefix;
	703	}
	704	// checks to see if a text_t ends with the specified suffix
	705	bool ends_with(const text_t& text, const text_t& suffix) {
	706	if (suffix.empty()) return true;
	707	if (text.empty() \|\| text.size() < suffix.size()) return false;
	708	text_t substring = substr(text.end()-suffix.size(),text.end());
	709	return substring == suffix;
[1076]	710
[10140]	711	}
[1076]	712
[10140]	713
[1076]	714	////////////////////////////////////
	715	// convertclass methods
	716	////////////////////////////////////
	717
	718	// conversion classes used for getting information in to and out of
	719	// the text_t class.
	720
	721	convertclass::convertclass ()
	722	{
	723	// nothing to do
	724	}
	725
	726	void convertclass::reset ()
	727	{
	728	// nothing to do
	729	}
	730
	731
	732	////////////////////////////////////
	733	// inconvertclass methods
	734	////////////////////////////////////
	735
	736	// convert from a char stream to the text_t class
	737	// the default version assumes the input is an ASCII
	738	// character array
	739
	740	inconvertclass::inconvertclass ()
	741	{
	742	start = NULL;
	743	len = 0;
	744	}
	745
	746
	747	void inconvertclass::reset ()
	748	{
	749	start = NULL;
	750	len = 0;
	751	}
	752
	753	void inconvertclass::setinput (char *thestart, size_t thelen)
	754	{
	755	start = thestart;
	756	len = thelen;
	757	}
	758
	759	void inconvertclass::convert (text_t &output, status_t &status)
	760	{
	761	output.clear();
	762
	763	if (start == NULL \|\| len == 0)
	764	{
	765	status = finished;
	766	return;
	767	}
	768
[8727]	769	if (output.capacity() < len + 2)
	770	output.reserve(len + 2);
	771
[1076]	772	// don't want any funny sign conversions happening
	773	unsigned char here = (unsigned char )start;
	774	while (len > 0)
	775	{
	776	output.push_back (*here); // append this character
	777	++here;
	778	--len;
	779	}
	780
	781	start = (char *)here; // save current position
	782	status = finished;
	783	}
	784
	785	// will treat the text_t as a 8-bit string and convert
	786	// it to a 16-bit string using the above convert method.
	787	text_t inconvertclass::convert (const text_t &t) {
	788	text_t out;
	789	text_t tmpout;
	790	status_t status;
	791	text_t::const_iterator here = t.begin();
	792	text_t::const_iterator end = t.end();
	793	unsigned char cbuf[256];
	794	size_t cbuflen = 0;
	795
[8727]	796	out.clear();
	797	if (out.capacity() < t.size() + 2)
	798	out.reserve(t.size() + 2);
[1076]	799	while (here != end) {
	800	while (here != end && cbuflen < 256) {
	801	cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
[8727]	802	++here;
[1076]	803	}
	804
	805	if (cbuflen > 0) {
	806	setinput ((char *)cbuf, cbuflen);
	807	status = unfinished;
	808	while (status == unfinished) {
	809	convert (tmpout, status);
	810	out += tmpout;
	811	}
	812	cbuflen = 0;
	813	}
	814	}
	815
	816	out.setencoding (0); // unicode
	817
	818	return out;
	819	}
	820
	821	// an instance of the default inconvertclass to do simple
	822	// conversions. Note that any functions that use this are
	823	// not reentrant. If a function needs to be reentrant it
	824	// should declare its own instance.
	825	inconvertclass ascii2text_t;
	826
	827
	828	////////////////////////////////////
	829	// outconvertclass methods
	830	////////////////////////////////////
	831
	832	// Convert from a text_t class to a char stream
	833	// This default version assumes the output is an ASCII
	834	// character array. If you set the output stream you
	835	// can use this class to output to a stream using the
	836	// << operator. The << operator can also be conveniently
	837	// used to set the output stream by doing something like
	838	//
	839	// cout << text_t2ascii << text_tstr << anothertext_tstr;
	840	//
	841	outconvertclass::outconvertclass ()
	842	{
	843	input = NULL;
	844	outs = NULL;
	845	}
	846
	847	void outconvertclass::reset ()
	848	{
	849	input = NULL;
	850	outs = NULL;
	851	}
	852
	853	void outconvertclass::setinput (text_t *theinput)
	854	{
	855	input = theinput;
	856	if (input != NULL) texthere = input->begin();
	857	}
	858
[8727]	859	void outconvertclass::setdata(text_t *theinput, text_t::iterator thetexthere)
	860	{
	861	input = theinput;
	862	texthere = thetexthere;
	863	}
	864
[1076]	865	void outconvertclass::convert (char *output, size_t maxlen,
	866	size_t &len, status_t &status)
	867	{
	868	if (input == NULL \|\| output == NULL)
	869	{
	870	status = finished;
	871	return;
	872	}
	873
	874	// don't want any funny sign conversions happening
	875	unsigned char uoutput = (unsigned char )output;
	876	text_t::iterator textend = input->end();
	877	len = 0;
	878	while ((len < maxlen) && (texthere != textend))
	879	{
	880	if (texthere < 256) uoutput = (unsigned char)(*texthere);
	881	else {
	882	// put a space or a question mark depending on what
	883	// the character is. Question marks tell the user that
	884	// they are missing some information.
	885	if (is_unicode_space (texthere)) uoutput = ' ';
	886	else *uoutput = '?';
	887	}
	888	++uoutput;
	889	++len;
	890	++texthere;
	891	}
	892
	893	if (texthere == textend) status = finished;
	894	else status = unfinished;
	895	}
	896
	897	// will convert the 16-bit string to a 8-bit stream
	898	// and place the result in a text_t. This method uses
	899	// the above convert function.
	900	text_t outconvertclass::convert (const text_t &t) {
	901	text_t out;
	902	unsigned char cbuf[256];
	903	size_t cbuflen = 0;
	904	status_t status = unfinished;
[8727]	905
	906	out.clear();
	907	if (out.capacity() < t.size() + 2)
	908	out.reserve(t.size() + 2);
[1076]	909	setinput ((text_t *)&t); // discard constant
	910	while (status == unfinished) {
	911	convert ((char *)cbuf, 256, cbuflen, status);
	912	out.appendcarr ((char *)cbuf, cbuflen);
	913	}
	914
	915	out.setencoding (1); // other encoding
	916
	917	return out;
	918	}
	919
	920
	921	void outconvertclass::setostream (ostream *theouts)
	922	{
	923	outs = theouts;
	924	}
	925
	926	ostream *outconvertclass::getostream ()
	927	{
	928	return outs;
	929	}
	930
	931
	932
	933
	934	// an instance of the default outconvertclass to do simple
	935	// conversions
	936	outconvertclass text_t2ascii;
	937
	938
	939
	940	// stream operators for the output class
	941
	942	outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
	943	{
	944	outconverter.setostream(&theouts);
	945	return outconverter;
	946	}
	947
	948
	949	#define STREAMBUFSIZE 256
	950	outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
	951	{
	952	ostream *outstream = outconverter.getostream();
	953
	954	if (outstream == NULL) return outconverter;
	955
	956	char outbuf[STREAMBUFSIZE];
	957	size_t len;
	958	outconvertclass::status_t status = outconvertclass::unfinished;
	959
	960	// assume that there is no data needing converting
	961	// left in the converter
	962	outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
	963
	964	while (status == outconvertclass::unfinished)
	965	{
	966	outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
	967	if (len > 0) outstream->write(outbuf, len);
	968	}
	969
	970	return outconverter;
	971	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: