Context Navigation

source: main/trunk/greenstone2/common-src/src/lib/text_t.cpp@ 24112

Last change on this file since 24112 was 24112, checked in by ak19, 13 years ago
Sam fixed a vector iteration bug
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 21.9 KB

Rev	Line
[1076]	1	/**********************************************************************
	2	*
	3	* text_t.cpp -- a simple 16-bit character string class
	4	* Copyright (C) 1999 The New Zealand Digital Library Project
	5	*
	6	* A component of the Greenstone digital library software
	7	* from the New Zealand Digital Library Project at the
	8	* University of Waikato, New Zealand.
	9	*
	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License as published by
	12	* the Free Software Foundation; either version 2 of the License, or
	13	* (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	* GNU General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public License
	21	* along with this program; if not, write to the Free Software
	22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	*
[1860]	24	* $Id: text_t.cpp 24112 2011-06-03 02:12:23Z ak19 $
	25	*
[1076]	26	*********************************************************************/
	27
	28	#include "text_t.h"
	29
	30	#if defined(GSDL_USE_OBJECTSPACE)
	31	# include <ospace\std\algorithm>
	32	#elif defined(GSDL_USE_STL_H)
	33	# if defined(GSDL_USE_ALGO_H)
	34	# include <algo.h>
	35	# else
	36	# include <algorithm.h>
	37	# endif
	38	#else
	39	# include <algorithm>
	40	#endif
	41
[1860]	42	#ifdef HAVE_CONFIG_H
	43	# ifdef __WIN32__
[14909]	44	# include "win32cfg.h"
[1860]	45	# else
	46	# include "config.h"
	47	# endif
	48	#endif
[1076]	49
[18880]	50	#include <cstring>
[1860]	51
[1076]	52	#include "unitool.h"
[24110]	53	#include <iostream>
[1076]	54
[7382]	55	const text_t g_EmptyText("");
	56
[1076]	57	////////////////////////////////////
	58	// text_t methods
	59	////////////////////////////////////
	60
[1860]	61	// new stream converter ...
[8727]	62	ostream& operator<< (ostream &o, const text_t &text)
[1860]	63	{
	64	text_t::const_iterator ithere = text.begin();
	65	text_t::const_iterator itend = text.end();
	66
	67	while (ithere != itend)
	68	{
	69	if (*ithere < 256)
	70	{
	71	o << (unsigned char)(*ithere);
	72	}
	73	else
	74	{
	75	// put a space or a question mark depending on what
	76	// the character is. Question marks tell the user that
	77	// they are missing some information.
	78	if (is_unicode_space (*ithere))
	79	o << ' ';
	80	else
	81	o << '?';
	82	}
[8727]	83	++ithere;
[1860]	84	}
	85
	86	return o;
	87	}
	88
[1076]	89	text_t::text_t ()
	90	{
	91	setencoding(0);
	92	clear ();
	93	}
	94
	95	text_t::text_t (int i)
	96	{
	97	setencoding(0);
	98	clear ();
	99	appendint (i);
	100	}
	101
[8727]	102	text_t::text_t (const char *s)
[1076]	103	{
	104	setencoding(0);
	105	clear ();
	106	appendcstr (s);
	107	}
	108
[8727]	109	text_t::text_t (const char *s, size_type nLength)
	110	{
	111	setencoding(0);
	112	clear ();
	113	appendcarr(s, nLength);
	114	}
[1860]	115
[8727]	116
[1076]	117	void text_t::append (const text_t &t)
	118	{
	119	text.insert(text.end(), t.begin(), t.end());
	120	}
	121
	122	void text_t::appendrange (iterator first, iterator last)
	123	{
	124	text.insert(text.end(), first, last);
	125	}
	126
	127	void text_t::appendrange (const_iterator first, const_iterator last)
	128	{
	129	text.insert(text.end(), first, last);
	130	}
	131
	132	void text_t::appendint (int i)
	133	{
	134	// deal with zeros and negatives
	135	if (i == 0)
	136	{
	137	text.push_back('0');
	138	return;
	139	}
	140	else if (i < 0)
	141	{
	142	text.push_back('-');
	143	i *= -1;
	144	}
	145
	146	// get a buffer for the conversion
	147	int maxbuflen = sizeof(int)*3;
	148	char *buf = new char[maxbuflen];
	149	int len = 0;
	150
	151	// get the number in reverse
	152	while (i > 0)
	153	{
	154	buf[len++] = '0'+ (i%10);
	155	i = i/10;
	156	}
	157
	158	// reverse the number
	159	while (len > 0)
	160	{
	161	text.push_back(buf[--len]);
	162	}
	163
[8727]	164	delete []buf;
[1076]	165	}
	166
	167	int text_t::getint () const
	168	{
	169	int i = 0;
	170	int mult = 1; // become -1 for negative numbers
	171
	172	const_iterator here = text.begin();
	173	const_iterator end = text.end();
	174
	175	// do plus and minus signs
	176	if (here != end)
	177	{
	178	if (*here == '-')
	179	{
	180	mult = -1;
[9593]	181	++here;
[1076]	182	}
	183	else if (*here == '+')
	184	{
	185	mult = 1;
[8727]	186	++here;
[1076]	187	}
	188	}
	189
	190	// deal with the number
	191	while ((here != end) && (here >= '0') && (here <= '9'))
	192	{
	193	i = 10i + (here - '0');
[8727]	194	++here;
[1076]	195	}
	196
	197	i *= mult;
	198	return i;
	199	}
	200
[2487]	201	unsigned long text_t::getulong () const
	202	{
	203	unsigned long i = 0;
[1076]	204
[2487]	205	const_iterator here = text.begin();
	206	const_iterator end = text.end();
[1076]	207
[2487]	208	while ((here != end) && (here >= '0') && (here <= '9'))
	209	{
	210	i = 10i + (here - '0');
[8727]	211	++here;
[2487]	212	}
	213
	214	return i;
	215	}
	216
[8727]	217	void text_t::appendcarr (const char *s, size_type len)
[1076]	218	{
	219	unsigned char us = (unsigned char )s;
[8727]	220	if (text.capacity() < (text.size() + len + 2)) {
	221	text.reserve(text.size() + len + 2);
	222	}
	223
[1076]	224	while (len > 0)
	225	{
	226	text.push_back (*us); // append this character
[8727]	227	++us;
	228	--len;
[1076]	229	}
	230	}
	231
[8727]	232	void text_t::appendcstr (const char *s)
[1076]	233	{
[8727]	234	size_t len = strlen(s);
	235	if (text.capacity() < (text.size() + len + 2)) {
	236	text.reserve(text.size() + len + 2);
	237	}
	238
[1076]	239	unsigned char us = (unsigned char )s;
	240	while (*us != '\0')
	241	{
	242	text.push_back (*us); // append this character
[8727]	243	++us;
[1076]	244	}
	245	}
	246
	247
	248	// strings returned from getcarr and getcstr become the callers
[8727]	249	// responsibility and should be deallocated with "delete []"
[1076]	250
	251	char *text_t::getcarr(size_type &len) const
	252	{
	253	unsigned char *cstr = new unsigned char[size()];
	254	len = 0;
	255
	256	const_iterator ithere = begin();
	257	const_iterator itend = end();
	258	while (ithere != itend)
	259	{
	260	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
	261	else {
	262	// put a space or a question mark depending on what
	263	// the character is. Question marks tell the user that
	264	// they are missing some information.
	265	if (is_unicode_space (*ithere)) cstr[len] = ' ';
	266	else cstr[len] = '?';
	267	}
[8727]	268	++len;
	269	++ithere;
[1076]	270	}
	271
	272	return (char *)cstr;
	273	}
	274
	275	char *text_t::getcstr() const
	276	{
	277	unsigned char *cstr = new unsigned char[size() + 1];
	278	const_iterator ithere = begin();
	279	const_iterator itend = end();
	280	int len = 0;
	281
	282	while (ithere != itend)
	283	{
	284	if (ithere < 256) cstr[len] = (unsigned char)(ithere);
	285	else {
	286	// put a space or a question mark depending on what
	287	// the character is. Question marks tell the user that
	288	// they are missing some information.
	289	if (is_unicode_space (*ithere)) cstr[len] = ' ';
	290	else cstr[len] = '?';
	291	}
[8727]	292	++len;
	293	++ithere;
[1076]	294	}
	295
	296	cstr[len] = '\0';
	297
	298	return (char *)cstr;
	299	}
	300
	301
[14342]	302	int text_t::replace(text_t toreplace, text_t replacement)
	303	{
	304	// Get the beginning and end of the current text
	305	text_t::iterator text_begin = text.begin(), text_end = text.end();
	306	int count = 0;
	307	text_t new_text, temp_text;
	308
	309	// Loop through and grab the text off the end
	310	while (text_begin < text_end)
	311	{
	312	// Find where the next toreplace is
	313	text_t::iterator next_toreplace = findword(text_begin, text_end, toreplace);
	314
[15077]	315	// We've found a match
	316	if (next_toreplace != text_end)
[14342]	317	{
[15077]	318	new_text.append(substr(text_begin, next_toreplace));
	319	new_text.append(replacement);
	320	count++;
	321	text_begin = next_toreplace + toreplace.size();
[14342]	322	}
[15077]	323	// We haven't found a match
[14342]	324	else
	325	{
[15077]	326	new_text.append(substr(text_begin, text_end));
	327	text_begin = text_end;
[14342]	328	}
	329	}
	330
	331	text.clear();
	332	text = new_text.text_as_usvector();
	333	return count;
	334	}
	335
	336
[1076]	337	// general functions which work on text_ts
	338
	339	// find a character within a range
	340	text_t::const_iterator findchar (text_t::const_iterator first, text_t::const_iterator last,
	341	unsigned short c)
	342	{
	343	while (first != last)
	344	{
	345	if (*first == c) break;
[8727]	346	++first;
[1076]	347	}
	348	return first;
	349	}
	350
	351	text_t::iterator findchar (text_t::iterator first, text_t::iterator last,
	352	unsigned short c)
	353	{
	354	while (first != last)
	355	{
	356	if (*first == c) break;
[8727]	357	++first;
[1076]	358	}
	359	return first;
	360	}
	361
[18700]	362	text_t::iterator findlastchar (text_t::iterator first, text_t::iterator last_plus_one,
[12504]	363	unsigned short c)
	364	{
[18821]	365	text_t::iterator current = (last_plus_one != first) ? last_plus_one - 1 : first;
[12504]	366	while (current != first) {
	367	if (*current == c) break;
	368	--current;
	369	}
	370	if (current == first) {
	371	if (*current == c) return current;
[18700]	372	return last_plus_one;
[12504]	373	}
	374
	375	return current;
	376	}
	377
[16066]	378	text_t::const_iterator findword (text_t::const_iterator first,
	379	text_t::const_iterator last,
	380	const text_t& word)
	381	{
	382	text_t::const_iterator word_begin = word.begin();
	383	text_t::const_iterator word_end = word.end();
	384
	385	while (first != last)
	386	{
	387	text_t::const_iterator char_match = first;
	388	text_t::const_iterator word_here = word_begin;
[20762]	389	while (word_here != word_end && char_match != last)
[16066]	390	{
	391	if (char_match != word_here)
	392	{
	393	break;
	394	}
	395	++char_match;
	396	++word_here;
	397	}
	398	if (word_here==word_end)
	399	{
	400	return first;
	401	}
	402	++first;
	403	}
	404	return last; // get to here only if there is no match
	405	}
	406
[8727]	407	text_t::iterator findword (text_t::iterator first,
	408	text_t::iterator last,
[1860]	409	const text_t& word)
	410	{
	411	text_t::const_iterator word_begin = word.begin();
	412	text_t::const_iterator word_end = word.end();
	413
	414	while (first != last)
	415	{
	416	text_t::iterator char_match = first;
	417	text_t::const_iterator word_here = word_begin;
[20762]	418	while (word_here != word_end && char_match != last)
[1860]	419	{
	420	if (char_match != word_here)
	421	{
	422	break;
	423	}
[8727]	424	++char_match;
	425	++word_here;
[1860]	426	}
	427	if (word_here==word_end)
	428	{
	429	return first;
	430	}
[8727]	431	++first;
[1860]	432	}
	433	return last; // get to here only if there is no match
	434	}
	435
[1076]	436	// get a string up to the next delimiter (which is skipped)
	437	text_t::const_iterator getdelimitstr (text_t::const_iterator first,
	438	text_t::const_iterator last,
	439	unsigned short c, text_t &outstr)
	440	{
	441	text_t::const_iterator here = first;
	442	here = findchar (first, last, c);
	443	outstr.clear();
	444	outstr.appendrange (first, here);
[8727]	445	if (here != last) ++here; // skip c
[1076]	446	return here;
	447	}
	448
	449	text_t::iterator getdelimitstr (text_t::iterator first, text_t::iterator last,
	450	unsigned short c, text_t &outstr)
	451	{
	452	text_t::iterator here = first;
	453	here = findchar (first, last, c);
	454	outstr.clear();
	455	outstr.appendrange (first, here);
[8727]	456	if (here != last) ++here; // skip c
[1076]	457	return here;
	458	}
	459
[16066]	460	text_t::const_iterator getdelimitstr (text_t::const_iterator first, text_t::const_iterator last,
	461	text_t w, text_t &outstr)
	462	{
	463	text_t::const_iterator here = first;
	464	here = findword (first, last, w);
	465	outstr.clear();
	466	outstr.appendrange (first, here);
	467	if (here != last) here += w.size(); // skip w
	468	return here;
	469	}
	470
[1076]	471	// split a string with a character
	472	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	473	unsigned short c, text_tset &outlist)
	474	{
	475	outlist.erase(outlist.begin(), outlist.end());
	476
	477	text_t t;
	478
	479	while (first != last)
	480	{
	481	first = getdelimitstr (first, last, c, t);
	482	outlist.insert (t);
	483	}
	484	}
	485
	486	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	487	unsigned short c, text_tlist &outlist)
	488	{
	489	outlist.erase(outlist.begin(), outlist.end());
	490
	491	text_t t;
	492
	493	while (first != last)
	494	{
	495	first = getdelimitstr (first, last, c, t);
	496	outlist.push_back (t);
	497	}
	498	}
	499
	500	void splitchar (text_t::const_iterator first, text_t::const_iterator last,
	501	unsigned short c, text_tarray &outlist)
	502	{
	503	outlist.erase(outlist.begin(), outlist.end());
	504
	505	text_t t;
	506
	507	while (first != last)
	508	{
	509	first = getdelimitstr (first, last, c, t);
	510	outlist.push_back (t);
	511	}
	512	}
	513
[16066]	514	void splitword (text_t::const_iterator first, text_t::const_iterator last,
	515	text_t w, text_tlist &outlist)
	516	{
	517	outlist.erase(outlist.begin(), outlist.end());
	518
	519	text_t t;
	520
	521	while (first != last)
	522	{
	523	first = getdelimitstr (first, last, w, t);
	524	outlist.push_back (t);
	525	}
	526	}
	527
[1076]	528	// join a string using a character
	529	void joinchar (const text_tset &inlist, unsigned short c, text_t &outtext)
	530	{
	531	outtext.clear ();
	532
	533	text_tset::const_iterator here = inlist.begin ();
	534	text_tset::const_iterator end = inlist.end ();
[8727]	535
	536	if (here != end) {
	537	outtext += *here; ++here;
	538	while (here != end) {
	539	outtext.push_back (c);
[1076]	540	outtext += *here;
[8727]	541	++here;
[1076]	542	}
[8727]	543	}
[1076]	544	}
	545
	546	void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext)
	547	{
	548	outtext.clear ();
	549
	550	text_tlist::const_iterator here = inlist.begin ();
	551	text_tlist::const_iterator end = inlist.end ();
[8727]	552	if (here != end) {
	553	outtext += *here; ++here;
	554	while (here != end) {
	555	outtext.push_back (c);
[1076]	556	outtext += *here;
[8727]	557	++here;
[1076]	558	}
[8727]	559	}
[1076]	560	}
	561
	562	void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext)
	563	{
	564	outtext.clear ();
	565
	566	text_tarray::const_iterator here = inlist.begin ();
	567	text_tarray::const_iterator end = inlist.end ();
[8727]	568	if (here != end) {
	569	outtext += *here; ++here;
	570	while (here != end) {
	571	outtext.push_back (c);
[1076]	572	outtext += *here;
[8727]	573	++here;
[1076]	574	}
[8727]	575	}
[1076]	576	}
	577
[8727]	578	void joinchar (const text_tlist &inlist, const text_t &c, text_t &outtext)
[1088]	579	{
	580	outtext.clear ();
	581
	582	text_tlist::const_iterator here = inlist.begin ();
	583	text_tlist::const_iterator end = inlist.end ();
[8727]	584	if (here != end) {
	585	outtext += *here; ++here;
	586	while (here != end) {
	587	outtext += c;
[1088]	588	outtext += *here;
[8727]	589	++here;
[1088]	590	}
[8727]	591	}
[1088]	592	}
	593
[8727]	594	void joinchar (const text_tset &inlist, const text_t &c, text_t &outtext)
[1088]	595	{
	596	outtext.clear ();
	597
	598	text_tset::const_iterator here = inlist.begin ();
	599	text_tset::const_iterator end = inlist.end ();
[8727]	600	if (here != end) {
	601	outtext += *here; ++here;
	602	while (here != end) {
	603	outtext += c;
[1088]	604	outtext += *here;
[8727]	605	++here;
[1088]	606	}
[8727]	607	}
[1088]	608	}
	609
[8727]	610	void joinchar (const text_tarray &inlist, const text_t &c, text_t &outtext)
[1076]	611	{
	612	outtext.clear ();
	613
	614	text_tarray::const_iterator here = inlist.begin ();
	615	text_tarray::const_iterator end = inlist.end ();
[8727]	616	if (here != end) {
	617	outtext += *here; ++here;
	618	while (here != end) {
	619	outtext += c;
[1076]	620	outtext += *here;
[8727]	621	++here;
[1076]	622	}
[8727]	623	}
[1076]	624	}
	625
	626	// count the occurances of a character within a range
	627	int countchar (text_t::const_iterator first, text_t::const_iterator last,
	628	unsigned short c)
	629	{
	630	int count = 0;
	631	while (first != last) {
[9593]	632	if (*first == c) ++count;
	633	++first;
[1076]	634	}
	635	return count;
	636	}
	637
	638	// return a substring of string from first up to but not including last
	639	text_t substr (text_t::const_iterator first, text_t::const_iterator last) {
	640
[8727]	641	text_t substr; substr.reserve(last - first + 2);
[1076]	642	while (first != last) {
	643	substr.push_back(*first);
[8727]	644	++first;
[1076]	645	}
	646	return substr;
	647	}
	648
	649
	650	// convert to lowercase
	651	void lc (text_t::iterator first, text_t::iterator last) {
	652	while (first != last) {
	653	first = unicode_tolower(first);
[8727]	654	++first;
[1076]	655	}
	656	}
	657
	658	// convert to uppercase
	659	void uc (text_t::iterator first, text_t::iterator last) {
	660	while (first != last) {
	661	first = unicode_toupper(first);
[8727]	662	++first;
[1076]	663	}
	664	}
	665
	666
	667	// checks to see if it is a number (i.e. contains only 0-9)
	668	bool is_number (const text_t &text) {
	669
	670	text_t::const_iterator here = text.begin();
	671	text_t::const_iterator end = text.end();
	672
	673	while (here != end) {
	674	if ((here!='0') && (here!='1') && (*here!='2') &&
	675	(here!='3') && (here!='4') && (*here!='5') &&
	676	(here!='6') && (here!='7') && (*here!='8') &&
	677	(*here!='9')) return false;
[8727]	678	++here;
[1076]	679	}
	680	return true;
	681	}
	682
	683
	684	// checks to see if the text has any letters or digits
	685	bool has_unicode_letdig (const text_t &text) {
	686	if (text.empty()) return false;
	687
	688	text_t::const_iterator here = text.begin();
	689	text_t::const_iterator end = text.end();
	690	while (here != end) {
	691	if (is_unicode_letdig (*here)) return true;
[8727]	692	++here;
[1076]	693	}
	694
	695	return false;
	696	}
	697
[10140]	698	// checks to see if a text_t starts with the specified prefix
	699	bool starts_with(const text_t& text, const text_t& prefix) {
	700	if (prefix.empty()) return true;
	701	if (text.empty() \|\| text.size()<prefix.size()) return false;
	702	text_t substring = substr(text.begin(), text.begin()+prefix.size());
	703	return substring == prefix;
	704	}
	705	// checks to see if a text_t ends with the specified suffix
	706	bool ends_with(const text_t& text, const text_t& suffix) {
	707	if (suffix.empty()) return true;
	708	if (text.empty() \|\| text.size() < suffix.size()) return false;
	709	text_t substring = substr(text.end()-suffix.size(),text.end());
	710	return substring == suffix;
[1076]	711
[10140]	712	}
[1076]	713
[24110]	714	//Trims the whitespace off the beginning and end of a given string
	715	text_t trim (const text_t& text) {
[10140]	716
[24110]	717	if(text.size() == 0) {
	718	return text;
	719	}
	720
	721	text_t::const_iterator firstLetter = text.begin();
	722	text_t::const_iterator lastLetter = text.end();
	723
	724	//Find the start
	725	while (firstLetter != lastLetter) {
	726	if(!is_unicode_space(*firstLetter)) {
	727	break;
	728	}
	729	firstLetter++;
	730	}
	731
	732	//Find the end
[24112]	733	lastLetter-=1;
[24110]	734	while (lastLetter != firstLetter) {
	735	if(!is_unicode_space(*lastLetter)) {
	736	break;
	737	}
	738	lastLetter--;
	739	}
	740
	741	return substr(firstLetter, lastLetter+1);
	742	}
	743
[1076]	744	////////////////////////////////////
	745	// convertclass methods
	746	////////////////////////////////////
	747
	748	// conversion classes used for getting information in to and out of
	749	// the text_t class.
	750
	751	convertclass::convertclass ()
	752	{
	753	// nothing to do
	754	}
	755
[22141]	756	convertclass::~convertclass ()
	757	{
	758	// nothing to do
	759	}
	760
[1076]	761	void convertclass::reset ()
	762	{
	763	// nothing to do
	764	}
	765
	766
	767	////////////////////////////////////
	768	// inconvertclass methods
	769	////////////////////////////////////
	770
	771	// convert from a char stream to the text_t class
	772	// the default version assumes the input is a ascii
	773	// character array
	774
	775	inconvertclass::inconvertclass ()
	776	{
	777	start = NULL;
	778	len = 0;
	779	}
	780
[22141]	781	inconvertclass::~inconvertclass ()
	782	{
	783	// nothing to do
	784	}
[1076]	785
[22141]	786
[1076]	787	void inconvertclass::reset ()
	788	{
	789	start = NULL;
	790	len = 0;
	791	}
	792
	793	void inconvertclass::setinput (char *thestart, size_t thelen)
	794	{
	795	start = thestart;
	796	len = thelen;
	797	}
	798
	799	void inconvertclass::convert (text_t &output, status_t &status)
	800	{
	801	output.clear();
	802
	803	if (start == NULL \|\| len == 0)
	804	{
	805	status = finished;
	806	return;
	807	}
	808
[8727]	809	if (output.capacity() < len + 2)
	810	output.reserve(len + 2);
	811
[1076]	812	// don't want any funny sign conversions happening
	813	unsigned char here = (unsigned char )start;
	814	while (len > 0)
	815	{
	816	output.push_back (*here); // append this character
	817	++here;
	818	--len;
	819	}
	820
	821	start = (char *)here; // save current position
	822	status = finished;
	823	}
	824
	825	// will treat the text_t as a 8-bit string and convert
	826	// it to a 16-bit string using the about convert method.
	827	text_t inconvertclass::convert (const text_t &t) {
	828	text_t out;
	829	text_t tmpout;
	830	status_t status;
	831	text_t::const_iterator here = t.begin();
	832	text_t::const_iterator end = t.end();
	833	unsigned char cbuf[256];
	834	size_t cbuflen = 0;
	835
[8727]	836	out.clear();
	837	if (out.capacity() < t.size() + 2)
	838	out.reserve(t.size() + 2);
[1076]	839	while (here != end) {
	840	while (here != end && cbuflen < 256) {
	841	cbuf[cbuflen++] = (unsigned char)(*here & 0xff);
[8727]	842	++here;
[1076]	843	}
	844
	845	if (cbuflen > 0) {
	846	setinput ((char *)cbuf, cbuflen);
	847	status = unfinished;
	848	while (status == unfinished) {
	849	convert (tmpout, status);
	850	out += tmpout;
	851	}
	852	cbuflen = 0;
	853	}
	854	}
	855
	856	out.setencoding (0); // unicode
	857
	858	return out;
	859	}
	860
	861	// an instance of the default inconvertclass to do simple
	862	// conversions. Note that any functions that use this are
	863	// not reentrant. If a function needs to be reentrant it
	864	// should declare its own instance.
	865	inconvertclass ascii2text_t;
	866
	867
	868	////////////////////////////////////
	869	// outconvertclass methods
	870	////////////////////////////////////
	871
	872	// Convert from a text_t class to a char stream
	873	// This default version assumes the output is a ascii
	874	// character array. If you set the output stream you
	875	// can use this class to output to a stream using the
	876	// << operator. The << operator can also be conveniently
	877	// used to set the output stream by doing something like
	878	//
	879	// cout << text_t2ascii << text_tstr << anothertext_tstr;
	880	//
	881	outconvertclass::outconvertclass ()
	882	{
	883	input = NULL;
	884	outs = NULL;
	885	}
	886
[22141]	887	outconvertclass::~outconvertclass ()
	888	{
	889	// nothing to do
	890	}
	891
	892
[1076]	893	void outconvertclass::reset ()
	894	{
	895	input = NULL;
	896	outs = NULL;
	897	}
	898
	899	void outconvertclass::setinput (text_t *theinput)
	900	{
	901	input = theinput;
	902	if (input != NULL) texthere = input->begin();
	903	}
	904
[8727]	905	void outconvertclass::setdata(text_t *theinput, text_t::iterator thetexthere)
	906	{
	907	input = theinput;
	908	texthere = thetexthere;
	909	}
	910
[1076]	911	void outconvertclass::convert (char *output, size_t maxlen,
	912	size_t &len, status_t &status)
	913	{
	914	if (input == NULL \|\| output == NULL)
	915	{
	916	status = finished;
	917	return;
	918	}
	919
	920	// don't want any funny sign conversions happening
	921	unsigned char uoutput = (unsigned char )output;
	922	text_t::iterator textend = input->end();
	923	len = 0;
	924	while ((len < maxlen) && (texthere != textend))
	925	{
	926	if (texthere < 256) uoutput = (unsigned char)(*texthere);
	927	else {
	928	// put a space or a question mark depending on what
	929	// the character is. Question marks tell the user that
	930	// they are missing some information.
	931	if (is_unicode_space (texthere)) uoutput = ' ';
	932	else *uoutput = '?';
	933	}
	934	++uoutput;
	935	++len;
	936	++texthere;
	937	}
	938
	939	if (texthere == textend) status = finished;
	940	else status = unfinished;
	941	}
	942
	943	// will convert the 16-bit string to a 8-bit stream
	944	// and place the result in a text_t. This method uses
	945	// the above convert function.
	946	text_t outconvertclass::convert (const text_t &t) {
	947	text_t out;
	948	unsigned char cbuf[256];
	949	size_t cbuflen = 0;
	950	status_t status = unfinished;
[8727]	951
	952	out.clear();
	953	if (out.capacity() < t.size() + 2)
	954	out.reserve(t.size() + 2);
[1076]	955	setinput ((text_t *)&t); // discard constant
	956	while (status == unfinished) {
	957	convert ((char *)cbuf, 256, cbuflen, status);
	958	out.appendcarr ((char *)cbuf, cbuflen);
	959	}
	960
	961	out.setencoding (1); // other encoding
	962
	963	return out;
	964	}
	965
	966
	967	void outconvertclass::setostream (ostream *theouts)
	968	{
	969	outs = theouts;
	970	}
	971
	972	ostream *outconvertclass::getostream ()
	973	{
	974	return outs;
	975	}
	976
	977
	978
	979
	980	// an instance of the default outconvertclass to do simple
	981	// conversions
	982	outconvertclass text_t2ascii;
	983
	984
	985
	986	// stream operators for the output class
	987
	988	outconvertclass &operator<< (ostream &theouts, outconvertclass &outconverter)
	989	{
	990	outconverter.setostream(&theouts);
	991	return outconverter;
	992	}
	993
	994
	995	#define STREAMBUFSIZE 256
	996	outconvertclass &operator<< (outconvertclass &outconverter, const text_t &t)
	997	{
	998	ostream *outstream = outconverter.getostream();
	999
	1000	if (outstream == NULL) return outconverter;
	1001
	1002	char outbuf[STREAMBUFSIZE];
	1003	size_t len;
	1004	outconvertclass::status_t status = outconvertclass::unfinished;
	1005
	1006	// assume that there is no data needing converting
	1007	// left in the converter
	1008	outconverter.setinput ((text_t *)(&t)); // note the const -> nonconst conversion
	1009
	1010	while (status == outconvertclass::unfinished)
	1011	{
	1012	outconverter.convert (outbuf, STREAMBUFSIZE, len, status);
	1013	if (len > 0) outstream->write(outbuf, len);
	1014	}
	1015
	1016	return outconverter;
	1017	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: