Context Navigation

source: gsdl/tags/gsdl-2_75-distribution/lib/cfgread.cpp@ 18510

Last change on this file since 18510 was 9515, checked in by mdewsnip, 19 years ago
Fixed a fairly obscure bug where isspace was returning true for characters that weren't actually spaces, thus corrupting some Unicode values when reading main.cfg/collect.cfg files (on Windows only). It seems to have been a signed/unsigned char issue, because casting the isspace argument to an unsigned char fixed the problem.
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 6.1 KB

Line
1	/**********************************************************************
2	*
3	* cfgread.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cfgread.h"
27	#include <cctype> // for isspace()
28
29	int write_ini_line (ofstream &fileout, const text_t &key, const text_t &value) {
30	if (key.empty() \|\| value.empty()) return -1;
31	outconvertclass text_t2ascii;
32	fileout << text_t2ascii << key << "=" << value << "\n";
33	return 0;
34	}
35
36	int read_ini_line (ifstream &filein, text_t &key, text_t &value) {
37	if (filein.eof()) return -1;
38
39	key.clear();
40	value.clear();
41	char c;
42	filein.get(c);
43
44	int foundeq = 0;
45	while (!filein.eof() && c != '\n') {
46	if (!foundeq && c == '=') {foundeq = 1; filein.get(c);}
47
48	if (foundeq) value.push_back(c);
49	else key.push_back(c);
50	filein.get(c);
51	}
52	if (key.empty()) return 0; // blank line maybe?
53	return 0;
54	}
55
56	// write out line of values to cfgfile
57	// does nothing fancy - make sure no values contain carriage returns
58	int write_cfg_line (ofstream &fileout, const text_tarray &values) {
59	outconvertclass text_t2ascii;
60
61	text_tarray::const_iterator here = values.begin();
62	text_tarray::const_iterator end = values.end();
63
64	bool first = true;
65	while (here != end) {
66	if (first) fileout << text_t2ascii << *here;
67	else fileout << text_t2ascii << " \"" << *here << "\"";
68	first = false;
69	++here;
70	}
71	fileout << "\n";
72	return 0;
73	}
74
75	// same, but use a file descriptor this time
76	int write_cfg_line (int fileout, const text_tarray &values) {
77	outconvertclass text_t2ascii;
78
79	text_tarray::const_iterator here = values.begin();
80	text_tarray::const_iterator end = values.end();
81
82	if (here != end) {
83	char *s=here->getcstr();
84	write(fileout,s ,here->size());
85	delete []s;
86
87	++here;
88	}
89	while (here != end) {
90	write(fileout, " \"", 2);
91	char *s=here->getcstr();
92	write(fileout,s ,here->size());
93	delete []s;
94	write(fileout, "\"", 1);
95
96	++here;
97	}
98	write(fileout,"\n",1);
99	return 0;
100	}
101
102
103	// returns 0 on success, -1 on failure
104	int read_cfg_line (ifstream &filein, text_tarray &values) {
105	// we split up the line into tokens, pushing each token into the
106	// values array. Quoted phrases are a single token. A "\" at the end
107	// of a line continues onto the next line.
108
109	values.erase(values.begin(), values.end());
110
111	if (!filein.good()) return -1;
112
113	text_t curvalue;
114	char c1;
115	filein.get(c1);
116
117	// skip white space
118	while (!filein.eof() && isspace((unsigned char) c1)) { filein.get(c1); }
119
120	// ignore comments
121	while (c1 == '#') {
122	while (!filein.eof() && c1!='\n' && c1!='\r') { filein.get(c1); }
123	// skip white space...
124	while (!filein.eof() && isspace((unsigned char) c1)) { filein.get(c1); }
125	}
126
127	// deal with all the records on this line (possibly multi-line)
128
129	while (!filein.eof()) {
130	if (c1=='\n' \|\| c1=='\r') { // shouldn't happen?
131	break;
132	}
133
134	// get the next token
135	curvalue.clear();
136
137	bool inquote=false;
138	char quotemark='"';
139	char preceding='\0'; // 1-char state to allow \" and \'
140	// see if this is a quoted phrase
141	if (c1=='\'' \|\| c1=='\"') { // starts with a quote
142	inquote=true;
143	quotemark = c1;
144	preceding = c1; // just to initialise
145	filein.get(c1);
146	}
147
148	// get token or a whole phrase
149	while (!filein.eof()) {
150	if (isspace((unsigned char) c1)) {
151	if (! inquote) {
152	// end of token, not inside quote marks
153	break;
154	} else {
155	// inside quote marks.
156	/* Turn eol into space, in case other parsing bits expect eol to
157	also mean end of parsing... */
158	c1=' ';
159	}
160	}
161	if (c1 == quotemark && inquote && preceding != '\\') {
162	// end of quoted phrase found
163	inquote=false;
164	filein.get(c1);
165	continue;
166	}
167
168	// add current char to token/phrase
169	// see if current byte is part of a multibyte char (utf-8 only!)
170	unsigned short int c; // text_t uses 16bit unicode
171	unsigned char uc1=(unsigned)c1;
172	if (uc1 < 0x80) {
173	c=uc1;
174	} else if (uc1 >= 0xc0 && uc1 <= 0xdf) {
175	// 2-byte utf-8
176	unsigned char c2;
177	// two byte character
178	filein.get((char&)c2); // get takes a signed char
179	c = ((uc1 & 0x1f) << 6) + (c2 & 0x3f);
180	} else if (uc1 >= 0xe0 && uc1 <= 0xef) {
181	// 3-byte character
182	unsigned char c2, c3;
183	filein.get((char&)c2);
184	filein.get((char&)c3);
185	c = ((uc1 & 0xf) << 12) + ((c2 & 0x3f) << 6)
186	+ (c3 & 0x3f);
187	} else {c=uc1;} // we don't do group2/plane0 (4,5,6-byte utf-8)
188
189	curvalue.push_back(c); // 16bit unicode
190	if (inquote)
191	preceding = c1;
192
193	filein.get(c1);
194	}
195	// we now have a token or a phrase
196
197	// see if we've reached the end of the line
198	if (c1 == '\n' \|\| c1 == '\r') {
199	if (curvalue != "\\") { // the line DOESN'T continue. End of line.
200	values.push_back(curvalue);
201	break; // end of token/phrase
202	} else {
203	// swallow up the EOL chars
204	while (!filein.eof() && (c1=='\r' \|\| c1=='\n')) filein.get(c1);
205	// the current token "\\" will be cleared below
206	}
207	} else { // no new line seen
208	values.push_back(curvalue);
209	}
210
211	curvalue.clear();
212
213	// remove whitespace (but not newline/CR chars) before next token
214	while (!filein.eof() && (c1==' ' \|\| c1=='\t')) filein.get(c1);
215
216	} // while(1)
217
218
219	return 0;
220	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: