Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 22934

Last change on this file since 22934 was 22934, checked in by ak19, 14 years ago
For ticket no 712. Fixes to 2 related crashes that occurred when using a combination of advanced (server.exe and library.cgi depending on which web server was used): 1. When parsing cgi args, arrays stem and fold contained the URL encodings percent-2-C rather than commas for delimiters and weren't split properly resulting in arrays of unexpected lengths (and values). Need to decode the percent-2-C to commas by calling decode_commas() in cgiutils.cpp before splitting. 2. decode_commas in cgiutils.cpp was performing an illegal iterator operation by attempting to peek PAST the end of the iterator which doesn't seem to be allowed by the STL code. When the iteration really got past the end, the iteration operation causes a problem resulting in a server.exe crash of its own.
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 27.2 KB

Line
1	/**********************************************************************
2	*
3	* cgiutils.cpp -- general cgi utilities
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cgiutils.h"
27	#include "fileutil.h"
28	#include "gsdlunicode.h"
29	#include "fileutil.h"
30	#include "unitool.h" // in mg, for output_utf8_char
31	#include <cstdlib>
32	#include <time.h>
33
34	#if defined(GSDL_USE_OBJECTSPACE)
35	# include <ospace\std\iostream>
36	# include <ospace\std\fstream>
37	#elif defined(GSDL_USE_IOS_H)
38	# include <iostream.h>
39	# include <fstream.h>
40	#else
41	# include <iostream>
42	# include <fstream>
43	#endif
44
45
// Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// to its numeric value. Any other character is returned unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') {
    value = c - '0';
  } else if ('a' <= c && c <= 'f') {
    value = c - 'a' + 10;
  } else if ('A' <= c && c <= 'F') {
    value = c - 'A' + 10;
  }

  return value;
}
52
53
54	static void c2hex (unsigned short c, text_t &t) {
55	t.clear();
56
57	if (c >= 256) {
58	t = "20"; // ' '
59	return;
60	}
61
62	unsigned short o1, o2;
63
64	o1 = (c/16) % 16;
65	o2 = c % 16;
66	if (o1 >= 10) o1 += 'a' - 10;
67	else o1 += '0';
68	if (o2 >= 10) o2 += 'a' - 10;
69	else o2 += '0';
70
71	t.push_back(o1);
72	t.push_back(o2);
73	}
74
75	static text_t::iterator getline (text_t::iterator first,
76	text_t::iterator last,
77	bool include_crlf) {
78	while (first != last) {
79	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
80	// found <CRLF>
81	if (include_crlf) first += 2;
82	break;
83	}
84
85	first++;
86	}
87
88	return first;
89	}
90
91	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92	text_t &filetype, bool &isfile, text_t &argstr,
93	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95	if (!argname.empty()) {
96
97	if (!isfile) {
98	// argdata includes a trailing <CRLF> that we must remove
99	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
100	argdata.erase(argdata.end()-2, argdata.end());
101	}
102	if (!argstr.empty()) argstr += "&";
103	argstr += argname + "=" + argdata;
104
105	} else if (!filename.empty()) {
106	// filedata includes a trailing <CRLF> that we must remove
107	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
108	filedata.erase(filedata.end()-2, filedata.end());
109	}
110
111	// create tmp_name for storing the file on disk, using the current timestamp
112	text_t tmp_name(time(NULL));
113	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
114
115	char *tmp_name_c = tmp_name.getcstr();
116
117	// write the file data to disk
118	outconvertclass out;
119	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
120	filestream << out << filedata;
121	filestream.close();
122	delete tmp_name_c;
123
124	// populate the fields of a fileupload_t and put it in the
125	// fileuploads map
126	fileupload_t fu;
127	// note that filename currently may or may not include the path since
128	// some browsers (e.g. IE) include the path while others
129	// (e.g. mozilla) do not. we should probably remove the path from
130	// this field here to get a consistent value across all browsers.
131	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132	if (slash != filename.end()) {
133	filename = substr(slash+1, filename.end());
134	}
135	fu.name = filename;
136	fu.type = filetype;
137	// size has yet to be implemented
138	fu.size = filedata.size();
139
140	fu.tmp_name = tmp_name;
141	fileuploads[argname] = fu;
142	}
143	}
144	isfile = false;
145	argname.clear();
146	argdata.clear();
147	filename.clear();
148	filedata.clear();
149	filetype.clear();
150	}
151
152	// parse data obtained through a CGI POST request
153	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156	text_t argstr;
157
158	text_t::iterator content_type_begin = content_type.begin();
159	text_t::iterator content_type_end = content_type.end();
160	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161	// a simple post request
162
163	return raw_post_data;
164
165	} else {
166	// multipart/form data - may contain one or more uploaded files
167
168	/*
169	content_type should look something like the following
170	multipart/form-data; boundary=---------------------------7d411e1a50330
171
172	while raw_post_data will be as follows
173	-----------------------------7d43e73450330CRLF
174	Content-Disposition: form-data; name="e"<CRLF>
175	<CRLF>
176	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177	-----------------------------7d43e73450330<CRLF>
178	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179	Content-Type: application/msword<CRLF>
180	<CRLF>
181	<Content of file><CRLF>
182
183	*/
184
185	// first get the boundary from content-type
186	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
187	if (boundary_begin+9 < content_type_end)
188	{
189	// skip over "boundary=" part of string
190	boundary_begin += 9;
191	}
192	else {
193	// error
194	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
195	return "";
196	}
197	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198	int boundary_len = boundary.size();
199
200
201	text_t argname, argdata, filename, filedata, filetype;
202	bool isfile = false;
203	text_t::iterator data_here = raw_post_data.begin();
204	text_t::iterator data_end = raw_post_data.end();
205	while (data_here != data_end) {
206
207	// get the next available line (including the trailing <CRLF>
208	text_t line = substr(data_here, getline(data_here, data_end, true));
209
210	data_here += line.size();
211	text_t::iterator line_begin = line.begin();
212	text_t::iterator line_end = line.end();
213	if (findword(line_begin, line_end, boundary) != line_end) {
214	// we've found a boundary
215	process_post_section(argname, argdata, filename, filedata, filetype,
216	isfile, argstr, fileuploads, gsdlhome);
217
218	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219	// we've found the the beginning of a new section
220	argname.clear();
221	argdata.clear();
222
223	// get the name of this piece of form data
224	text_t::iterator it = findword(line_begin, line_end, "name=\"");
225	if (it == line_end) break; // error - this shouldn't happen
226	it = findchar(it, line_end, '"');
227	if ((it != line_end) && (it+1 != line_end)) {
228	argname = substr(it+1, findchar(it+1, line_end, '"'));
229	}
230
231	// if this piece of form data contains filename="" it's a file
232	// upload and needs to be treated special
233	it = (findword(line_begin, line_end, "filename=\""));
234	if (it != line_end) {
235	// we've found a file upload
236	isfile = true;
237	it = findchar(it, line_end, '"');
238	if ((it != line_end) && (it+1 != line_end)) {
239	filename = substr(it+1, findchar(it+1, line_end, '"'));
240	}
241
242	// the next line is the content-type of this section
243	line = substr(data_here, getline(data_here, data_end, true));
244	data_here += line.size();
245	line_begin = line.begin();
246	line_end = line.end();
247	it = (findword(line_begin, line_end, "Content-Type: "));
248	if (it != line_end) {
249	filetype = substr(it+14, getline(it, line_end, false));
250	}
251	}
252
253	// eat up the next line as it's just a <CRLF> on it's own
254	data_here += 2;
255
256	} else {
257	if (isfile) filedata += line;
258	else argdata += line;
259	}
260
261	}
262
263	// process last section
264	process_post_section(argname, argdata, filename, filedata, filetype,
265	isfile, argstr, fileuploads, gsdlhome);
266
267	return argstr;
268	}
269	}
270
271	// convert %xx and + to their appropriate equivalents
272	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
273	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274	// actually was, then this returns utf-8, and needs to_uni on the
275	// result to get it back to unicode. If the encoding wasn't utf-8, then the
276	// output may be crap. Seems to work for 8 bit encodings.
277	// Really, this should be given the encoding, and should always return unicode.
278	void decode_cgi_arg (text_t &argstr) {
279	text_t::iterator in = argstr.begin();
280	text_t::iterator out = in;
281	text_t::iterator end = argstr.end();
282
283	while (in != end) {
284	if (in == '+') out = ' ';
285
286	else if (*in == '%') {
287	unsigned short c = '%';
288	++in;
289	if (in != end) { // this is an encoding...
290	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291	// this assumes a short int is at least 16 bits...
292	++in;
293	if (in != end)
294	c=hexdigit(*in++) << 12;
295	if (in != end)
296	c+=hexdigit(*in++) << 8;
297	if (in != end)
298	c+=hexdigit(*in++) << 4;
299	if (in != end)
300	c+=hexdigit(*in);
301	/* BAD!! The following assumes the interface is using utf-8. But
302	at this point we don't know what encoding we are using, unless
303	we can parse it out of the string we are currently decoding... */
304	text_t uni=" ";
305	uni[0]=c;
306	text_t utf8=to_utf8(uni);
307	int last_byte=utf8.size()-1;
308	for (int i=0;i<last_byte;++i)
309	*out++ = utf8[i];
310	c=utf8[last_byte];
311	} else { // convert %HH to hex value
312	c = hexdigit (*in);
313	++in;
314	if (in != end && c < 16) { // sanity check on the previous character
315	c = c16 + hexdigit (in);
316	}
317	}
318	}
319	*out = c;
320	} else out = in;
321
322	if (in != end) ++in;
323	++out;
324	}
325
326	// remove the excess characters
327	argstr.erase (out, end);
328
329	}
330
331
332	// split up the cgi arguments
333	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
334	cgiargsclass &args) {
335	args.clear();
336
337	text_t::const_iterator here = argstr.begin();
338	text_t::const_iterator end = argstr.end();
339	text_t key, value;
340
341	// extract out the key=value pairs
342	while (here != end) {
343	// get the next key and value pair
344	here = getdelimitstr (here, end, '=', key);
345	here = getdelimitstr (here, end, '&', value);
346
347	// convert %xx and + to their appropriate equivalents
348	decode_cgi_arg (value);
349	value.setencoding(1); // other encoding
350	// store this key=value pair
351	if (!key.empty()) {
352
353	// if arg occurs multiple times (as is the case with multiple
354	// checkboxes using the same name) we'll create a comma separated
355	// list of all the values (this uses a hack that encodes naturally
356	// occurring commas as %2C - values will therefore need to be decoded
357	// again before use) - it should use an array instead
358	const cgiarginfo *info = argsinfo.getarginfo (key);
359	if (info==NULL) {
360	// If info is NULL, we can't tell if the arg is multiple value or not
361	// Because we need to have dynamically named arguments multivalued, we
362	// will always assume multiplevalue = true
363	// If the arg is not multi valued, then you need to decode the commas.
364	if (args.getarg(key)==NULL) {
365	args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
366	}
367	else {
368	text_t newvalue = args[key];
369
370	newvalue += "," + encode_commas(value);
371	newvalue.setencoding(1); // other encoding
372	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
373	}
374	}
375	else {
376	if (info->multiplevalue) {
377
378	text_t newvalue = args[key];
379	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
380	newvalue += encode_commas(value);
381	newvalue.setencoding(1); // other encoding
382	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
383
384	} else {
385	args.setarg (key, value, cgiarg_t::cgi_arg);
386	}
387	}
388	}
389	}
390	}
391
392	text_t encode_commas (const text_t &intext) {
393
394	text_t outtext;
395
396	text_t::const_iterator here = intext.begin ();
397	text_t::const_iterator end = intext.end ();
398
399	while (here != end) {
400	if (*here == ',') outtext += "%2C";
401	else outtext.push_back (*here);
402	++here;
403	}
404	return outtext;
405	}
406
407	text_t decode_commas (const text_t &intext) {
408
409	text_t outtext;
410
411	text_t::const_iterator here = intext.begin ();
412	text_t::const_iterator end = intext.end ();
413
414	/*while (here != end) {
415	if (((here+2)<end) && here == '%' && (here+1) == '2'
416	&& ((here+2) == 'C' \|\| (here+2) == 'c')) {
417	here += 2;
418	outtext.push_back(',');
419
420	}else outtext.push_back (*here);
421	++here;
422	}
423	return outtext;*/
424
425	// iterators do not allow a forward increment/peek that goes past
426	// iterator's end: tests like (here+2 <end) above cause errors if
427	// the result ends up being > end. So have to test one character
428	// at a time, since tests like (here != end) are allowed:
429
430	while (here != end) { // look for "%2C" to replace with ","
431	if(*here == '%') {
432	++here;
433	if(here != end && *here == '2') {
434	++here;
435	if(here != end && (here == 'C' \|\| here == 'c')) {
436	outtext.push_back(',');
437	} else {
438	here -= 2;
439	outtext.push_back (*here);
440	}
441	} else { // go back to % char and push it back
442	--here;
443	outtext.push_back (*here);
444	}
445	} else {
446	outtext.push_back (*here);
447	}
448
449	++here;
450	} // end while
451	return outtext;
452	}
453
454	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
455	text_t minus_safe (const text_t &intext, bool utf8) {
456
457	text_t outtext;
458
459	text_t::const_iterator here = intext.begin ();
460	text_t::const_iterator end = intext.end ();
461
462	while (here != end) {
463	if (*here == '-') outtext += "Zz-";
464	else outtext.push_back (*here);
465	++here;
466	}
467	if (utf8) {
468	outtext = cgi_safe_utf8 (outtext);
469	} else {
470	outtext = cgi_safe_unicode (outtext);
471	}
472	return outtext;
473	}
474
475	// takes utf-8 input
476	text_t cgi_safe_utf8 (const text_t &intext) {
477	text_t outtext;
478
479	text_t::const_iterator here = intext.begin ();
480	text_t::const_iterator end = intext.end ();
481	unsigned short c;
482	text_t ttmp;
483
484	while (here != end) {
485	c = *here;
486	if (((c >= 'a') && (c <= 'z')) \|\|
487	((c >= 'A') && (c <= 'Z')) \|\|
488	((c >= '0') && (c <= '9')) \|\|
489	(c == '%') \|\| (c == '-')) {
490	// alphanumeric character
491	outtext.push_back(c);
492	} else if (c == ' ') {
493	// space
494	outtext.push_back('+');
495	} else if (c > 255) { // not utf-8 character
496	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
497	} else {
498	// everything else
499	outtext.push_back('%');
500	c2hex(c, ttmp);
501	outtext += ttmp;
502	}
503
504	++here;
505	}
506
507	return outtext;
508	}
509	// takes unicode input
510	text_t cgi_safe_unicode (const text_t &intext) {
511	text_t outtext;
512
513	text_t::const_iterator here = intext.begin ();
514	text_t::const_iterator end = intext.end ();
515	unsigned short c;
516	text_t ttmp;
517
518	while (here != end) {
519	c = *here;
520	if (((c >= 'a') && (c <= 'z')) \|\|
521	((c >= 'A') && (c <= 'Z')) \|\|
522	((c >= '0') && (c <= '9')) \|\|
523	(c == '%') \|\| (c == '-')) {
524	// alphanumeric character
525	outtext.push_back(c);
526	} else if (c == ' ') {
527	// space
528	outtext.push_back('+');
529	} else if (c > 127) { // unicode character
530	unsigned char buf[3]; // up to 3 bytes
531	buf[0]='\0';buf[1]='\0';buf[2]='\0';
532	output_utf8_char(c,buf, buf+2);
533	outtext.push_back('%');
534	c2hex(buf[0], ttmp);
535	outtext += ttmp;
536	outtext.push_back('%');
537	c2hex(buf[1], ttmp);
538	outtext += ttmp;
539	if (buf[2]) {
540	outtext.push_back('%');
541	c2hex(buf[2], ttmp);
542	outtext += ttmp;
543	}
544	} else {
545	// everything else
546	outtext.push_back('%');
547	c2hex(c, ttmp);
548	outtext += ttmp;
549	}
550
551	++here;
552	}
553
554	return outtext;
555	}
556
557
558
559
560	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
561	text_t::const_iterator last,
562	text_t &argname) {
563	first = getdelimitstr (first, last, '-', argname);
564	return first;
565	}
566
567
568	// check_save_conf_str checks the configuration string for
569	// the saved args and makes sure it does not conflict with
570	// the information about the arguments. If an error is encountered
571	// it will return false and the program should not produce any
572	// output.
573	bool check_save_conf_str (const text_t &saveconf,
574	const cgiargsinfoclass &argsinfo,
575	ostream &logout) {
576	outconvertclass text_t2ascii;
577
578	text_tset argsset;
579	text_t::const_iterator saveconfhere = saveconf.begin ();
580	text_t::const_iterator saveconfend = saveconf.end ();
581	text_t argname;
582	const cgiarginfo *info;
583
584	// first check to make sure all saved arguments can be saved
585
586	while (saveconfhere != saveconfend) {
587	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
588
589	if (!argname.empty()) {
590	// save the argument name for later
591	argsset.insert (argname);
592
593	// check the argument
594	info = argsinfo.getarginfo (argname);
595	if (info == NULL) {
596	logout << text_t2ascii << "Error: the cgi argument \"" << argname
597	<< "\" is used in the configuration string for the\n"
598	<< "saved arguments but does not exist as a valid argument.\n\n";
599	return false;
600	}
601	if (info->savedarginfo == cgiarginfo::mustnot) {
602	logout << text_t2ascii << "Error: the cgi argument \"" << argname
603	<< "\" is used in the configuration string for the\n"
604	<< "saved arguments but has been specified as an argument whose\n"
605	<< "state must not be saved.\n\n";
606	return false;
607	}
608	}
609	}
610
611
612	// next check that all saved arguments that should be saved
613	// are saved
614	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
615	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
616
617	while (argsinfohere != argsinfoend) {
618	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
619	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
620	logout << text_t2ascii << "Error: the cgi argument \""
621	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
622	<< "be save but was not listed in the saved arguments.\n\n";
623	return false;
624	}
625
626	++argsinfohere;
627	}
628
629	return true; // made it, no clashes
630	}
631
632
633	// create_save_conf_str will create a configuration string
634	// based on the information in argsinfo. This method of configuration
635	// is not recomended as small changes can produce large changes in
636	// the resulting configuration string (for instance a totally different
637	// ordering). Only arguments which "must" be saved are included in
638	// the resulting string.
639	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
640	ostream &/logout/) {
641	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
642	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
643	text_t saveconf;
644	bool first = true;
645
646	while (argsinfohere != argsinfoend) {
647	// save this argument if it must be saved
648	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
649	if (!first) saveconf.push_back ('-');
650	else first = false;
651	saveconf += (*argsinfohere).second.shortname;
652	}
653
654	++argsinfohere;
655	}
656
657	return saveconf;
658	}
659
660
661	// expand_save_args will expand the saved arguments based
662	// on saveconf placing the results in args if they are not
663	// already defined. If it encounters an error it will return false
664	// and output more information to logout.
665	bool expand_save_args (const cgiargsinfoclass &argsinfo,
666	const text_t &saveconf,
667	cgiargsclass &args,
668	ostream &logout) {
669	outconvertclass text_t2ascii;
670
671	text_t *arg_e = args.getarg("e");
672	if (arg_e == NULL) return true; // no compressed arguments
673	if (arg_e->empty()) return true; // no compressed arguments
674
675	text_t argname, argvalue;
676	const cgiarginfo *argnameinfo;
677
678	text_t::const_iterator saveconfhere = saveconf.begin();
679	text_t::const_iterator saveconfend = saveconf.end();
680
681	text_t::iterator arg_ebegin = arg_e->begin();
682	text_t::iterator arg_eend = arg_e->end();
683	text_t::iterator arg_ehere = arg_ebegin;
684	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
685	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
686
687	if (!argname.empty()) {
688	// found another entry
689	argnameinfo = argsinfo.getarginfo (argname);
690
691	if (argnameinfo == NULL) {
692	// no information about the argument could be found
693	// we can't keep going because we don't know whether
694	// this argument is a single or multiple character value
695	logout << text_t2ascii << "Error: the cgi argument \"" << argname
696	<< "\" was specified as being a compressed argument\n"
697	<< "but no information about it could be found within the "
698	<< "cgiargsinfoclass.\n";
699	return false;
700
701	} else {
702
703	// found the argument information
704	if (argnameinfo->multiplechar) {
705	text_t::const_iterator sav = arg_ehere;
706	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
707	if (distance(arg_ebegin, arg_ehere) > 2) {
708	// replace any '-' chars escaped with 'Zz'
709	bool first = true;
710	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
711	if (first) argvalue.clear();
712
713	// Hey, here's a wild idea. Why don't we check that there is
714	// another hyphen in the cgiarge before we get a pointer to it and
715	// add one. That way we are far less likely to wander off into
716	// random memory merrily parsing arguments that are then lovingly
717	// spewed all over the HTML page returned at the usage logs.
718	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
719	if (minus_itr == arg_eend)
720	{
721	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
722	return false;
723	}
724	arg_ehere = minus_itr + 1;
725
726	while (sav != (arg_ehere-1)) {
727	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
728	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
729	++sav;
730	}
731	first = false;
732	}
733	}
734	argvalue.setencoding(1); // other encoding
735	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
736	} else {
737	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
738	++arg_ehere;
739	}
740	}
741	}
742	}
743
744	return true;
745	}
746
747
748	// adds the default values for those arguments which have not
749	// been specified
750	void add_default_args (const cgiargsinfoclass &argsinfo,
751	cgiargsclass &args,
752	ostream &/logout/) {
753	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
754	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
755
756	while (argsinfohere != argsinfoend) {
757	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
758	args.setdefaultarg ((*argsinfohere).second.shortname,
759	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
760	}
761	++argsinfohere;
762	}
763	}
764
765	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
766	cgiargsclass &args,
767	fileupload_tmap &fileuploads,
768	ostream &logout) {
769
770	const cgiarginfo *info = argsinfo.getarginfo("a");
771	fileupload_tmap::const_iterator this_file = fileuploads.begin();
772	fileupload_tmap::const_iterator end_file = fileuploads.end();
773	while (this_file != end_file) {
774	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
775	if (info != NULL) {
776
777	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
778
779	args.setargfile((this_file).first, (this_file).second);
780	}
781	}
782	this_file++;
783	}
784	}
785
786	// compress_save_args will compress the arguments and return
787	// them in compressed_args. If an error was encountered
788	// compressed_args will be set to to "", an error will be
789	// written to logout, and the function will return false.
790	bool compress_save_args (const cgiargsinfoclass &argsinfo,
791	const text_t &saveconf,
792	cgiargsclass &args,
793	text_t &compressed_args,
794	outconvertclass &outconvert,
795	ostream &logout) {
796	outconvertclass text_t2ascii;
797
798	compressed_args.clear();
799
800	text_t argname, argvalue;
801	const cgiarginfo *argnameinfo;
802
803	text_t::const_iterator saveconfhere = saveconf.begin();
804	text_t::const_iterator saveconfend = saveconf.end();
805
806	while (saveconfhere != saveconfend) {
807	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
808
809	if (!argname.empty()) {
810	// found another entry
811	argnameinfo = argsinfo.getarginfo (argname);
812
813	if (argnameinfo == NULL) {
814	// no information about the argument could be found
815	// we can't keep going because we don't know whether
816	// this argument is a single or multiple character value
817	logout << text_t2ascii << "Error: the cgi argument \"" << argname
818	<< "\" was specified as being a compressed argument\n"
819	<< "but no information about it could be found within the "
820	<< "cgiargsinfoclass.\n";
821	compressed_args.clear();
822	return false;
823
824	} else {
825	// found the argument information
826	if (argnameinfo->multiplechar) {
827	// multiple character argument -- sort out any '-' chars
828	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
829	compressed_args += minus_safe (args[argname], false);
830	else
831	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
832
833	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
834
835	} else {
836	// single character argument
837	if (args[argname].size() == 0) {
838	logout << text_t2ascii << "Error: the cgi argument \"" << argname
839	<< "\" was specified as being a compressed argument which\n"
840	<< "should have a one character value but it was empty.\n\n";
841	compressed_args.clear ();
842	return false;
843
844	} else if (args[argname].size() > 1) {
845	logout << text_t2ascii << "Error: the cgi argument \"" << argname
846	<< "\" was specified as being a compressed argument which\n"
847	<< "should have a one character value but it had multiple characters.\n\n";
848	compressed_args.clear ();
849	return false;
850	}
851
852	// everything is ok
853	compressed_args += args[argname];
854	}
855	}
856	}
857	}
858
859	return true;
860	}
861
862
863	// args_tounicode converts any arguments which are not in unicode
864	// to unicode using inconvert
865	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
866	cgiargsclass::iterator here = args.begin();
867	cgiargsclass::iterator end = args.end();
868
869	while (here != end) {
870	if ((*here).second.value.getencoding() > 0) {
871	// Call reset() before converting each argument, to prevent problems when converting the last
872	// argument left the converter in a bad state
873	inconvert.reset();
874	(here).second.value = inconvert.convert((here).second.value);
875	}
876
877	++here;
878	}
879	}
880
881	// fcgienv will be loaded with environment name-value pairs
882	// if using fastcgi (had to do this as getenv doesn't work
883	// with our implementation of fastcgi). if fcgienv is empty
884	// we'll simply use getenv
885	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
886	if (fcgienv.empty()) {
887	char *n = name.getcstr();
888	char *v = getenv(n);
889	delete []n;
890	if (v != NULL) return v;
891	return g_EmptyText;
892
893	} else return fcgienv[name];
894	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: