Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 26560

Last change on this file since 26560 was 26560, checked in by ak19, 11 years ago
URL encoding forward-slash breaks subcollections, as subcollections are designated with forward slash. Not encoding forward slash anymore.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 27.9 KB

Line
1	/**********************************************************************
2	*
3	* cgiutils.cpp -- general cgi utilities
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cgiutils.h"
27	#include "fileutil.h"
28	#include "gsdlunicode.h"
29	#include "fileutil.h"
30	#include "unitool.h" // in mg, for output_utf8_char
31	#include <cstdlib>
32	#include <time.h>
33
34	#if defined(GSDL_USE_OBJECTSPACE)
35	# include <ospace\std\iostream>
36	# include <ospace\std\fstream>
37	#elif defined(GSDL_USE_IOS_H)
38	# include <iostream.h>
39	# include <fstream.h>
40	#else
41	# include <iostream>
42	# include <fstream>
43	#endif
44
45
// Translate a single hexadecimal digit character ('0'-'9', 'a'-'f',
// 'A'-'F') into its numeric value 0-15. Anything that is not a hex
// digit is returned unchanged.
static unsigned short hexdigit (unsigned short c) {
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  if ('a' <= c && c <= 'f') {
    return 10 + (c - 'a');
  }
  if ('A' <= c && c <= 'F') {
    return 10 + (c - 'A');
  }
  // not a hex digit: hand the input straight back
  return c;
}
52
53
54	static void c2hex (unsigned short c, text_t &t) {
55	t.clear();
56
57	if (c >= 256) {
58	t = "20"; // ' '
59	return;
60	}
61
62	unsigned short o1, o2;
63
64	o1 = (c/16) % 16;
65	o2 = c % 16;
66	if (o1 >= 10) o1 += 'a' - 10;
67	else o1 += '0';
68	if (o2 >= 10) o2 += 'a' - 10;
69	else o2 += '0';
70
71	t.push_back(o1);
72	t.push_back(o2);
73	}
74
75	static text_t::iterator getline (text_t::iterator first,
76	text_t::iterator last,
77	bool include_crlf) {
78	while (first != last) {
79	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
80	// found <CRLF>
81	if (include_crlf) first += 2;
82	break;
83	}
84
85	first++;
86	}
87
88	return first;
89	}
90
91	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92	text_t &filetype, bool &isfile, text_t &argstr,
93	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95	if (!argname.empty()) {
96
97	if (!isfile) {
98	// argdata includes a trailing <CRLF> that we must remove
99	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
100	argdata.erase(argdata.end()-2, argdata.end());
101	}
102	if (!argstr.empty()) argstr += "&";
103	argstr += argname + "=" + argdata;
104
105	} else if (!filename.empty()) {
106	// filedata includes a trailing <CRLF> that we must remove
107	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
108	filedata.erase(filedata.end()-2, filedata.end());
109	}
110
111	// create tmp_name for storing the file on disk, using the current timestamp
112	text_t tmp_name(time(NULL));
113	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
114
115	char *tmp_name_c = tmp_name.getcstr();
116
117	// write the file data to disk
118	outconvertclass out;
119	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
120	filestream << out << filedata;
121	filestream.close();
122	delete tmp_name_c;
123
124	// populate the fields of a fileupload_t and put it in the
125	// fileuploads map
126	fileupload_t fu;
127	// note that filename currently may or may not include the path since
128	// some browsers (e.g. IE) include the path while others
129	// (e.g. mozilla) do not. we should probably remove the path from
130	// this field here to get a consistent value across all browsers.
131	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132	if (slash != filename.end()) {
133	filename = substr(slash+1, filename.end());
134	}
135	fu.name = filename;
136	fu.type = filetype;
137	// size has yet to be implemented
138	fu.size = filedata.size();
139
140	fu.tmp_name = tmp_name;
141	fileuploads[argname] = fu;
142	}
143	}
144	isfile = false;
145	argname.clear();
146	argdata.clear();
147	filename.clear();
148	filedata.clear();
149	filetype.clear();
150	}
151
152	// parse data obtained through a CGI POST request
153	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156	text_t argstr;
157
158	text_t::iterator content_type_begin = content_type.begin();
159	text_t::iterator content_type_end = content_type.end();
160	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161	// a simple post request
162
163	return raw_post_data;
164
165	} else {
166	// multipart/form data - may contain one or more uploaded files
167
168	/*
169	content_type should look something like the following
170	multipart/form-data; boundary=---------------------------7d411e1a50330
171
172	while raw_post_data will be as follows
173	-----------------------------7d43e73450330CRLF
174	Content-Disposition: form-data; name="e"<CRLF>
175	<CRLF>
176	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177	-----------------------------7d43e73450330<CRLF>
178	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179	Content-Type: application/msword<CRLF>
180	<CRLF>
181	<Content of file><CRLF>
182
183	*/
184
185	// first get the boundary from content-type
186	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
187	if (boundary_begin+9 < content_type_end)
188	{
189	// skip over "boundary=" part of string
190	boundary_begin += 9;
191	}
192	else {
193	// error
194	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
195	return "";
196	}
197	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198	int boundary_len = boundary.size();
199
200
201	text_t argname, argdata, filename, filedata, filetype;
202	bool isfile = false;
203	text_t::iterator data_here = raw_post_data.begin();
204	text_t::iterator data_end = raw_post_data.end();
205	while (data_here != data_end) {
206
207	// get the next available line (including the trailing <CRLF>
208	text_t line = substr(data_here, getline(data_here, data_end, true));
209
210	data_here += line.size();
211	text_t::iterator line_begin = line.begin();
212	text_t::iterator line_end = line.end();
213	if (findword(line_begin, line_end, boundary) != line_end) {
214	// we've found a boundary
215	process_post_section(argname, argdata, filename, filedata, filetype,
216	isfile, argstr, fileuploads, gsdlhome);
217
218	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219	// we've found the the beginning of a new section
220	argname.clear();
221	argdata.clear();
222
223	// get the name of this piece of form data
224	text_t::iterator it = findword(line_begin, line_end, "name=\"");
225	if (it == line_end) break; // error - this shouldn't happen
226	it = findchar(it, line_end, '"');
227	if ((it != line_end) && (it+1 != line_end)) {
228	argname = substr(it+1, findchar(it+1, line_end, '"'));
229	}
230
231	// if this piece of form data contains filename="" it's a file
232	// upload and needs to be treated special
233	it = (findword(line_begin, line_end, "filename=\""));
234	if (it != line_end) {
235	// we've found a file upload
236	isfile = true;
237	it = findchar(it, line_end, '"');
238	if ((it != line_end) && (it+1 != line_end)) {
239	filename = substr(it+1, findchar(it+1, line_end, '"'));
240	}
241
242	// the next line is the content-type of this section
243	line = substr(data_here, getline(data_here, data_end, true));
244	data_here += line.size();
245	line_begin = line.begin();
246	line_end = line.end();
247	it = (findword(line_begin, line_end, "Content-Type: "));
248	if (it != line_end) {
249	filetype = substr(it+14, getline(it, line_end, false));
250	}
251	}
252
253	// eat up the next line as it's just a <CRLF> on it's own
254	data_here += 2;
255
256	} else {
257	if (isfile) filedata += line;
258	else argdata += line;
259	}
260
261	}
262
263	// process last section
264	process_post_section(argname, argdata, filename, filedata, filetype,
265	isfile, argstr, fileuploads, gsdlhome);
266
267	return argstr;
268	}
269	}
270
271	// convert %xx and + to their appropriate equivalents
272	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
273	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274	// actually was, then this returns utf-8, and needs to_uni on the
275	// result to get it back to unicode. If the encoding wasn't utf-8, then the
276	// output may be crap. Seems to work for 8 bit encodings.
277	// Really, this should be given the encoding, and should always return unicode.
278	void decode_cgi_arg (text_t &argstr) {
279	text_t::iterator in = argstr.begin();
280	text_t::iterator out = in;
281	text_t::iterator end = argstr.end();
282
283	while (in != end) {
284	if (in == '+') out = ' ';
285
286	else if (*in == '%') {
287	unsigned short c = '%';
288	++in;
289	if (in != end) { // this is an encoding...
290	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291	// this assumes a short int is at least 16 bits...
292	++in;
293	if (in != end)
294	c=hexdigit(*in++) << 12;
295	if (in != end)
296	c+=hexdigit(*in++) << 8;
297	if (in != end)
298	c+=hexdigit(*in++) << 4;
299	if (in != end)
300	c+=hexdigit(*in);
301	/* BAD!! The following assumes the interface is using utf-8. But
302	at this point we don't know what encoding we are using, unless
303	we can parse it out of the string we are currently decoding... */
304	text_t uni=" ";
305	uni[0]=c;
306	text_t utf8=to_utf8(uni);
307	int last_byte=utf8.size()-1;
308	for (int i=0;i<last_byte;++i)
309	*out++ = utf8[i];
310	c=utf8[last_byte];
311	} else { // convert %HH to hex value
312	c = hexdigit (*in);
313	++in;
314	if (in != end && c < 16) { // sanity check on the previous character
315	c = c16 + hexdigit (in);
316	}
317	}
318	}
319	*out = c;
320	} else out = in;
321
322	if (in != end) ++in;
323	++out;
324	}
325
326	// remove the excess characters
327	argstr.erase (out, end);
328
329	}
330
331	// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
332	// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
333	// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
334	// apache log and that log file can be included in a local file inclusion (LFI) or
335	// remote file include (RFI) attack.
336	// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
337	// break out of an html/XML/javascript context.
338	void safe_cgi_arg (text_t &argstr) {
339	text_t::iterator in = argstr.begin();
340	text_t out = "";
341	text_t::iterator end = argstr.end();
342
343	while (in != end) {
344	if (*in == '<') out += "%3C";
345	else if (*in == '>') out += "%3E";
346	else if (*in == '&') out += "%26";
347	else if (*in == '\"') out += "%22";
348	else if (*in == '\'') out += "%27";
349	//else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
350	else { // append whatever char is in *in, but as a char, not int
351	//out += *in; // appends as int
352	out += " "; // append placeholder character
353	out[out.size()-1] = in; // now set location containing placeholder to what's in in
354	}
355	++in;
356	}
357
358	argstr.erase (argstr.begin(), end);
359	argstr += out;
360	}
361
362	// split up the cgi arguments
363	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
364	cgiargsclass &args) {
365	args.clear();
366
367	text_t::const_iterator here = argstr.begin();
368	text_t::const_iterator end = argstr.end();
369	text_t key, value;
370
371	// extract out the key=value pairs
372	while (here != end) {
373	// get the next key and value pair
374	here = getdelimitstr (here, end, '=', key);
375	here = getdelimitstr (here, end, '&', value);
376
377	// convert %xx and + to their appropriate equivalents
378	decode_cgi_arg (value);
379
380	safe_cgi_arg(value); // mitigate obvious cross-site scripting hacks in URL cgi-params
381
382	value.setencoding(1); // other encoding
383	// store this key=value pair
384	if (!key.empty()) {
385
386	// if arg occurs multiple times (as is the case with multiple
387	// checkboxes using the same name) we'll create a comma separated
388	// list of all the values (this uses a hack that encodes naturally
389	// occurring commas as %2C - values will therefore need to be decoded
390	// again before use) - it should use an array instead
391	const cgiarginfo *info = argsinfo.getarginfo (key);
392	if (info==NULL) {
393	// If info is NULL, we can't tell if the arg is multiple value or not
394	// Because we need to have dynamically named arguments multivalued, we
395	// will always assume multiplevalue = true
396	// If the arg is not multi valued, then you need to decode the commas.
397	if (args.getarg(key)==NULL) {
398	args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
399	}
400	else {
401	text_t newvalue = args[key];
402
403	newvalue += "," + encode_commas(value);
404	newvalue.setencoding(1); // other encoding
405	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
406	}
407	}
408	else {
409	if (info->multiplevalue) {
410
411	text_t newvalue = args[key];
412	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
413	newvalue += encode_commas(value);
414	newvalue.setencoding(1); // other encoding
415	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
416
417	} else {
418	args.setarg (key, value, cgiarg_t::cgi_arg);
419	}
420	}
421	}
422	}
423	}
424
425	text_t encode_commas (const text_t &intext) {
426
427	text_t outtext;
428
429	text_t::const_iterator here = intext.begin ();
430	text_t::const_iterator end = intext.end ();
431
432	while (here != end) {
433	if (*here == ',') outtext += "%2C";
434	else outtext.push_back (*here);
435	++here;
436	}
437	return outtext;
438	}
439
440	text_t decode_commas (const text_t &intext) {
441
442	text_t outtext;
443
444	text_t::const_iterator here = intext.begin ();
445	text_t::const_iterator end = intext.end ();
446
447	// for loop
448	int intext_len = intext.size();
449	for(int i = 0; i < intext_len; i++) {
450	if ((i+2)<intext_len) {
451	if(intext[i] == '%' && intext[i+1] == '2'
452	&& (intext[i+2] == 'C' \|\| intext[i+2] == 'c')) {
453	i += 2;
454	outtext.push_back(',');
455	continue;
456	}
457	}
458	outtext.push_back (intext[i]);
459	}
460	return outtext;
461	}
462
463	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
464	text_t minus_safe (const text_t &intext, bool utf8) {
465
466	text_t outtext;
467
468	text_t::const_iterator here = intext.begin ();
469	text_t::const_iterator end = intext.end ();
470
471	while (here != end) {
472	if (*here == '-') outtext += "Zz-";
473	else outtext.push_back (*here);
474	++here;
475	}
476	if (utf8) {
477	outtext = cgi_safe_utf8 (outtext);
478	} else {
479	outtext = cgi_safe_unicode (outtext);
480	}
481	return outtext;
482	}
483
484	// takes utf-8 input
485	text_t cgi_safe_utf8 (const text_t &intext) {
486	text_t outtext;
487
488	text_t::const_iterator here = intext.begin ();
489	text_t::const_iterator end = intext.end ();
490	unsigned short c;
491	text_t ttmp;
492
493	while (here != end) {
494	c = *here;
495	if (((c >= 'a') && (c <= 'z')) \|\|
496	((c >= 'A') && (c <= 'Z')) \|\|
497	((c >= '0') && (c <= '9')) \|\|
498	(c == '%') \|\| (c == '-')) {
499	// alphanumeric character
500	outtext.push_back(c);
501	} else if (c == ' ') {
502	// space
503	outtext.push_back('+');
504	} else if (c > 255) { // not utf-8 character
505	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
506	} else {
507	// everything else
508	outtext.push_back('%');
509	c2hex(c, ttmp);
510	outtext += ttmp;
511	}
512
513	++here;
514	}
515
516	return outtext;
517	}
518	// takes unicode input
519	text_t cgi_safe_unicode (const text_t &intext) {
520	text_t outtext;
521
522	text_t::const_iterator here = intext.begin ();
523	text_t::const_iterator end = intext.end ();
524	unsigned short c;
525	text_t ttmp;
526
527	while (here != end) {
528	c = *here;
529	if (((c >= 'a') && (c <= 'z')) \|\|
530	((c >= 'A') && (c <= 'Z')) \|\|
531	((c >= '0') && (c <= '9')) \|\|
532	(c == '%') \|\| (c == '-')) {
533	// alphanumeric character
534	outtext.push_back(c);
535	} else if (c == ' ') {
536	// space
537	outtext.push_back('+');
538	} else if (c > 127) { // unicode character
539	unsigned char buf[3]; // up to 3 bytes
540	buf[0]='\0';buf[1]='\0';buf[2]='\0';
541	output_utf8_char(c,buf, buf+2);
542	outtext.push_back('%');
543	c2hex(buf[0], ttmp);
544	outtext += ttmp;
545	outtext.push_back('%');
546	c2hex(buf[1], ttmp);
547	outtext += ttmp;
548	if (buf[2]) {
549	outtext.push_back('%');
550	c2hex(buf[2], ttmp);
551	outtext += ttmp;
552	}
553	} else {
554	// everything else
555	outtext.push_back('%');
556	c2hex(c, ttmp);
557	outtext += ttmp;
558	}
559
560	++here;
561	}
562
563	return outtext;
564	}
565
566
567
568
569	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
570	text_t::const_iterator last,
571	text_t &argname) {
572	first = getdelimitstr (first, last, '-', argname);
573	return first;
574	}
575
576
577	// check_save_conf_str checks the configuration string for
578	// the saved args and makes sure it does not conflict with
579	// the information about the arguments. If an error is encountered
580	// it will return false and the program should not produce any
581	// output.
582	bool check_save_conf_str (const text_t &saveconf,
583	const cgiargsinfoclass &argsinfo,
584	ostream &logout) {
585	outconvertclass text_t2ascii;
586
587	text_tset argsset;
588	text_t::const_iterator saveconfhere = saveconf.begin ();
589	text_t::const_iterator saveconfend = saveconf.end ();
590	text_t argname;
591	const cgiarginfo *info;
592
593	// first check to make sure all saved arguments can be saved
594
595	while (saveconfhere != saveconfend) {
596	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
597
598	if (!argname.empty()) {
599	// save the argument name for later
600	argsset.insert (argname);
601
602	// check the argument
603	info = argsinfo.getarginfo (argname);
604	if (info == NULL) {
605	logout << text_t2ascii << "Error: the cgi argument \"" << argname
606	<< "\" is used in the configuration string for the\n"
607	<< "saved arguments but does not exist as a valid argument.\n\n";
608	return false;
609	}
610	if (info->savedarginfo == cgiarginfo::mustnot) {
611	logout << text_t2ascii << "Error: the cgi argument \"" << argname
612	<< "\" is used in the configuration string for the\n"
613	<< "saved arguments but has been specified as an argument whose\n"
614	<< "state must not be saved.\n\n";
615	return false;
616	}
617	}
618	}
619
620
621	// next check that all saved arguments that should be saved
622	// are saved
623	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
624	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
625
626	while (argsinfohere != argsinfoend) {
627	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
628	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
629	logout << text_t2ascii << "Error: the cgi argument \""
630	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
631	<< "be save but was not listed in the saved arguments.\n\n";
632	return false;
633	}
634
635	++argsinfohere;
636	}
637
638	return true; // made it, no clashes
639	}
640
641
642	// create_save_conf_str will create a configuration string
643	// based on the information in argsinfo. This method of configuration
644	// is not recomended as small changes can produce large changes in
645	// the resulting configuration string (for instance a totally different
646	// ordering). Only arguments which "must" be saved are included in
647	// the resulting string.
648	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
649	ostream &/logout/) {
650	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
651	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
652	text_t saveconf;
653	bool first = true;
654
655	while (argsinfohere != argsinfoend) {
656	// save this argument if it must be saved
657	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
658	if (!first) saveconf.push_back ('-');
659	else first = false;
660	saveconf += (*argsinfohere).second.shortname;
661	}
662
663	++argsinfohere;
664	}
665
666	return saveconf;
667	}
668
669
670	// expand_save_args will expand the saved arguments based
671	// on saveconf placing the results in args if they are not
672	// already defined. If it encounters an error it will return false
673	// and output more information to logout.
674	bool expand_save_args (const cgiargsinfoclass &argsinfo,
675	const text_t &saveconf,
676	cgiargsclass &args,
677	ostream &logout) {
678	outconvertclass text_t2ascii;
679
680	text_t *arg_e = args.getarg("e");
681	if (arg_e == NULL) return true; // no compressed arguments
682	if (arg_e->empty()) return true; // no compressed arguments
683
684	text_t argname, argvalue;
685	const cgiarginfo *argnameinfo;
686
687	text_t::const_iterator saveconfhere = saveconf.begin();
688	text_t::const_iterator saveconfend = saveconf.end();
689
690	text_t::iterator arg_ebegin = arg_e->begin();
691	text_t::iterator arg_eend = arg_e->end();
692	text_t::iterator arg_ehere = arg_ebegin;
693	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
694	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
695
696	if (!argname.empty()) {
697	// found another entry
698	argnameinfo = argsinfo.getarginfo (argname);
699
700	if (argnameinfo == NULL) {
701	// no information about the argument could be found
702	// we can't keep going because we don't know whether
703	// this argument is a single or multiple character value
704	logout << text_t2ascii << "Error: the cgi argument \"" << argname
705	<< "\" was specified as being a compressed argument\n"
706	<< "but no information about it could be found within the "
707	<< "cgiargsinfoclass.\n";
708	return false;
709
710	} else {
711
712	// found the argument information
713	if (argnameinfo->multiplechar) {
714	text_t::const_iterator sav = arg_ehere;
715	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
716	if (distance(arg_ebegin, arg_ehere) > 2) {
717	// replace any '-' chars escaped with 'Zz'
718	bool first = true;
719	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
720	if (first) argvalue.clear();
721
722	// Hey, here's a wild idea. Why don't we check that there is
723	// another hyphen in the cgiarge before we get a pointer to it and
724	// add one. That way we are far less likely to wander off into
725	// random memory merrily parsing arguments that are then lovingly
726	// spewed all over the HTML page returned at the usage logs.
727	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
728	if (minus_itr == arg_eend)
729	{
730	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
731	return false;
732	}
733	arg_ehere = minus_itr + 1;
734
735	while (sav != (arg_ehere-1)) {
736	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
737	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
738	++sav;
739	}
740	first = false;
741	}
742	}
743	argvalue.setencoding(1); // other encoding
744	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
745	} else {
746	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
747	++arg_ehere;
748	}
749	}
750	}
751	}
752
753	return true;
754	}
755
756
757	// adds the default values for those arguments which have not
758	// been specified
759	void add_default_args (const cgiargsinfoclass &argsinfo,
760	cgiargsclass &args,
761	ostream &/logout/) {
762	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
763	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
764
765	while (argsinfohere != argsinfoend) {
766	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
767	args.setdefaultarg ((*argsinfohere).second.shortname,
768	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
769	}
770	++argsinfohere;
771	}
772	}
773
774	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
775	cgiargsclass &args,
776	fileupload_tmap &fileuploads,
777	ostream &logout) {
778
779	const cgiarginfo *info = argsinfo.getarginfo("a");
780	fileupload_tmap::const_iterator this_file = fileuploads.begin();
781	fileupload_tmap::const_iterator end_file = fileuploads.end();
782	while (this_file != end_file) {
783	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
784	if (info != NULL) {
785
786	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
787
788	args.setargfile((this_file).first, (this_file).second);
789	}
790	}
791	this_file++;
792	}
793	}
794
795	// compress_save_args will compress the arguments and return
796	// them in compressed_args. If an error was encountered
797	// compressed_args will be set to to "", an error will be
798	// written to logout, and the function will return false.
799	bool compress_save_args (const cgiargsinfoclass &argsinfo,
800	const text_t &saveconf,
801	cgiargsclass &args,
802	text_t &compressed_args,
803	outconvertclass &outconvert,
804	ostream &logout) {
805	outconvertclass text_t2ascii;
806
807	compressed_args.clear();
808
809	text_t argname, argvalue;
810	const cgiarginfo *argnameinfo;
811
812	text_t::const_iterator saveconfhere = saveconf.begin();
813	text_t::const_iterator saveconfend = saveconf.end();
814
815	while (saveconfhere != saveconfend) {
816	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
817
818	if (!argname.empty()) {
819	// found another entry
820	argnameinfo = argsinfo.getarginfo (argname);
821
822	if (argnameinfo == NULL) {
823	// no information about the argument could be found
824	// we can't keep going because we don't know whether
825	// this argument is a single or multiple character value
826	logout << text_t2ascii << "Error: the cgi argument \"" << argname
827	<< "\" was specified as being a compressed argument\n"
828	<< "but no information about it could be found within the "
829	<< "cgiargsinfoclass.\n";
830	compressed_args.clear();
831	return false;
832
833	} else {
834	// found the argument information
835	if (argnameinfo->multiplechar) {
836	// multiple character argument -- sort out any '-' chars
837	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
838	compressed_args += minus_safe (args[argname], false);
839	else
840	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
841
842	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
843
844	} else {
845	// single character argument
846	if (args[argname].size() == 0) {
847	logout << text_t2ascii << "Error: the cgi argument \"" << argname
848	<< "\" was specified as being a compressed argument which\n"
849	<< "should have a one character value but it was empty.\n\n";
850	compressed_args.clear ();
851	return false;
852
853	} else if (args[argname].size() > 1) {
854	logout << text_t2ascii << "Error: the cgi argument \"" << argname
855	<< "\" was specified as being a compressed argument which\n"
856	<< "should have a one character value but it had multiple characters.\n\n";
857	compressed_args.clear ();
858	return false;
859	}
860
861	// everything is ok
862	compressed_args += args[argname];
863	}
864	}
865	}
866	}
867
868	return true;
869	}
870
871
872	// args_tounicode converts any arguments which are not in unicode
873	// to unicode using inconvert
874	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
875	cgiargsclass::iterator here = args.begin();
876	cgiargsclass::iterator end = args.end();
877
878	while (here != end) {
879	if ((*here).second.value.getencoding() > 0) {
880	// Call reset() before converting each argument, to prevent problems when converting the last
881	// argument left the converter in a bad state
882	inconvert.reset();
883	(here).second.value = inconvert.convert((here).second.value);
884	}
885
886	++here;
887	}
888	}
889
890	// fcgienv will be loaded with environment name-value pairs
891	// if using fastcgi (had to do this as getenv doesn't work
892	// with our implementation of fastcgi). if fcgienv is empty
893	// we'll simply use getenv
894	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
895	if (fcgienv.empty()) {
896	char *n = name.getcstr();
897	char *v = getenv(n);
898	delete []n;
899	if (v != NULL) return v;
900	return g_EmptyText;
901
902	} else return fcgienv[name];
903	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: