Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 28841

Last change on this file since 28841 was 28841, checked in by ak19, 10 years ago
Fixing up URL encoding of cgi args so that phrase searching works again. Tested MGPP, Lucene and SQLite searching. Tested simple search, fielded search, advanced single field and multi-field as well as running a query.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 29.2 KB

Line
1	/**********************************************************************
2	*
3	* cgiutils.cpp -- general cgi utilities
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cgiutils.h"
27	#include "fileutil.h"
28	#include "gsdlunicode.h"
29	#include "fileutil.h"
30	#include "unitool.h" // in mg, for output_utf8_char
31	#include <cstdlib>
32	#include <time.h>
33
34	#if defined(GSDL_USE_OBJECTSPACE)
35	# include <ospace\std\iostream>
36	# include <ospace\std\fstream>
37	#elif defined(GSDL_USE_IOS_H)
38	# include <iostream.h>
39	# include <fstream.h>
40	#else
41	# include <iostream>
42	# include <fstream>
43	#endif
44
45	// Master switch for the URL-encoding security changes: set to false to undo them. While false, safe_cgi_arg() and unsafe_cgi_arg() return without touching their argument.
46	static bool do_safe_cgi_args = true;
47
// Map a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0..15.
// Any other input is handed back unchanged. Note that character codes
// below 16 are therefore indistinguishable from genuine digit values.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') {
    value = static_cast<unsigned short>(c - '0');
  } else if ('a' <= c && c <= 'f') {
    value = static_cast<unsigned short>(c - 'a' + 10);
  } else if ('A' <= c && c <= 'F') {
    value = static_cast<unsigned short>(c - 'A' + 10);
  }

  return value;
}
54
55
56	static void c2hex (unsigned short c, text_t &t) {
57	t.clear();
58
59	if (c >= 256) {
60	t = "20"; // ' '
61	return;
62	}
63
64	unsigned short o1, o2;
65
66	o1 = (c/16) % 16;
67	o2 = c % 16;
68	if (o1 >= 10) o1 += 'a' - 10;
69	else o1 += '0';
70	if (o2 >= 10) o2 += 'a' - 10;
71	else o2 += '0';
72
73	t.push_back(o1);
74	t.push_back(o2);
75	}
76
77	static text_t::iterator getline (text_t::iterator first,
78	text_t::iterator last,
79	bool include_crlf) {
80	while (first != last) {
81	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
82	// found <CRLF>
83	if (include_crlf) first += 2;
84	break;
85	}
86
87	first++;
88	}
89
90	return first;
91	}
92
93	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
94	text_t &filetype, bool &isfile, text_t &argstr,
95	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
96
97	if (!argname.empty()) {
98
99	if (!isfile) {
100	// argdata includes a trailing <CRLF> that we must remove
101	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
102	argdata.erase(argdata.end()-2, argdata.end());
103	}
104	if (!argstr.empty()) argstr += "&";
105	argstr += argname + "=" + argdata;
106
107	} else if (!filename.empty()) {
108	// filedata includes a trailing <CRLF> that we must remove
109	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
110	filedata.erase(filedata.end()-2, filedata.end());
111	}
112
113	// create tmp_name for storing the file on disk, using the current timestamp
114	text_t tmp_name(time(NULL));
115	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
116
117	char *tmp_name_c = tmp_name.getcstr();
118
119	// write the file data to disk
120	outconvertclass out;
121	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
122	filestream << out << filedata;
123	filestream.close();
124	delete tmp_name_c;
125
126	// populate the fields of a fileupload_t and put it in the
127	// fileuploads map
128	fileupload_t fu;
129	// note that filename currently may or may not include the path since
130	// some browsers (e.g. IE) include the path while others
131	// (e.g. mozilla) do not. we should probably remove the path from
132	// this field here to get a consistent value across all browsers.
133	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
134	if (slash != filename.end()) {
135	filename = substr(slash+1, filename.end());
136	}
137	fu.name = filename;
138	fu.type = filetype;
139	// size has yet to be implemented
140	fu.size = filedata.size();
141
142	fu.tmp_name = tmp_name;
143	fileuploads[argname] = fu;
144	}
145	}
146	isfile = false;
147	argname.clear();
148	argdata.clear();
149	filename.clear();
150	filedata.clear();
151	filetype.clear();
152	}
153
154	// parse data obtained through a CGI POST request
155	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
156	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
157
158	text_t argstr;
159
160	text_t::iterator content_type_begin = content_type.begin();
161	text_t::iterator content_type_end = content_type.end();
162	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
163	// a simple post request
164
165	return raw_post_data;
166
167	} else {
168	// multipart/form data - may contain one or more uploaded files
169
170	/*
171	content_type should look something like the following
172	multipart/form-data; boundary=---------------------------7d411e1a50330
173
174	while raw_post_data will be as follows
175	-----------------------------7d43e73450330CRLF
176	Content-Disposition: form-data; name="e"<CRLF>
177	<CRLF>
178	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
179	-----------------------------7d43e73450330<CRLF>
180	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
181	Content-Type: application/msword<CRLF>
182	<CRLF>
183	<Content of file><CRLF>
184
185	*/
186
187	// first get the boundary from content-type
188	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
189	if (boundary_begin+9 < content_type_end)
190	{
191	// skip over "boundary=" part of string
192	boundary_begin += 9;
193	}
194	else {
195	// error
196	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
197	return "";
198	}
199	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
200	int boundary_len = boundary.size();
201
202
203	text_t argname, argdata, filename, filedata, filetype;
204	bool isfile = false;
205	text_t::iterator data_here = raw_post_data.begin();
206	text_t::iterator data_end = raw_post_data.end();
207	while (data_here != data_end) {
208
209	// get the next available line (including the trailing <CRLF>
210	text_t line = substr(data_here, getline(data_here, data_end, true));
211
212	data_here += line.size();
213	text_t::iterator line_begin = line.begin();
214	text_t::iterator line_end = line.end();
215	if (findword(line_begin, line_end, boundary) != line_end) {
216	// we've found a boundary
217	process_post_section(argname, argdata, filename, filedata, filetype,
218	isfile, argstr, fileuploads, gsdlhome);
219
220	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
221	// we've found the the beginning of a new section
222	argname.clear();
223	argdata.clear();
224
225	// get the name of this piece of form data
226	text_t::iterator it = findword(line_begin, line_end, "name=\"");
227	if (it == line_end) break; // error - this shouldn't happen
228	it = findchar(it, line_end, '"');
229	if ((it != line_end) && (it+1 != line_end)) {
230	argname = substr(it+1, findchar(it+1, line_end, '"'));
231	}
232
233	// if this piece of form data contains filename="" it's a file
234	// upload and needs to be treated special
235	it = (findword(line_begin, line_end, "filename=\""));
236	if (it != line_end) {
237	// we've found a file upload
238	isfile = true;
239	it = findchar(it, line_end, '"');
240	if ((it != line_end) && (it+1 != line_end)) {
241	filename = substr(it+1, findchar(it+1, line_end, '"'));
242	}
243
244	// the next line is the content-type of this section
245	line = substr(data_here, getline(data_here, data_end, true));
246	data_here += line.size();
247	line_begin = line.begin();
248	line_end = line.end();
249	it = (findword(line_begin, line_end, "Content-Type: "));
250	if (it != line_end) {
251	filetype = substr(it+14, getline(it, line_end, false));
252	}
253	}
254
255	// eat up the next line as it's just a <CRLF> on it's own
256	data_here += 2;
257
258	} else {
259	if (isfile) filedata += line;
260	else argdata += line;
261	}
262
263	}
264
265	// process last section
266	process_post_section(argname, argdata, filename, filedata, filetype,
267	isfile, argstr, fileuploads, gsdlhome);
268
269	return argstr;
270	}
271	}
272
273	// convert %xx and + to their appropriate equivalents
274	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
275	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
276	// actually was, then this returns utf-8, and needs to_uni on the
277	// result to get it back to unicode. If the encoding wasn't utf-8, then the
278	// output may be crap. Seems to work for 8 bit encodings.
279	// Really, this should be given the encoding, and should always return unicode.
280	void decode_cgi_arg (text_t &argstr) {
281	text_t::iterator in = argstr.begin();
282	text_t::iterator out = in;
283	text_t::iterator end = argstr.end();
284
285	while (in != end) {
286	if (in == '+') out = ' ';
287
288	else if (*in == '%') {
289	unsigned short c = '%';
290	++in;
291	if (in != end) { // this is an encoding...
292	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
293	// this assumes a short int is at least 16 bits...
294	++in;
295	if (in != end)
296	c=hexdigit(*in++) << 12;
297	if (in != end)
298	c+=hexdigit(*in++) << 8;
299	if (in != end)
300	c+=hexdigit(*in++) << 4;
301	if (in != end)
302	c+=hexdigit(*in);
303	/* BAD!! The following assumes the interface is using utf-8. But
304	at this point we don't know what encoding we are using, unless
305	we can parse it out of the string we are currently decoding... */
306	text_t uni=" ";
307	uni[0]=c;
308	text_t utf8=to_utf8(uni);
309	int last_byte=utf8.size()-1;
310	for (int i=0;i<last_byte;++i)
311	*out++ = utf8[i];
312	c=utf8[last_byte];
313	} else { // convert %HH to hex value
314	c = hexdigit (*in);
315	++in;
316	if (in != end && c < 16) { // sanity check on the previous character
317	c = c16 + hexdigit (in);
318	}
319	}
320	}
321	*out = c;
322	} else out = in;
323
324	if (in != end) ++in;
325	++out;
326	}
327
328	// remove the excess characters
329	argstr.erase (out, end);
330
331	}
332
333	// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
334	// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
335	// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
336	// apache log and that log file can be included in a local file inclusion (LFI) or
337	// remote file include (RFI) attack.
338	// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
339	// break out of an html/XML/javascript context.
340	void safe_cgi_arg (const text_t &key, text_t &argstr) {
341	if(!do_safe_cgi_args) {
342	return;
343	}
344
345	text_t::iterator in = argstr.begin();
346	text_t out = "";
347	text_t::iterator end = argstr.end();
348
349	while (in != end) {
350	if (*in == '<') out += "%3C";
351	else if (*in == '>') out += "%3E";
352	else if (*in == '&') out += "%26";
353	else if (*in == '\"') out += "%22";
354	else if (*in == '\'') out += "%27";
355	//else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
356	else { // append whatever char is in *in, but as a char, not int
357	//out += *in; // appends as int
358	out.push_back(*in);
359	}
360	++in;
361	}
362
363	argstr.erase (argstr.begin(), end);
364	argstr += out;
365	}
366
367
368	// given a list of characters (or "all") to decode, and given the string, str, where those
369	// characters are to be decoded, this method replaces any occurrences of the url-encoded
370	// variants of those characters with their actual characters in the given string str.
371	void unsafe_cgi_arg(const text_t &chars, text_t &str) {
372	if(!do_safe_cgi_args) {
373	return;
374	}
375
376	text_t allchars = "<>&\"\'/";
377
378	text_t chars_to_decode = (chars == "all" \|\| chars == "ALL") ? allchars : chars;
379
380	text_t::iterator in = chars_to_decode.begin();
381	text_t::iterator end = chars_to_decode.end();
382
383	char hex_char[4];
384
385	// using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
386
387	while (in != end) {
388
389	// *in is a character from the accepted list of chars_to_decode list
390
391	// 1. create the url-encoded value of the char *in in variable hex_char
392	// sprintf adds in a null byte at the end
393	sprintf(hex_char,"%%%02X",*in);
394
395	// 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
396	text_t tmp = "";
397	tmp.push_back(*in);
398
399	// 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
400	str.replace(hex_char, tmp);
401
402	++in;
403	}
404	}
405
406
407	// split up the cgi arguments
408	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
409	cgiargsclass &args) {
410	args.clear();
411
412	text_t::const_iterator here = argstr.begin();
413	text_t::const_iterator end = argstr.end();
414	text_t key, value;
415
416	// extract out the key=value pairs
417	while (here != end) {
418	// get the next key and value pair
419	here = getdelimitstr (here, end, '=', key);
420	here = getdelimitstr (here, end, '&', value);
421
422	// convert %xx and + to their appropriate equivalents
423	decode_cgi_arg (value);
424
425	safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
426
427	value.setencoding(1); // other encoding
428	// store this key=value pair
429	if (!key.empty()) {
430
431	// if arg occurs multiple times (as is the case with multiple
432	// checkboxes using the same name) we'll create a comma separated
433	// list of all the values (this uses a hack that encodes naturally
434	// occurring commas as %2C - values will therefore need to be decoded
435	// again before use) - it should use an array instead
436	const cgiarginfo *info = argsinfo.getarginfo (key);
437	if (info==NULL) {
438	// If info is NULL, we can't tell if the arg is multiple value or not
439	// Because we need to have dynamically named arguments multivalued, we
440	// will always assume multiplevalue = true
441	// If the arg is not multi valued, then you need to decode the commas.
442	if (args.getarg(key)==NULL) {
443	args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
444	}
445	else {
446	text_t newvalue = args[key];
447
448	newvalue += "," + encode_commas(value);
449	newvalue.setencoding(1); // other encoding
450	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
451	}
452	}
453	else {
454	if (info->multiplevalue) {
455
456	text_t newvalue = args[key];
457	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
458	newvalue += encode_commas(value);
459	newvalue.setencoding(1); // other encoding
460	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
461
462	} else {
463	args.setarg (key, value, cgiarg_t::cgi_arg);
464	}
465	}
466	}
467	}
468	}
469
470	text_t encode_commas (const text_t &intext) {
471
472	text_t outtext;
473
474	text_t::const_iterator here = intext.begin ();
475	text_t::const_iterator end = intext.end ();
476
477	while (here != end) {
478	if (*here == ',') outtext += "%2C";
479	else outtext.push_back (*here);
480	++here;
481	}
482	return outtext;
483	}
484
485	text_t decode_commas (const text_t &intext) {
486
487	text_t outtext;
488
489	text_t::const_iterator here = intext.begin ();
490	text_t::const_iterator end = intext.end ();
491
492	// for loop
493	int intext_len = intext.size();
494	for(int i = 0; i < intext_len; i++) {
495	if ((i+2)<intext_len) {
496	if(intext[i] == '%' && intext[i+1] == '2'
497	&& (intext[i+2] == 'C' \|\| intext[i+2] == 'c')) {
498	i += 2;
499	outtext.push_back(',');
500	continue;
501	}
502	}
503	outtext.push_back (intext[i]);
504	}
505	return outtext;
506	}
507
508	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
509	text_t minus_safe (const text_t &intext, bool utf8) {
510
511	text_t outtext;
512
513	text_t::const_iterator here = intext.begin ();
514	text_t::const_iterator end = intext.end ();
515
516	while (here != end) {
517	if (*here == '-') outtext += "Zz-";
518	else outtext.push_back (*here);
519	++here;
520	}
521	if (utf8) {
522	outtext = cgi_safe_utf8 (outtext);
523	} else {
524	outtext = cgi_safe_unicode (outtext);
525	}
526	return outtext;
527	}
528
529	// takes utf-8 input
530	text_t cgi_safe_utf8 (const text_t &intext) {
531	text_t outtext;
532
533	text_t::const_iterator here = intext.begin ();
534	text_t::const_iterator end = intext.end ();
535	unsigned short c;
536	text_t ttmp;
537
538	while (here != end) {
539	c = *here;
540	if (((c >= 'a') && (c <= 'z')) \|\|
541	((c >= 'A') && (c <= 'Z')) \|\|
542	((c >= '0') && (c <= '9')) \|\|
543	(c == '%') \|\| (c == '-')) {
544	// alphanumeric character
545	outtext.push_back(c);
546	} else if (c == ' ') {
547	// space
548	outtext.push_back('+');
549	} else if (c > 255) { // not utf-8 character
550	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
551	} else {
552	// everything else
553	outtext.push_back('%');
554	c2hex(c, ttmp);
555	outtext += ttmp;
556	}
557
558	++here;
559	}
560
561	return outtext;
562	}
563	// takes unicode input
564	text_t cgi_safe_unicode (const text_t &intext) {
565	text_t outtext;
566
567	text_t::const_iterator here = intext.begin ();
568	text_t::const_iterator end = intext.end ();
569	unsigned short c;
570	text_t ttmp;
571
572	while (here != end) {
573	c = *here;
574	if (((c >= 'a') && (c <= 'z')) \|\|
575	((c >= 'A') && (c <= 'Z')) \|\|
576	((c >= '0') && (c <= '9')) \|\|
577	(c == '%') \|\| (c == '-')) {
578	// alphanumeric character
579	outtext.push_back(c);
580	} else if (c == ' ') {
581	// space
582	outtext.push_back('+');
583	} else if (c > 127) { // unicode character
584	unsigned char buf[3]; // up to 3 bytes
585	buf[0]='\0';buf[1]='\0';buf[2]='\0';
586	output_utf8_char(c,buf, buf+2);
587	outtext.push_back('%');
588	c2hex(buf[0], ttmp);
589	outtext += ttmp;
590	outtext.push_back('%');
591	c2hex(buf[1], ttmp);
592	outtext += ttmp;
593	if (buf[2]) {
594	outtext.push_back('%');
595	c2hex(buf[2], ttmp);
596	outtext += ttmp;
597	}
598	} else {
599	// everything else
600	outtext.push_back('%');
601	c2hex(c, ttmp);
602	outtext += ttmp;
603	}
604
605	++here;
606	}
607
608	return outtext;
609	}
610
611
612
613
614	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
615	text_t::const_iterator last,
616	text_t &argname) {
617	first = getdelimitstr (first, last, '-', argname);
618	return first;
619	}
620
621
622	// check_save_conf_str checks the configuration string for
623	// the saved args and makes sure it does not conflict with
624	// the information about the arguments. If an error is encountered
625	// it will return false and the program should not produce any
626	// output.
627	bool check_save_conf_str (const text_t &saveconf,
628	const cgiargsinfoclass &argsinfo,
629	ostream &logout) {
630	outconvertclass text_t2ascii;
631
632	text_tset argsset;
633	text_t::const_iterator saveconfhere = saveconf.begin ();
634	text_t::const_iterator saveconfend = saveconf.end ();
635	text_t argname;
636	const cgiarginfo *info;
637
638	// first check to make sure all saved arguments can be saved
639
640	while (saveconfhere != saveconfend) {
641	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
642
643	if (!argname.empty()) {
644	// save the argument name for later
645	argsset.insert (argname);
646
647	// check the argument
648	info = argsinfo.getarginfo (argname);
649	if (info == NULL) {
650	logout << text_t2ascii << "Error: the cgi argument \"" << argname
651	<< "\" is used in the configuration string for the\n"
652	<< "saved arguments but does not exist as a valid argument.\n\n";
653	return false;
654	}
655	if (info->savedarginfo == cgiarginfo::mustnot) {
656	logout << text_t2ascii << "Error: the cgi argument \"" << argname
657	<< "\" is used in the configuration string for the\n"
658	<< "saved arguments but has been specified as an argument whose\n"
659	<< "state must not be saved.\n\n";
660	return false;
661	}
662	}
663	}
664
665
666	// next check that all saved arguments that should be saved
667	// are saved
668	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
669	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
670
671	while (argsinfohere != argsinfoend) {
672	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
673	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
674	logout << text_t2ascii << "Error: the cgi argument \""
675	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
676	<< "be save but was not listed in the saved arguments.\n\n";
677	return false;
678	}
679
680	++argsinfohere;
681	}
682
683	return true; // made it, no clashes
684	}
685
686
687	// create_save_conf_str will create a configuration string
688	// based on the information in argsinfo. This method of configuration
689	// is not recomended as small changes can produce large changes in
690	// the resulting configuration string (for instance a totally different
691	// ordering). Only arguments which "must" be saved are included in
692	// the resulting string.
693	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
694	ostream &/logout/) {
695	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
696	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
697	text_t saveconf;
698	bool first = true;
699
700	while (argsinfohere != argsinfoend) {
701	// save this argument if it must be saved
702	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
703	if (!first) saveconf.push_back ('-');
704	else first = false;
705	saveconf += (*argsinfohere).second.shortname;
706	}
707
708	++argsinfohere;
709	}
710
711	return saveconf;
712	}
713
714
715	// expand_save_args will expand the saved arguments based
716	// on saveconf placing the results in args if they are not
717	// already defined. If it encounters an error it will return false
718	// and output more information to logout.
719	bool expand_save_args (const cgiargsinfoclass &argsinfo,
720	const text_t &saveconf,
721	cgiargsclass &args,
722	ostream &logout) {
723	outconvertclass text_t2ascii;
724
725	text_t *arg_e = args.getarg("e");
726	if (arg_e == NULL) return true; // no compressed arguments
727	if (arg_e->empty()) return true; // no compressed arguments
728
729	text_t argname, argvalue;
730	const cgiarginfo *argnameinfo;
731
732	text_t::const_iterator saveconfhere = saveconf.begin();
733	text_t::const_iterator saveconfend = saveconf.end();
734
735	text_t::iterator arg_ebegin = arg_e->begin();
736	text_t::iterator arg_eend = arg_e->end();
737	text_t::iterator arg_ehere = arg_ebegin;
738	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
739	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
740
741	if (!argname.empty()) {
742	// found another entry
743	argnameinfo = argsinfo.getarginfo (argname);
744
745	if (argnameinfo == NULL) {
746	// no information about the argument could be found
747	// we can't keep going because we don't know whether
748	// this argument is a single or multiple character value
749	logout << text_t2ascii << "Error: the cgi argument \"" << argname
750	<< "\" was specified as being a compressed argument\n"
751	<< "but no information about it could be found within the "
752	<< "cgiargsinfoclass.\n";
753	return false;
754
755	} else {
756
757	// found the argument information
758	if (argnameinfo->multiplechar) {
759	text_t::const_iterator sav = arg_ehere;
760	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
761	if (distance(arg_ebegin, arg_ehere) > 2) {
762	// replace any '-' chars escaped with 'Zz'
763	bool first = true;
764	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
765	if (first) argvalue.clear();
766
767	// Hey, here's a wild idea. Why don't we check that there is
768	// another hyphen in the cgiarge before we get a pointer to it and
769	// add one. That way we are far less likely to wander off into
770	// random memory merrily parsing arguments that are then lovingly
771	// spewed all over the HTML page returned at the usage logs.
772	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
773	if (minus_itr == arg_eend)
774	{
775	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
776	return false;
777	}
778	arg_ehere = minus_itr + 1;
779
780	while (sav != (arg_ehere-1)) {
781	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
782	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
783	++sav;
784	}
785	first = false;
786	}
787	}
788	argvalue.setencoding(1); // other encoding
789	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
790	} else {
791	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
792	++arg_ehere;
793	}
794	}
795	}
796	}
797
798	return true;
799	}
800
801
802	// adds the default values for those arguments which have not
803	// been specified
804	void add_default_args (const cgiargsinfoclass &argsinfo,
805	cgiargsclass &args,
806	ostream &/logout/) {
807	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
808	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
809
810	while (argsinfohere != argsinfoend) {
811	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
812	args.setdefaultarg ((*argsinfohere).second.shortname,
813	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
814	}
815	++argsinfohere;
816	}
817	}
818
819	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
820	cgiargsclass &args,
821	fileupload_tmap &fileuploads,
822	ostream &logout) {
823
824	const cgiarginfo *info = argsinfo.getarginfo("a");
825	fileupload_tmap::const_iterator this_file = fileuploads.begin();
826	fileupload_tmap::const_iterator end_file = fileuploads.end();
827	while (this_file != end_file) {
828	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
829	if (info != NULL) {
830
831	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
832
833	args.setargfile((this_file).first, (this_file).second);
834	}
835	}
836	this_file++;
837	}
838	}
839
840	// compress_save_args will compress the arguments and return
841	// them in compressed_args. If an error was encountered
842	// compressed_args will be set to to "", an error will be
843	// written to logout, and the function will return false.
844	bool compress_save_args (const cgiargsinfoclass &argsinfo,
845	const text_t &saveconf,
846	cgiargsclass &args,
847	text_t &compressed_args,
848	outconvertclass &outconvert,
849	ostream &logout) {
850	outconvertclass text_t2ascii;
851
852	compressed_args.clear();
853
854	text_t argname, argvalue;
855	const cgiarginfo *argnameinfo;
856
857	text_t::const_iterator saveconfhere = saveconf.begin();
858	text_t::const_iterator saveconfend = saveconf.end();
859
860	while (saveconfhere != saveconfend) {
861	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
862
863	if (!argname.empty()) {
864	// found another entry
865	argnameinfo = argsinfo.getarginfo (argname);
866
867	if (argnameinfo == NULL) {
868	// no information about the argument could be found
869	// we can't keep going because we don't know whether
870	// this argument is a single or multiple character value
871	logout << text_t2ascii << "Error: the cgi argument \"" << argname
872	<< "\" was specified as being a compressed argument\n"
873	<< "but no information about it could be found within the "
874	<< "cgiargsinfoclass.\n";
875	compressed_args.clear();
876	return false;
877
878	} else {
879	// found the argument information
880	if (argnameinfo->multiplechar) {
881	// multiple character argument -- sort out any '-' chars
882	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
883	compressed_args += minus_safe (args[argname], false);
884	else
885	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
886
887	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
888
889	} else {
890	// single character argument
891	if (args[argname].size() == 0) {
892	logout << text_t2ascii << "Error: the cgi argument \"" << argname
893	<< "\" was specified as being a compressed argument which\n"
894	<< "should have a one character value but it was empty.\n\n";
895	compressed_args.clear ();
896	return false;
897
898	} else if (args[argname].size() > 1) {
899	logout << text_t2ascii << "Error: the cgi argument \"" << argname
900	<< "\" was specified as being a compressed argument which\n"
901	<< "should have a one character value but it had multiple characters.\n\n";
902	compressed_args.clear ();
903	return false;
904	}
905
906	// everything is ok
907	compressed_args += args[argname];
908	}
909	}
910	}
911	}
912
913	return true;
914	}
915
916
917	// args_tounicode converts any arguments which are not in unicode
918	// to unicode using inconvert
919	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
920	cgiargsclass::iterator here = args.begin();
921	cgiargsclass::iterator end = args.end();
922
923	while (here != end) {
924	if ((*here).second.value.getencoding() > 0) {
925	// Call reset() before converting each argument, to prevent problems when converting the last
926	// argument left the converter in a bad state
927	inconvert.reset();
928	(here).second.value = inconvert.convert((here).second.value);
929	}
930
931	++here;
932	}
933	}
934
935	// fcgienv will be loaded with environment name-value pairs
936	// if using fastcgi (had to do this as getenv doesn't work
937	// with our implementation of fastcgi). if fcgienv is empty
938	// we'll simply use getenv
939	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
940	if (fcgienv.empty()) {
941	char *n = name.getcstr();
942	char *v = getenv(n);
943	delete []n;
944	if (v != NULL) return v;
945	return g_EmptyText;
946
947	} else return fcgienv[name];
948	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: