Context Navigation

cgiutils.cpp@ 30465

Last change on this file since 30465 was 30465, checked in by kjdon, 8 years ago
Fixes for depositor: when getting the post data and putting it together into form data, we need to escape the special characters in cgi args.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 30.3 KB

Line
1	/**********************************************************************
2	*
3	* cgiutils.cpp -- general cgi utilities
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cgiutils.h"
27	#include "fileutil.h"
28	#include "gsdlunicode.h"
29	#include "fileutil.h"
30	#include "unitool.h" // in mg, for output_utf8_char
31	#include <cstdlib>
32	#include <time.h>
33
34	#if defined(GSDL_USE_OBJECTSPACE)
35	# include <ospace\std\iostream>
36	# include <ospace\std\fstream>
37	#elif defined(GSDL_USE_IOS_H)
38	# include <iostream.h>
39	# include <fstream.h>
40	#else
41	# include <iostream>
42	# include <fstream>
43	#endif
44
45	// Switch for the security changes (url-encoding of cgi arguments): set to false to undo them. While it is false, safe_cgi_arg() and unsafe_cgi_arg() return immediately and leave their arguments untouched.
46	static bool do_safe_cgi_args = false;
47
// Translate one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// into its numeric value 0-15. Any other input is handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;   // non-hex input falls through untouched

  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'a' && c <= 'f') {
    value = c - 'a' + 10;
  } else if (c >= 'A' && c <= 'F') {
    value = c - 'A' + 10;
  }

  return value;
}
54
55
56	static void c2hex (unsigned short c, text_t &t) {
57	t.clear();
58
59	if (c >= 256) {
60	t = "20"; // ' '
61	return;
62	}
63
64	unsigned short o1, o2;
65
66	o1 = (c/16) % 16;
67	o2 = c % 16;
68	if (o1 >= 10) o1 += 'a' - 10;
69	else o1 += '0';
70	if (o2 >= 10) o2 += 'a' - 10;
71	else o2 += '0';
72
73	t.push_back(o1);
74	t.push_back(o2);
75	}
76
77	static text_t::iterator getline (text_t::iterator first,
78	text_t::iterator last,
79	bool include_crlf) {
80	while (first != last) {
81	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
82	// found <CRLF>
83	if (include_crlf) first += 2;
84	break;
85	}
86
87	first++;
88	}
89
90	return first;
91	}
92
93	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
94	text_t &filetype, bool &isfile, text_t &argstr,
95	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
96
97	if (!argname.empty()) {
98
99	if (!isfile) {
100	// argdata includes a trailing <CRLF> that we must remove
101	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
102	argdata.erase(argdata.end()-2, argdata.end());
103	}
104	if (!argstr.empty()) argstr += "&";
105
106	// we need to convert arg to cgi safe variant - escape '&' and '%', '+', '=', turn space to +
107	cgi_safe_post_arg(argdata);
108	argstr += argname + "=" + argdata;
109
110	} else if (!filename.empty()) {
111	// filedata includes a trailing <CRLF> that we must remove
112	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
113	filedata.erase(filedata.end()-2, filedata.end());
114	}
115
116	// create tmp_name for storing the file on disk, using the current timestamp
117	text_t tmp_name(time(NULL));
118	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
119
120	char *tmp_name_c = tmp_name.getcstr();
121
122	// write the file data to disk
123	outconvertclass out;
124	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
125	filestream << out << filedata;
126	filestream.close();
127	delete tmp_name_c;
128
129	// populate the fields of a fileupload_t and put it in the
130	// fileuploads map
131	fileupload_t fu;
132	// note that filename currently may or may not include the path since
133	// some browsers (e.g. IE) include the path while others
134	// (e.g. mozilla) do not. we should probably remove the path from
135	// this field here to get a consistent value across all browsers.
136	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
137	if (slash != filename.end()) {
138	filename = substr(slash+1, filename.end());
139	}
140	fu.name = filename;
141	fu.type = filetype;
142	// size has yet to be implemented
143	fu.size = filedata.size();
144
145	fu.tmp_name = tmp_name;
146	fileuploads[argname] = fu;
147	}
148	}
149	isfile = false;
150	argname.clear();
151	argdata.clear();
152	filename.clear();
153	filedata.clear();
154	filetype.clear();
155	}
156
157	// parse data obtained through a CGI POST request
158	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
159	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
160
161	text_t argstr;
162
163	text_t::iterator content_type_begin = content_type.begin();
164	text_t::iterator content_type_end = content_type.end();
165	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
166	// a simple post request
167	return raw_post_data;
168
169	} else {
170	// multipart/form data - may contain one or more uploaded files
171
172	/*
173	content_type should look something like the following
174	multipart/form-data; boundary=---------------------------7d411e1a50330
175
176	while raw_post_data will be as follows
177	-----------------------------7d43e73450330CRLF
178	Content-Disposition: form-data; name="e"<CRLF>
179	<CRLF>
180	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
181	-----------------------------7d43e73450330<CRLF>
182	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
183	Content-Type: application/msword<CRLF>
184	<CRLF>
185	<Content of file><CRLF>
186
187	*/
188
189	// first get the boundary from content-type
190	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
191	if (boundary_begin+9 < content_type_end)
192	{
193	// skip over "boundary=" part of string
194	boundary_begin += 9;
195	}
196	else {
197	// error
198	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
199	return "";
200	}
201	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
202	int boundary_len = boundary.size();
203
204
205	text_t argname, argdata, filename, filedata, filetype;
206	bool isfile = false;
207	text_t::iterator data_here = raw_post_data.begin();
208	text_t::iterator data_end = raw_post_data.end();
209	while (data_here != data_end) {
210
211	// get the next available line (including the trailing <CRLF>
212	text_t line = substr(data_here, getline(data_here, data_end, true));
213
214	data_here += line.size();
215	text_t::iterator line_begin = line.begin();
216	text_t::iterator line_end = line.end();
217	if (findword(line_begin, line_end, boundary) != line_end) {
218	// we've found a boundary
219	process_post_section(argname, argdata, filename, filedata, filetype,
220	isfile, argstr, fileuploads, gsdlhome);
221
222	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
223	// we've found the the beginning of a new section
224	argname.clear();
225	argdata.clear();
226
227	// get the name of this piece of form data
228	text_t::iterator it = findword(line_begin, line_end, "name=\"");
229	if (it == line_end) break; // error - this shouldn't happen
230	it = findchar(it, line_end, '"');
231	if ((it != line_end) && (it+1 != line_end)) {
232	argname = substr(it+1, findchar(it+1, line_end, '"'));
233	}
234
235	// if this piece of form data contains filename="" it's a file
236	// upload and needs to be treated special
237	it = (findword(line_begin, line_end, "filename=\""));
238	if (it != line_end) {
239	// we've found a file upload
240	isfile = true;
241	it = findchar(it, line_end, '"');
242	if ((it != line_end) && (it+1 != line_end)) {
243	filename = substr(it+1, findchar(it+1, line_end, '"'));
244	}
245
246	// the next line is the content-type of this section
247	line = substr(data_here, getline(data_here, data_end, true));
248	data_here += line.size();
249	line_begin = line.begin();
250	line_end = line.end();
251	it = (findword(line_begin, line_end, "Content-Type: "));
252	if (it != line_end) {
253	filetype = substr(it+14, getline(it, line_end, false));
254	}
255	}
256
257	// eat up the next line as it's just a <CRLF> on it's own
258	data_here += 2;
259
260	} else {
261	if (isfile) filedata += line;
262	else argdata += line;
263	}
264
265	}
266
267	// process last section
268	process_post_section(argname, argdata, filename, filedata, filetype,
269	isfile, argstr, fileuploads, gsdlhome);
270
271	return argstr;
272	}
273	}
274
275	// convert %xx and + to their appropriate equivalents
276	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
277	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
278	// actually was, then this returns utf-8, and needs to_uni on the
279	// result to get it back to unicode. If the encoding wasn't utf-8, then the
280	// output may be crap. Seems to work for 8 bit encodings.
281	// Really, this should be given the encoding, and should always return unicode.
282	void decode_cgi_arg (text_t &argstr) {
283	text_t::iterator in = argstr.begin();
284	text_t::iterator out = in;
285	text_t::iterator end = argstr.end();
286
287	while (in != end) {
288	if (in == '+') out = ' ';
289
290	else if (*in == '%') {
291	unsigned short c = '%';
292	++in;
293	if (in != end) { // this is an encoding...
294	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
295	// this assumes a short int is at least 16 bits...
296	++in;
297	if (in != end)
298	c=hexdigit(*in++) << 12;
299	if (in != end)
300	c+=hexdigit(*in++) << 8;
301	if (in != end)
302	c+=hexdigit(*in++) << 4;
303	if (in != end)
304	c+=hexdigit(*in);
305	/* BAD!! The following assumes the interface is using utf-8. But
306	at this point we don't know what encoding we are using, unless
307	we can parse it out of the string we are currently decoding... */
308	text_t uni=" ";
309	uni[0]=c;
310	text_t utf8=to_utf8(uni);
311	int last_byte=utf8.size()-1;
312	for (int i=0;i<last_byte;++i)
313	*out++ = utf8[i];
314	c=utf8[last_byte];
315	} else { // convert %HH to hex value
316	c = hexdigit (*in);
317	++in;
318	if (in != end && c < 16) { // sanity check on the previous character
319	c = c16 + hexdigit (in);
320	}
321	}
322	}
323	*out = c;
324	} else out = in;
325
326	if (in != end) ++in;
327	++out;
328	}
329
330	// remove the excess characters
331	argstr.erase (out, end);
332
333	}
334
335	//Need to escape special chars in post data so they don't interfere with arg parsing once its a get style string
336	void cgi_safe_post_arg(text_t &argstr) {
337
338	text_t::iterator in = argstr.begin();
339	text_t out = "";
340	text_t::iterator end = argstr.end();
341
342	while (in != end) {
343	if (*in == '&') out += "%26";
344	else if (*in == '%') out += "%2525";
345	else if (*in == '+') out += "%2B";
346	else if (*in == '=') out += "%3D";
347	else if (*in == ' ') out += "+";
348	else { // append whatever char is in *in, but as a char, not int
349	//out += *in; // appends as int
350	out.push_back(*in);
351	}
352	++in;
353	}
354
355	argstr.erase (argstr.begin(), end);
356	argstr += out;
357	}
358
359
360
361	// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
362	// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
363	// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
364	// apache log and that log file can be included in a local file inclusion (LFI) or
365	// remote file include (RFI) attack.
366	// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
367	// break out of an html/XML/javascript context.
368	void safe_cgi_arg (const text_t &key, text_t &argstr) {
369	if(!do_safe_cgi_args) {
370	return;
371	}
372
373	text_t::iterator in = argstr.begin();
374	text_t out = "";
375	text_t::iterator end = argstr.end();
376
377	while (in != end) {
378	if (*in == '<') out += "%3C";
379	else if (*in == '>') out += "%3E";
380	else if (*in == '&') out += "%26";
381	else if (*in == '\"') out += "%22";
382	else if (*in == '\'') out += "%27";
383	//else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
384	else { // append whatever char is in *in, but as a char, not int
385	//out += *in; // appends as int
386	out.push_back(*in);
387	}
388	++in;
389	}
390
391	argstr.erase (argstr.begin(), end);
392	argstr += out;
393	}
394
395
396	// given a list of characters (or "all") to decode, and given the string, str, where those
397	// characters are to be decoded, this method replaces any occurrences of the url-encoded
398	// variants of those characters with their actual characters in the given string str.
399	void unsafe_cgi_arg(const text_t &chars, text_t &str) {
400	if(!do_safe_cgi_args) {
401	return;
402	}
403
404	text_t allchars = "<>&\"\'/";
405
406	text_t chars_to_decode = (chars == "all" \|\| chars == "ALL") ? allchars : chars;
407
408	text_t::iterator in = chars_to_decode.begin();
409	text_t::iterator end = chars_to_decode.end();
410
411	char hex_char[4];
412
413	// using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
414
415	while (in != end) {
416
417	// *in is a character from the accepted list of chars_to_decode list
418
419	// 1. create the url-encoded value of the char *in in variable hex_char
420	// sprintf adds in a null byte at the end
421	sprintf(hex_char,"%%%02X",*in);
422
423	// 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
424	text_t tmp = "";
425	tmp.push_back(*in);
426
427	// 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
428	str.replace(hex_char, tmp);
429
430	++in;
431	}
432	}
433
434
435	// split up the cgi arguments
436	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
437	cgiargsclass &args) {
438	args.clear();
439
440	text_t::const_iterator here = argstr.begin();
441	text_t::const_iterator end = argstr.end();
442	// get seems to be not unicode, while post is, so don't want to just assume encoding is 1 (not unicode)
443	unsigned short args_encoding = argstr.getencoding();
444
445	text_t key, value;
446
447	// extract out the key=value pairs
448	while (here != end) {
449	// get the next key and value pair
450	here = getdelimitstr (here, end, '=', key);
451	here = getdelimitstr (here, end, '&', value);
452
453	// convert %xx and + to their appropriate equivalents
454	decode_cgi_arg (value);
455
456	safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
457
458	value.setencoding(args_encoding); //1 // other encoding
459	// store this key=value pair
460	if (!key.empty()) {
461
462	// if arg occurs multiple times (as is the case with multiple
463	// checkboxes using the same name) we'll create a comma separated
464	// list of all the values (this uses a hack that encodes naturally
465	// occurring commas as %2C - values will therefore need to be decoded
466	// again before use) - it should use an array instead
467	const cgiarginfo *info = argsinfo.getarginfo (key);
468	if (info==NULL) {
469	// If info is NULL, we can't tell if the arg is multiple value or not
470	// Because we need to have dynamically named arguments multivalued, we
471	// will always assume multiplevalue = true
472	// If the arg is not multi valued, then you need to decode the commas.
473	if (args.getarg(key)==NULL) {
474	// encode_commas returns a text_t without encoding bit set
475	text_t newvalue = encode_commas(value);
476	newvalue.setencoding(args_encoding);
477	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
478	}
479	else {
480	text_t newvalue = args[key];
481
482	newvalue += "," + encode_commas(value);
483	newvalue.setencoding(args_encoding); // other encoding
484	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
485	}
486	}
487	else {
488	if (info->multiplevalue) {
489
490	text_t newvalue = args[key];
491	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
492	newvalue += encode_commas(value);
493	newvalue.setencoding(args_encoding); // other encoding
494	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
495
496	} else {
497	args.setarg (key, value, cgiarg_t::cgi_arg);
498	}
499	}
500	}
501	}
502	}
503
504	text_t encode_commas (const text_t &intext) {
505
506	text_t outtext;
507
508	text_t::const_iterator here = intext.begin ();
509	text_t::const_iterator end = intext.end ();
510
511	while (here != end) {
512	if (*here == ',') outtext += "%2C";
513	else outtext.push_back (*here);
514	++here;
515	}
516	return outtext;
517	}
518
519	text_t decode_commas (const text_t &intext) {
520
521	text_t outtext;
522
523	text_t::const_iterator here = intext.begin ();
524	text_t::const_iterator end = intext.end ();
525
526	// for loop
527	int intext_len = intext.size();
528	for(int i = 0; i < intext_len; i++) {
529	if ((i+2)<intext_len) {
530	if(intext[i] == '%' && intext[i+1] == '2'
531	&& (intext[i+2] == 'C' \|\| intext[i+2] == 'c')) {
532	i += 2;
533	outtext.push_back(',');
534	continue;
535	}
536	}
537	outtext.push_back (intext[i]);
538	}
539	return outtext;
540	}
541
542	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
543	text_t minus_safe (const text_t &intext, bool utf8) {
544
545	text_t outtext;
546
547	text_t::const_iterator here = intext.begin ();
548	text_t::const_iterator end = intext.end ();
549
550	while (here != end) {
551	if (*here == '-') outtext += "Zz-";
552	else outtext.push_back (*here);
553	++here;
554	}
555	if (utf8) {
556	outtext = cgi_safe_utf8 (outtext);
557	} else {
558	outtext = cgi_safe_unicode (outtext);
559	}
560	return outtext;
561	}
562
563	// takes utf-8 input
564	text_t cgi_safe_utf8 (const text_t &intext) {
565	text_t outtext;
566
567	text_t::const_iterator here = intext.begin ();
568	text_t::const_iterator end = intext.end ();
569	unsigned short c;
570	text_t ttmp;
571
572	while (here != end) {
573	c = *here;
574	if (((c >= 'a') && (c <= 'z')) \|\|
575	((c >= 'A') && (c <= 'Z')) \|\|
576	((c >= '0') && (c <= '9')) \|\|
577	(c == '%') \|\| (c == '-')) {
578	// alphanumeric character
579	outtext.push_back(c);
580	} else if (c == ' ') {
581	// space
582	outtext.push_back('+');
583	} else if (c > 255) { // not utf-8 character
584	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
585	} else {
586	// everything else
587	outtext.push_back('%');
588	c2hex(c, ttmp);
589	outtext += ttmp;
590	}
591
592	++here;
593	}
594
595	return outtext;
596	}
597	// takes unicode input
598	text_t cgi_safe_unicode (const text_t &intext) {
599	text_t outtext;
600
601	text_t::const_iterator here = intext.begin ();
602	text_t::const_iterator end = intext.end ();
603	unsigned short c;
604	text_t ttmp;
605
606	while (here != end) {
607	c = *here;
608	if (((c >= 'a') && (c <= 'z')) \|\|
609	((c >= 'A') && (c <= 'Z')) \|\|
610	((c >= '0') && (c <= '9')) \|\|
611	(c == '%') \|\| (c == '-')) {
612	// alphanumeric character
613	outtext.push_back(c);
614	} else if (c == ' ') {
615	// space
616	outtext.push_back('+');
617	} else if (c > 127) { // unicode character
618	unsigned char buf[3]; // up to 3 bytes
619	buf[0]='\0';buf[1]='\0';buf[2]='\0';
620	output_utf8_char(c,buf, buf+2);
621	outtext.push_back('%');
622	c2hex(buf[0], ttmp);
623	outtext += ttmp;
624	outtext.push_back('%');
625	c2hex(buf[1], ttmp);
626	outtext += ttmp;
627	if (buf[2]) {
628	outtext.push_back('%');
629	c2hex(buf[2], ttmp);
630	outtext += ttmp;
631	}
632	} else {
633	// everything else
634	outtext.push_back('%');
635	c2hex(c, ttmp);
636	outtext += ttmp;
637	}
638
639	++here;
640	}
641
642	return outtext;
643	}
644
645
646
647
648	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
649	text_t::const_iterator last,
650	text_t &argname) {
651	first = getdelimitstr (first, last, '-', argname);
652	return first;
653	}
654
655
656	// check_save_conf_str checks the configuration string for
657	// the saved args and makes sure it does not conflict with
658	// the information about the arguments. If an error is encountered
659	// it will return false and the program should not produce any
660	// output.
661	bool check_save_conf_str (const text_t &saveconf,
662	const cgiargsinfoclass &argsinfo,
663	ostream &logout) {
664	outconvertclass text_t2ascii;
665
666	text_tset argsset;
667	text_t::const_iterator saveconfhere = saveconf.begin ();
668	text_t::const_iterator saveconfend = saveconf.end ();
669	text_t argname;
670	const cgiarginfo *info;
671
672	// first check to make sure all saved arguments can be saved
673
674	while (saveconfhere != saveconfend) {
675	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
676
677	if (!argname.empty()) {
678	// save the argument name for later
679	argsset.insert (argname);
680
681	// check the argument
682	info = argsinfo.getarginfo (argname);
683	if (info == NULL) {
684	logout << text_t2ascii << "Error: the cgi argument \"" << argname
685	<< "\" is used in the configuration string for the\n"
686	<< "saved arguments but does not exist as a valid argument.\n\n";
687	return false;
688	}
689	if (info->savedarginfo == cgiarginfo::mustnot) {
690	logout << text_t2ascii << "Error: the cgi argument \"" << argname
691	<< "\" is used in the configuration string for the\n"
692	<< "saved arguments but has been specified as an argument whose\n"
693	<< "state must not be saved.\n\n";
694	return false;
695	}
696	}
697	}
698
699
700	// next check that all saved arguments that should be saved
701	// are saved
702	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
703	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
704
705	while (argsinfohere != argsinfoend) {
706	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
707	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
708	logout << text_t2ascii << "Error: the cgi argument \""
709	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
710	<< "be save but was not listed in the saved arguments.\n\n";
711	return false;
712	}
713
714	++argsinfohere;
715	}
716
717	return true; // made it, no clashes
718	}
719
720
721	// create_save_conf_str will create a configuration string
722	// based on the information in argsinfo. This method of configuration
723	// is not recomended as small changes can produce large changes in
724	// the resulting configuration string (for instance a totally different
725	// ordering). Only arguments which "must" be saved are included in
726	// the resulting string.
727	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
728	ostream &/logout/) {
729	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
730	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
731	text_t saveconf;
732	bool first = true;
733
734	while (argsinfohere != argsinfoend) {
735	// save this argument if it must be saved
736	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
737	if (!first) saveconf.push_back ('-');
738	else first = false;
739	saveconf += (*argsinfohere).second.shortname;
740	}
741
742	++argsinfohere;
743	}
744
745	return saveconf;
746	}
747
748
749	// expand_save_args will expand the saved arguments based
750	// on saveconf placing the results in args if they are not
751	// already defined. If it encounters an error it will return false
752	// and output more information to logout.
753	bool expand_save_args (const cgiargsinfoclass &argsinfo,
754	const text_t &saveconf,
755	cgiargsclass &args,
756	ostream &logout) {
757	outconvertclass text_t2ascii;
758
759	text_t *arg_e = args.getarg("e");
760	if (arg_e == NULL) return true; // no compressed arguments
761	if (arg_e->empty()) return true; // no compressed arguments
762
763	text_t argname, argvalue;
764	const cgiarginfo *argnameinfo;
765
766	text_t::const_iterator saveconfhere = saveconf.begin();
767	text_t::const_iterator saveconfend = saveconf.end();
768
769	text_t::iterator arg_ebegin = arg_e->begin();
770	text_t::iterator arg_eend = arg_e->end();
771	text_t::iterator arg_ehere = arg_ebegin;
772	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
773	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
774
775	if (!argname.empty()) {
776	// found another entry
777	argnameinfo = argsinfo.getarginfo (argname);
778
779	if (argnameinfo == NULL) {
780	// no information about the argument could be found
781	// we can't keep going because we don't know whether
782	// this argument is a single or multiple character value
783	logout << text_t2ascii << "Error: the cgi argument \"" << argname
784	<< "\" was specified as being a compressed argument\n"
785	<< "but no information about it could be found within the "
786	<< "cgiargsinfoclass.\n";
787	return false;
788
789	} else {
790
791	// found the argument information
792	if (argnameinfo->multiplechar) {
793	text_t::const_iterator sav = arg_ehere;
794	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
795	if (distance(arg_ebegin, arg_ehere) > 2) {
796	// replace any '-' chars escaped with 'Zz'
797	bool first = true;
798	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
799	if (first) argvalue.clear();
800
801	// Hey, here's a wild idea. Why don't we check that there is
802	// another hyphen in the cgiarge before we get a pointer to it and
803	// add one. That way we are far less likely to wander off into
804	// random memory merrily parsing arguments that are then lovingly
805	// spewed all over the HTML page returned at the usage logs.
806	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
807	if (minus_itr == arg_eend)
808	{
809	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
810	return false;
811	}
812	arg_ehere = minus_itr + 1;
813
814	while (sav != (arg_ehere-1)) {
815	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
816	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
817	++sav;
818	}
819	first = false;
820	}
821	}
822	argvalue.setencoding(1); // other encoding
823	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
824	} else {
825	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
826	++arg_ehere;
827	}
828	}
829	}
830	}
831
832	return true;
833	}
834
835
836	// adds the default values for those arguments which have not
837	// been specified
838	void add_default_args (const cgiargsinfoclass &argsinfo,
839	cgiargsclass &args,
840	ostream &/logout/) {
841	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
842	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
843
844	while (argsinfohere != argsinfoend) {
845	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
846	args.setdefaultarg ((*argsinfohere).second.shortname,
847	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
848	}
849	++argsinfohere;
850	}
851	}
852
853	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
854	cgiargsclass &args,
855	fileupload_tmap &fileuploads,
856	ostream &logout) {
857
858	const cgiarginfo *info = argsinfo.getarginfo("a");
859	fileupload_tmap::const_iterator this_file = fileuploads.begin();
860	fileupload_tmap::const_iterator end_file = fileuploads.end();
861	while (this_file != end_file) {
862	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
863	if (info != NULL) {
864
865	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
866
867	args.setargfile((this_file).first, (this_file).second);
868	}
869	}
870	this_file++;
871	}
872	}
873
874	// compress_save_args will compress the arguments and return
875	// them in compressed_args. If an error was encountered
876	// compressed_args will be set to to "", an error will be
877	// written to logout, and the function will return false.
878	bool compress_save_args (const cgiargsinfoclass &argsinfo,
879	const text_t &saveconf,
880	cgiargsclass &args,
881	text_t &compressed_args,
882	outconvertclass &outconvert,
883	ostream &logout) {
884	outconvertclass text_t2ascii;
885
886	compressed_args.clear();
887
888	text_t argname, argvalue;
889	const cgiarginfo *argnameinfo;
890
891	text_t::const_iterator saveconfhere = saveconf.begin();
892	text_t::const_iterator saveconfend = saveconf.end();
893
894	while (saveconfhere != saveconfend) {
895	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
896
897	if (!argname.empty()) {
898	// found another entry
899	argnameinfo = argsinfo.getarginfo (argname);
900
901	if (argnameinfo == NULL) {
902	// no information about the argument could be found
903	// we can't keep going because we don't know whether
904	// this argument is a single or multiple character value
905	logout << text_t2ascii << "Error: the cgi argument \"" << argname
906	<< "\" was specified as being a compressed argument\n"
907	<< "but no information about it could be found within the "
908	<< "cgiargsinfoclass.\n";
909	compressed_args.clear();
910	return false;
911
912	} else {
913	// found the argument information
914	if (argnameinfo->multiplechar) {
915	// multiple character argument -- sort out any '-' chars
916	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
917	compressed_args += minus_safe (args[argname], false);
918	else
919	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
920
921	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
922
923	} else {
924	// single character argument
925	if (args[argname].size() == 0) {
926	logout << text_t2ascii << "Error: the cgi argument \"" << argname
927	<< "\" was specified as being a compressed argument which\n"
928	<< "should have a one character value but it was empty.\n\n";
929	compressed_args.clear ();
930	return false;
931
932	} else if (args[argname].size() > 1) {
933	logout << text_t2ascii << "Error: the cgi argument \"" << argname
934	<< "\" was specified as being a compressed argument which\n"
935	<< "should have a one character value but it had multiple characters.\n\n";
936	compressed_args.clear ();
937	return false;
938	}
939
940	// everything is ok
941	compressed_args += args[argname];
942	}
943	}
944	}
945	}
946
947	return true;
948	}
949
950
951	// args_tounicode converts any arguments which are not in unicode
952	// to unicode using inconvert
953	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
954	cgiargsclass::iterator here = args.begin();
955	cgiargsclass::iterator end = args.end();
956
957	while (here != end) {
958	if ((*here).second.value.getencoding() > 0) {
959	// Call reset() before converting each argument, to prevent problems when converting the last
960	// argument left the converter in a bad state
961	inconvert.reset();
962	(here).second.value = inconvert.convert((here).second.value);
963	}
964
965	++here;
966	}
967	}
968
969	// fcgienv will be loaded with environment name-value pairs
970	// if using fastcgi (had to do this as getenv doesn't work
971	// with our implementation of fastcgi). if fcgienv is empty
972	// we'll simply use getenv
973	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
974	if (fcgienv.empty()) {
975	char *n = name.getcstr();
976	char *v = getenv(n);
977	delete []n;
978	if (v != NULL) return v;
979	return g_EmptyText;
980
981	} else return fcgienv[name];
982	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 30465

Download in other formats: