Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 22044

Last change on this file since 22044 was 21997, checked in by mdewsnip, 14 years ago
Fixed nasty problem where random memory can be iterated through when "e" variables are badly formed. By John Thompson at DL Consulting Ltd.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 26.3 KB

Line
1	/**********************************************************************
2	*
3	* cgiutils.cpp -- general cgi utilities
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "cgiutils.h"
27	#include "md5.h"
28	#include "fileutil.h"
29	#include "gsdlunicode.h"
30	#include "fileutil.h"
31	#include "unitool.h" // in mg, for output_utf8_char
32	#include <cstdlib>
33
34	#if defined(GSDL_USE_OBJECTSPACE)
35	# include <ospace\std\iostream>
36	# include <ospace\std\fstream>
37	#elif defined(GSDL_USE_IOS_H)
38	# include <iostream.h>
39	# include <fstream.h>
40	#else
41	# include <iostream>
42	# include <fstream>
43	#endif
44
45
// Map a single hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// to its numeric value 0-15. Any other character is handed back
// unchanged; decode_cgi_arg uses a result of 16 or more as its sign that
// the character was not a hex digit.
static unsigned short hexdigit (unsigned short c) {
  const bool is_decimal = (c >= '0') && (c <= '9');
  const bool is_lower_hex = (c >= 'a') && (c <= 'f');
  const bool is_upper_hex = (c >= 'A') && (c <= 'F');

  if (is_decimal) return (c - '0');
  if (is_lower_hex) return (c - 'a' + 10);
  if (is_upper_hex) return (c - 'A' + 10);

  // not a hex digit: pass the character through untouched
  return c;
}
52
53
54	static void c2hex (unsigned short c, text_t &t) {
55	t.clear();
56
57	if (c >= 256) {
58	t = "20"; // ' '
59	return;
60	}
61
62	unsigned short o1, o2;
63
64	o1 = (c/16) % 16;
65	o2 = c % 16;
66	if (o1 >= 10) o1 += 'a' - 10;
67	else o1 += '0';
68	if (o2 >= 10) o2 += 'a' - 10;
69	else o2 += '0';
70
71	t.push_back(o1);
72	t.push_back(o2);
73	}
74
75	static text_t::iterator getline (text_t::iterator first,
76	text_t::iterator last,
77	bool include_crlf) {
78	while (first != last) {
79	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
80	// found <CRLF>
81	if (include_crlf) first += 2;
82	break;
83	}
84
85	first++;
86	}
87
88	return first;
89	}
90
91	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92	text_t &filetype, bool &isfile, text_t &argstr,
93	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95	if (!argname.empty()) {
96
97	if (!isfile) {
98	// argdata includes a trailing <CRLF> that we must remove
99	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
100	argdata.erase(argdata.end()-2, argdata.end());
101	}
102	if (!argstr.empty()) argstr += "&";
103	argstr += argname + "=" + argdata;
104
105	} else if (!filename.empty()) {
106	// filedata includes a trailing <CRLF> that we must remove
107	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
108	filedata.erase(filedata.end()-2, filedata.end());
109	}
110
111	// create tmp_name for storing the file on disk
112	text_t tmp_name = md5data(filedata);
113	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
114
115	char *tmp_name_c = tmp_name.getcstr();
116
117	// write the file data to disk
118	outconvertclass out;
119	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
120	filestream << out << filedata;
121	filestream.close();
122	delete tmp_name_c;
123
124	// populate the fields of a fileupload_t and put it in the
125	// fileuploads map
126	fileupload_t fu;
127	// note that filename currently may or may not include the path since
128	// some browsers (e.g. IE) include the path while others
129	// (e.g. mozilla) do not. we should probably remove the path from
130	// this field here to get a consistent value across all browsers.
131	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132	if (slash != filename.end()) {
133	filename = substr(slash+1, filename.end());
134	}
135	fu.name = filename;
136	fu.type = filetype;
137	// size has yet to be implemented
138	fu.size = filedata.size();
139
140	fu.tmp_name = tmp_name;
141	fileuploads[argname] = fu;
142	}
143	}
144	isfile = false;
145	argname.clear();
146	argdata.clear();
147	filename.clear();
148	filedata.clear();
149	filetype.clear();
150	}
151
152	// parse data obtained through a CGI POST request
153	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156	text_t argstr;
157
158	text_t::iterator content_type_begin = content_type.begin();
159	text_t::iterator content_type_end = content_type.end();
160	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161	// a simple post request
162
163	return raw_post_data;
164
165	} else {
166	// multipart/form data - may contain one or more uploaded files
167
168	/*
169	content_type should look something like the following
170	multipart/form-data; boundary=---------------------------7d411e1a50330
171
172	while raw_post_data will be as follows
173	-----------------------------7d43e73450330CRLF
174	Content-Disposition: form-data; name="e"<CRLF>
175	<CRLF>
176	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177	-----------------------------7d43e73450330<CRLF>
178	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179	Content-Type: application/msword<CRLF>
180	<CRLF>
181	<Content of file><CRLF>
182
183	*/
184
185	// first get the boundary from content-type
186	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
187	if (boundary_begin+9 < content_type_end)
188	{
189	// skip over "boundary=" part of string
190	boundary_begin += 9;
191	}
192	else {
193	// error
194	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
195	return "";
196	}
197	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198	int boundary_len = boundary.size();
199
200
201	text_t argname, argdata, filename, filedata, filetype;
202	bool isfile = false;
203	text_t::iterator data_here = raw_post_data.begin();
204	text_t::iterator data_end = raw_post_data.end();
205	while (data_here != data_end) {
206
207	// get the next available line (including the trailing <CRLF>
208	text_t line = substr(data_here, getline(data_here, data_end, true));
209
210	data_here += line.size();
211	text_t::iterator line_begin = line.begin();
212	text_t::iterator line_end = line.end();
213	if (findword(line_begin, line_end, boundary) != line_end) {
214	// we've found a boundary
215	process_post_section(argname, argdata, filename, filedata, filetype,
216	isfile, argstr, fileuploads, gsdlhome);
217
218	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219	// we've found the the beginning of a new section
220	argname.clear();
221	argdata.clear();
222
223	// get the name of this piece of form data
224	text_t::iterator it = findword(line_begin, line_end, "name=\"");
225	if (it == line_end) break; // error - this shouldn't happen
226	it = findchar(it, line_end, '"');
227	if ((it != line_end) && (it+1 != line_end)) {
228	argname = substr(it+1, findchar(it+1, line_end, '"'));
229	}
230
231	// if this piece of form data contains filename="" it's a file
232	// upload and needs to be treated special
233	it = (findword(line_begin, line_end, "filename=\""));
234	if (it != line_end) {
235	// we've found a file upload
236	isfile = true;
237	it = findchar(it, line_end, '"');
238	if ((it != line_end) && (it+1 != line_end)) {
239	filename = substr(it+1, findchar(it+1, line_end, '"'));
240	}
241
242	// the next line is the content-type of this section
243	line = substr(data_here, getline(data_here, data_end, true));
244	data_here += line.size();
245	line_begin = line.begin();
246	line_end = line.end();
247	it = (findword(line_begin, line_end, "Content-Type: "));
248	if (it != line_end) {
249	filetype = substr(it+14, getline(it, line_end, false));
250	}
251	}
252
253	// eat up the next line as it's just a <CRLF> on it's own
254	data_here += 2;
255
256	} else {
257	if (isfile) filedata += line;
258	else argdata += line;
259	}
260
261	}
262
263	// process last section
264	process_post_section(argname, argdata, filename, filedata, filetype,
265	isfile, argstr, fileuploads, gsdlhome);
266
267	return argstr;
268	}
269	}
270
271	// convert %xx and + to their appropriate equivalents
272	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
273	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274	// actually was, then this returns utf-8, and needs to_uni on the
275	// result to get it back to unicode. If the encoding wasn't utf-8, then the
276	// output may be crap. Seems to work for 8 bit encodings.
277	// Really, this should be given the encoding, and should always return unicode.
278	void decode_cgi_arg (text_t &argstr) {
279	text_t::iterator in = argstr.begin();
280	text_t::iterator out = in;
281	text_t::iterator end = argstr.end();
282
283	while (in != end) {
284	if (in == '+') out = ' ';
285
286	else if (*in == '%') {
287	unsigned short c = '%';
288	++in;
289	if (in != end) { // this is an encoding...
290	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291	// this assumes a short int is at least 16 bits...
292	++in;
293	if (in != end)
294	c=hexdigit(*in++) << 12;
295	if (in != end)
296	c+=hexdigit(*in++) << 8;
297	if (in != end)
298	c+=hexdigit(*in++) << 4;
299	if (in != end)
300	c+=hexdigit(*in);
301	/* BAD!! The following assumes the interface is using utf-8. But
302	at this point we don't know what encoding we are using, unless
303	we can parse it out of the string we are currently decoding... */
304	text_t uni=" ";
305	uni[0]=c;
306	text_t utf8=to_utf8(uni);
307	int last_byte=utf8.size()-1;
308	for (int i=0;i<last_byte;++i)
309	*out++ = utf8[i];
310	c=utf8[last_byte];
311	} else { // convert %HH to hex value
312	c = hexdigit (*in);
313	++in;
314	if (in != end && c < 16) { // sanity check on the previous character
315	c = c16 + hexdigit (in);
316	}
317	}
318	}
319	*out = c;
320	} else out = in;
321
322	if (in != end) ++in;
323	++out;
324	}
325
326	// remove the excess characters
327	argstr.erase (out, end);
328
329	}
330
331
332	// split up the cgi arguments
333	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
334	cgiargsclass &args) {
335	args.clear();
336
337	text_t::const_iterator here = argstr.begin();
338	text_t::const_iterator end = argstr.end();
339	text_t key, value;
340
341	// extract out the key=value pairs
342	while (here != end) {
343	// get the next key and value pair
344	here = getdelimitstr (here, end, '=', key);
345	here = getdelimitstr (here, end, '&', value);
346
347	// convert %xx and + to their appropriate equivalents
348	decode_cgi_arg (value);
349	value.setencoding(1); // other encoding
350	// store this key=value pair
351	if (!key.empty()) {
352
353	// if arg occurs multiple times (as is the case with multiple
354	// checkboxes using the same name) we'll create a comma separated
355	// list of all the values (this uses a hack that encodes naturally
356	// occurring commas as %2C - values will therefore need to be decoded
357	// again before use) - it should use an array instead
358	const cgiarginfo *info = argsinfo.getarginfo (key);
359	if (info==NULL) {
360	// If info is NULL, we can't tell if the arg is multiple value or not
361	// Because we need to have dynamically named arguments multivalued, we
362	// will always assume multiplevalue = true
363	// If the arg is not multi valued, then you need to decode the commas.
364	if (args.getarg(key)==NULL) {
365	args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
366	}
367	else {
368	text_t newvalue = args[key];
369
370	newvalue += "," + encode_commas(value);
371	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
372	}
373	}
374	else {
375	if (info->multiplevalue) {
376
377	text_t newvalue = args[key];
378	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
379	newvalue += encode_commas(value);
380	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
381
382	} else {
383	args.setarg (key, value, cgiarg_t::cgi_arg);
384	}
385	}
386	}
387	}
388	}
389
390	text_t encode_commas (const text_t &intext) {
391
392	text_t outtext;
393
394	text_t::const_iterator here = intext.begin ();
395	text_t::const_iterator end = intext.end ();
396
397	while (here != end) {
398	if (*here == ',') outtext += "%2C";
399	else outtext.push_back (*here);
400	++here;
401	}
402	return outtext;
403	}
404
405	text_t decode_commas (const text_t &intext) {
406
407	text_t outtext;
408
409	text_t::const_iterator here = intext.begin ();
410	text_t::const_iterator end = intext.end ();
411
412	while (here != end) {
413	if ((here+2<end) && here == '%' && (here+1) == '2' &&
414	((here+2) == 'C' \|\| (here+2) == 'c')) {
415	here += 2;
416	outtext.push_back(',');
417
418	}else outtext.push_back (*here);
419	++here;
420	}
421	return outtext;
422	}
423
424	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
425	text_t minus_safe (const text_t &intext, bool utf8) {
426
427	text_t outtext;
428
429	text_t::const_iterator here = intext.begin ();
430	text_t::const_iterator end = intext.end ();
431
432	while (here != end) {
433	if (*here == '-') outtext += "Zz-";
434	else outtext.push_back (*here);
435	++here;
436	}
437	if (utf8) {
438	outtext = cgi_safe_utf8 (outtext);
439	} else {
440	outtext = cgi_safe_unicode (outtext);
441	}
442	return outtext;
443	}
444
445	// takes utf-8 input
446	text_t cgi_safe_utf8 (const text_t &intext) {
447	text_t outtext;
448
449	text_t::const_iterator here = intext.begin ();
450	text_t::const_iterator end = intext.end ();
451	unsigned short c;
452	text_t ttmp;
453
454	while (here != end) {
455	c = *here;
456	if (((c >= 'a') && (c <= 'z')) \|\|
457	((c >= 'A') && (c <= 'Z')) \|\|
458	((c >= '0') && (c <= '9')) \|\|
459	(c == '%') \|\| (c == '-')) {
460	// alphanumeric character
461	outtext.push_back(c);
462	} else if (c == ' ') {
463	// space
464	outtext.push_back('+');
465	} else if (c > 255) { // not utf-8 character
466	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
467	} else {
468	// everything else
469	outtext.push_back('%');
470	c2hex(c, ttmp);
471	outtext += ttmp;
472	}
473
474	++here;
475	}
476
477	return outtext;
478	}
479	// takes unicode input
480	text_t cgi_safe_unicode (const text_t &intext) {
481	text_t outtext;
482
483	text_t::const_iterator here = intext.begin ();
484	text_t::const_iterator end = intext.end ();
485	unsigned short c;
486	text_t ttmp;
487
488	while (here != end) {
489	c = *here;
490	if (((c >= 'a') && (c <= 'z')) \|\|
491	((c >= 'A') && (c <= 'Z')) \|\|
492	((c >= '0') && (c <= '9')) \|\|
493	(c == '%') \|\| (c == '-')) {
494	// alphanumeric character
495	outtext.push_back(c);
496	} else if (c == ' ') {
497	// space
498	outtext.push_back('+');
499	} else if (c > 127) { // unicode character
500	unsigned char buf[3]; // up to 3 bytes
501	buf[0]='\0';buf[1]='\0';buf[2]='\0';
502	output_utf8_char(c,buf, buf+2);
503	outtext.push_back('%');
504	c2hex(buf[0], ttmp);
505	outtext += ttmp;
506	outtext.push_back('%');
507	c2hex(buf[1], ttmp);
508	outtext += ttmp;
509	if (buf[2]) {
510	outtext.push_back('%');
511	c2hex(buf[2], ttmp);
512	outtext += ttmp;
513	}
514	} else {
515	// everything else
516	outtext.push_back('%');
517	c2hex(c, ttmp);
518	outtext += ttmp;
519	}
520
521	++here;
522	}
523
524	return outtext;
525	}
526
527
528
529
530	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
531	text_t::const_iterator last,
532	text_t &argname) {
533	first = getdelimitstr (first, last, '-', argname);
534	return first;
535	}
536
537
538	// check_save_conf_str checks the configuration string for
539	// the saved args and makes sure it does not conflict with
540	// the information about the arguments. If an error is encountered
541	// it will return false and the program should not produce any
542	// output.
543	bool check_save_conf_str (const text_t &saveconf,
544	const cgiargsinfoclass &argsinfo,
545	ostream &logout) {
546	outconvertclass text_t2ascii;
547
548	text_tset argsset;
549	text_t::const_iterator saveconfhere = saveconf.begin ();
550	text_t::const_iterator saveconfend = saveconf.end ();
551	text_t argname;
552	const cgiarginfo *info;
553
554	// first check to make sure all saved arguments can be saved
555
556	while (saveconfhere != saveconfend) {
557	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
558
559	if (!argname.empty()) {
560	// save the argument name for later
561	argsset.insert (argname);
562
563	// check the argument
564	info = argsinfo.getarginfo (argname);
565	if (info == NULL) {
566	logout << text_t2ascii << "Error: the cgi argument \"" << argname
567	<< "\" is used in the configuration string for the\n"
568	<< "saved arguments but does not exist as a valid argument.\n\n";
569	return false;
570	}
571	if (info->savedarginfo == cgiarginfo::mustnot) {
572	logout << text_t2ascii << "Error: the cgi argument \"" << argname
573	<< "\" is used in the configuration string for the\n"
574	<< "saved arguments but has been specified as an argument whose\n"
575	<< "state must not be saved.\n\n";
576	return false;
577	}
578	}
579	}
580
581
582	// next check that all saved arguments that should be saved
583	// are saved
584	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
585	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
586
587	while (argsinfohere != argsinfoend) {
588	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
589	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
590	logout << text_t2ascii << "Error: the cgi argument \""
591	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
592	<< "be save but was not listed in the saved arguments.\n\n";
593	return false;
594	}
595
596	++argsinfohere;
597	}
598
599	return true; // made it, no clashes
600	}
601
602
603	// create_save_conf_str will create a configuration string
604	// based on the information in argsinfo. This method of configuration
605	// is not recomended as small changes can produce large changes in
606	// the resulting configuration string (for instance a totally different
607	// ordering). Only arguments which "must" be saved are included in
608	// the resulting string.
609	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
610	ostream &/logout/) {
611	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
612	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
613	text_t saveconf;
614	bool first = true;
615
616	while (argsinfohere != argsinfoend) {
617	// save this argument if it must be saved
618	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
619	if (!first) saveconf.push_back ('-');
620	else first = false;
621	saveconf += (*argsinfohere).second.shortname;
622	}
623
624	++argsinfohere;
625	}
626
627	return saveconf;
628	}
629
630
631	// expand_save_args will expand the saved arguments based
632	// on saveconf placing the results in args if they are not
633	// already defined. If it encounters an error it will return false
634	// and output more information to logout.
635	bool expand_save_args (const cgiargsinfoclass &argsinfo,
636	const text_t &saveconf,
637	cgiargsclass &args,
638	ostream &logout) {
639	outconvertclass text_t2ascii;
640
641	text_t *arg_e = args.getarg("e");
642	if (arg_e == NULL) return true; // no compressed arguments
643	if (arg_e->empty()) return true; // no compressed arguments
644
645	text_t argname, argvalue;
646	const cgiarginfo *argnameinfo;
647
648	text_t::const_iterator saveconfhere = saveconf.begin();
649	text_t::const_iterator saveconfend = saveconf.end();
650
651	text_t::iterator arg_ebegin = arg_e->begin();
652	text_t::iterator arg_eend = arg_e->end();
653	text_t::iterator arg_ehere = arg_ebegin;
654	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
655	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
656
657	if (!argname.empty()) {
658	// found another entry
659	argnameinfo = argsinfo.getarginfo (argname);
660
661	if (argnameinfo == NULL) {
662	// no information about the argument could be found
663	// we can't keep going because we don't know whether
664	// this argument is a single or multiple character value
665	logout << text_t2ascii << "Error: the cgi argument \"" << argname
666	<< "\" was specified as being a compressed argument\n"
667	<< "but no information about it could be found within the "
668	<< "cgiargsinfoclass.\n";
669	return false;
670
671	} else {
672
673	// found the argument information
674	if (argnameinfo->multiplechar) {
675	text_t::const_iterator sav = arg_ehere;
676	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
677	if (distance(arg_ebegin, arg_ehere) > 2) {
678	// replace any '-' chars escaped with 'Zz'
679	bool first = true;
680	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
681	if (first) argvalue.clear();
682
683	// Hey, here's a wild idea. Why don't we check that there is
684	// another hyphen in the cgiarge before we get a pointer to it and
685	// add one. That way we are far less likely to wander off into
686	// random memory merrily parsing arguments that are then lovingly
687	// spewed all over the HTML page returned at the usage logs.
688	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
689	if (minus_itr == arg_eend)
690	{
691	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
692	return false;
693	}
694	arg_ehere = minus_itr + 1;
695
696	while (sav != (arg_ehere-1)) {
697	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
698	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
699	++sav;
700	}
701	first = false;
702	}
703	}
704	argvalue.setencoding(1); // other encoding
705	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
706	} else {
707	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
708	++arg_ehere;
709	}
710	}
711	}
712	}
713
714	return true;
715	}
716
717
718	// adds the default values for those arguments which have not
719	// been specified
720	void add_default_args (const cgiargsinfoclass &argsinfo,
721	cgiargsclass &args,
722	ostream &/logout/) {
723	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
724	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
725
726	while (argsinfohere != argsinfoend) {
727	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
728	args.setdefaultarg ((*argsinfohere).second.shortname,
729	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
730	}
731	++argsinfohere;
732	}
733	}
734
735	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
736	cgiargsclass &args,
737	fileupload_tmap &fileuploads,
738	ostream &logout) {
739
740	const cgiarginfo *info = argsinfo.getarginfo("a");
741	fileupload_tmap::const_iterator this_file = fileuploads.begin();
742	fileupload_tmap::const_iterator end_file = fileuploads.end();
743	while (this_file != end_file) {
744	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
745	if (info != NULL) {
746
747	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
748
749	args.setargfile((this_file).first, (this_file).second);
750	}
751	}
752	this_file++;
753	}
754	}
755
756	// compress_save_args will compress the arguments and return
757	// them in compressed_args. If an error was encountered
758	// compressed_args will be set to to "", an error will be
759	// written to logout, and the function will return false.
760	bool compress_save_args (const cgiargsinfoclass &argsinfo,
761	const text_t &saveconf,
762	cgiargsclass &args,
763	text_t &compressed_args,
764	outconvertclass &outconvert,
765	ostream &logout) {
766	outconvertclass text_t2ascii;
767
768	compressed_args.clear();
769
770	text_t argname, argvalue;
771	const cgiarginfo *argnameinfo;
772
773	text_t::const_iterator saveconfhere = saveconf.begin();
774	text_t::const_iterator saveconfend = saveconf.end();
775
776	while (saveconfhere != saveconfend) {
777	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
778
779	if (!argname.empty()) {
780	// found another entry
781	argnameinfo = argsinfo.getarginfo (argname);
782
783	if (argnameinfo == NULL) {
784	// no information about the argument could be found
785	// we can't keep going because we don't know whether
786	// this argument is a single or multiple character value
787	logout << text_t2ascii << "Error: the cgi argument \"" << argname
788	<< "\" was specified as being a compressed argument\n"
789	<< "but no information about it could be found within the "
790	<< "cgiargsinfoclass.\n";
791	compressed_args.clear();
792	return false;
793
794	} else {
795	// found the argument information
796	if (argnameinfo->multiplechar) {
797	// multiple character argument -- sort out any '-' chars
798	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
799	compressed_args += minus_safe (args[argname], false);
800	else
801	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
802
803	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
804
805	} else {
806	// single character argument
807	if (args[argname].size() == 0) {
808	logout << text_t2ascii << "Error: the cgi argument \"" << argname
809	<< "\" was specified as being a compressed argument which\n"
810	<< "should have a one character value but it was empty.\n\n";
811	compressed_args.clear ();
812	return false;
813
814	} else if (args[argname].size() > 1) {
815	logout << text_t2ascii << "Error: the cgi argument \"" << argname
816	<< "\" was specified as being a compressed argument which\n"
817	<< "should have a one character value but it had multiple characters.\n\n";
818	compressed_args.clear ();
819	return false;
820	}
821
822	// everything is ok
823	compressed_args += args[argname];
824	}
825	}
826	}
827	}
828
829	return true;
830	}
831
832
833	// args_tounicode converts any arguments which are not in unicode
834	// to unicode using inconvert
835	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
836	cgiargsclass::iterator here = args.begin();
837	cgiargsclass::iterator end = args.end();
838
839	while (here != end) {
840	if ((*here).second.value.getencoding() > 0) {
841	// Call reset() before converting each argument, to prevent problems when converting the last
842	// argument left the converter in a bad state
843	inconvert.reset();
844	(here).second.value = inconvert.convert((here).second.value);
845	}
846
847	++here;
848	}
849	}
850
851	// fcgienv will be loaded with environment name-value pairs
852	// if using fastcgi (had to do this as getenv doesn't work
853	// with our implementation of fastcgi). if fcgienv is empty
854	// we'll simply use getenv
855	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
856	if (fcgienv.empty()) {
857	char *n = name.getcstr();
858	char *v = getenv(n);
859	delete []n;
860	if (v != NULL) return v;
861	return g_EmptyText;
862
863	} else return fcgienv[name];
864	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: