Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 22942

Last change on this file since 22942 was 22942, checked in by ak19, 14 years ago
For ticket no 712 again. Tidier code in decode_commas function. Related to previous commit to fix a crash that occurred when using a combination of advanced and fielded searching - in an MGPP collection (server.exe and library.cgi would crash depending on which web server was used): 1. When parsing cgi args, arrays stem and fold contained the URL encodings percent-2-C rather than commas for delimiters and weren't split properly resulting in arrays of unexpected lengths (and values). Need to decode the percent-2-C to commas by calling decode_commas() in cgiutils.cpp before splitting. 2. decode_commas in cgiutils.cpp was performing an illegal iterator operation by attempting to peek PAST the end of the iterator which doesn't seem to be allowed by the STL code. When the iteration really got past the end, the iteration operation causes a problem resulting in a server.exe crash of its own.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 26.5 KB

Rev	Line
[108]	1	/**********************************************************************
	2	*
	3	* cgiutils.cpp -- general cgi utilities
	4	* Copyright (C) 1999 The New Zealand Digital Library Project
	5	*
[533]	6	* A component of the Greenstone digital library software
	7	* from the New Zealand Digital Library Project at the
	8	* University of Waikato, New Zealand.
[108]	9	*
[533]	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License as published by
	12	* the Free Software Foundation; either version 2 of the License, or
	13	* (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	* GNU General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public License
	21	* along with this program; if not, write to the Free Software
	22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	*
[108]	24	*********************************************************************/
	25
	26	#include "cgiutils.h"
[12513]	27	#include "fileutil.h"
[3151]	28	#include "gsdlunicode.h"
[11998]	29	#include "fileutil.h"
[3217]	30	#include "unitool.h" // in mg, for output_utf8_char
[18882]	31	#include <cstdlib>
[22796]	32	#include <time.h>
[108]	33
[12513]	34	#if defined(GSDL_USE_OBJECTSPACE)
	35	# include <ospace\std\iostream>
	36	# include <ospace\std\fstream>
	37	#elif defined(GSDL_USE_IOS_H)
	38	# include <iostream.h>
	39	# include <fstream.h>
	40	#else
	41	# include <iostream>
	42	# include <fstream>
	43	#endif
[11998]	44
[12513]	45
// Translate a single hexadecimal digit character into its numeric value.
// '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15. Any other input is
// handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = static_cast<unsigned short>(c - '0');
  else if ('a' <= c && c <= 'f') value = static_cast<unsigned short>(10 + (c - 'a'));
  else if ('A' <= c && c <= 'F') value = static_cast<unsigned short>(10 + (c - 'A'));

  return value;
}
	52
	53
	54	static void c2hex (unsigned short c, text_t &t) {
	55	t.clear();
	56
	57	if (c >= 256) {
	58	t = "20"; // ' '
	59	return;
	60	}
	61
	62	unsigned short o1, o2;
	63
	64	o1 = (c/16) % 16;
	65	o2 = c % 16;
	66	if (o1 >= 10) o1 += 'a' - 10;
	67	else o1 += '0';
	68	if (o2 >= 10) o2 += 'a' - 10;
	69	else o2 += '0';
	70
	71	t.push_back(o1);
	72	t.push_back(o2);
	73	}
	74
[12513]	75	static text_t::iterator getline (text_t::iterator first,
	76	text_t::iterator last,
	77	bool include_crlf) {
	78	while (first != last) {
	79	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
	80	// found <CRLF>
	81	if (include_crlf) first += 2;
	82	break;
	83	}
[12794]	84
[12513]	85	first++;
	86	}
[12794]	87
[12513]	88	return first;
	89	}
	90
	91	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
	92	text_t &filetype, bool &isfile, text_t &argstr,
	93	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
	94
	95	if (!argname.empty()) {
	96
	97	if (!isfile) {
	98	// argdata includes a trailing <CRLF> that we must remove
	99	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
	100	argdata.erase(argdata.end()-2, argdata.end());
	101	}
	102	if (!argstr.empty()) argstr += "&";
	103	argstr += argname + "=" + argdata;
	104
	105	} else if (!filename.empty()) {
	106	// filedata includes a trailing <CRLF> that we must remove
	107	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
	108	filedata.erase(filedata.end()-2, filedata.end());
	109	}
	110
[22791]	111	// create tmp_name for storing the file on disk, using the current timestamp
	112	text_t tmp_name(time(NULL));
[12513]	113	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
[12794]	114
[12513]	115	char *tmp_name_c = tmp_name.getcstr();
	116
	117	// write the file data to disk
	118	outconvertclass out;
	119	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
	120	filestream << out << filedata;
	121	filestream.close();
	122	delete tmp_name_c;
	123
	124	// populate the fields of a fileupload_t and put it in the
	125	// fileuploads map
	126	fileupload_t fu;
	127	// note that filename currently may or may not include the path since
	128	// some browsers (e.g. IE) include the path while others
	129	// (e.g. mozilla) do not. we should probably remove the path from
	130	// this field here to get a consistent value across all browsers.
	131	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
	132	if (slash != filename.end()) {
	133	filename = substr(slash+1, filename.end());
	134	}
	135	fu.name = filename;
	136	fu.type = filetype;
	137	// size has yet to be implemented
[12579]	138	fu.size = filedata.size();
[12794]	139
[12513]	140	fu.tmp_name = tmp_name;
	141	fileuploads[argname] = fu;
	142	}
	143	}
	144	isfile = false;
	145	argname.clear();
	146	argdata.clear();
	147	filename.clear();
	148	filedata.clear();
	149	filetype.clear();
	150	}
	151
	152	// parse data obtained through a CGI POST request
	153	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
	154	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
	155
	156	text_t argstr;
	157
	158	text_t::iterator content_type_begin = content_type.begin();
	159	text_t::iterator content_type_end = content_type.end();
	160	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
	161	// a simple post request
[12794]	162
[12513]	163	return raw_post_data;
	164
	165	} else {
	166	// multipart/form data - may contain one or more uploaded files
	167
	168	/*
	169	content_type should look something like the following
	170	multipart/form-data; boundary=---------------------------7d411e1a50330
	171
	172	while raw_post_data will be as follows
	173	-----------------------------7d43e73450330CRLF
	174	Content-Disposition: form-data; name="e"<CRLF>
	175	<CRLF>
	176	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
	177	-----------------------------7d43e73450330<CRLF>
	178	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
	179	Content-Type: application/msword<CRLF>
	180	<CRLF>
	181	<Content of file><CRLF>
	182
	183	*/
	184
	185	// first get the boundary from content-type
	186	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
[12794]	187	if (boundary_begin+9 < content_type_end)
	188	{
	189	// skip over "boundary=" part of string
	190	boundary_begin += 9;
	191	}
[12513]	192	else {
	193	// error
[12796]	194	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
[12513]	195	return "";
	196	}
	197	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
	198	int boundary_len = boundary.size();
	199
[12794]	200
[12513]	201	text_t argname, argdata, filename, filedata, filetype;
	202	bool isfile = false;
	203	text_t::iterator data_here = raw_post_data.begin();
	204	text_t::iterator data_end = raw_post_data.end();
	205	while (data_here != data_end) {
	206
	207	// get the next available line (including the trailing <CRLF>
	208	text_t line = substr(data_here, getline(data_here, data_end, true));
[12794]	209
[12513]	210	data_here += line.size();
	211	text_t::iterator line_begin = line.begin();
	212	text_t::iterator line_end = line.end();
	213	if (findword(line_begin, line_end, boundary) != line_end) {
	214	// we've found a boundary
	215	process_post_section(argname, argdata, filename, filedata, filetype,
	216	isfile, argstr, fileuploads, gsdlhome);
	217
	218	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
	219	// we've found the the beginning of a new section
	220	argname.clear();
	221	argdata.clear();
	222
	223	// get the name of this piece of form data
	224	text_t::iterator it = findword(line_begin, line_end, "name=\"");
	225	if (it == line_end) break; // error - this shouldn't happen
	226	it = findchar(it, line_end, '"');
	227	if ((it != line_end) && (it+1 != line_end)) {
	228	argname = substr(it+1, findchar(it+1, line_end, '"'));
	229	}
	230
	231	// if this piece of form data contains filename="" it's a file
	232	// upload and needs to be treated special
	233	it = (findword(line_begin, line_end, "filename=\""));
	234	if (it != line_end) {
	235	// we've found a file upload
	236	isfile = true;
	237	it = findchar(it, line_end, '"');
	238	if ((it != line_end) && (it+1 != line_end)) {
	239	filename = substr(it+1, findchar(it+1, line_end, '"'));
	240	}
	241
	242	// the next line is the content-type of this section
	243	line = substr(data_here, getline(data_here, data_end, true));
	244	data_here += line.size();
	245	line_begin = line.begin();
	246	line_end = line.end();
	247	it = (findword(line_begin, line_end, "Content-Type: "));
	248	if (it != line_end) {
	249	filetype = substr(it+14, getline(it, line_end, false));
	250	}
	251	}
	252
	253	// eat up the next line as it's just a <CRLF> on it's own
	254	data_here += 2;
	255
	256	} else {
	257	if (isfile) filedata += line;
	258	else argdata += line;
	259	}
[12794]	260
[12513]	261	}
	262
	263	// process last section
	264	process_post_section(argname, argdata, filename, filedata, filetype,
	265	isfile, argstr, fileuploads, gsdlhome);
	266
	267	return argstr;
	268	}
	269	}
	270
[108]	271	// convert %xx and + to their appropriate equivalents
[3217]	272	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
[13461]	273	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
	274	// actually was, then this returns utf-8, and needs to_uni on the
	275	// result to get it back to unicode. If the encoding wasn't utf-8, then the
	276	// output may be crap. Seems to work for 8 bit encodings.
	277	// Really, this should be given the encoding, and should always return unicode.
[108]	278	void decode_cgi_arg (text_t &argstr) {
	279	text_t::iterator in = argstr.begin();
	280	text_t::iterator out = in;
	281	text_t::iterator end = argstr.end();
	282
	283	while (in != end) {
	284	if (in == '+') out = ' ';
	285
	286	else if (*in == '%') {
	287	unsigned short c = '%';
[3151]	288	++in;
	289	if (in != end) { // this is an encoding...
	290	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
	291	// this assumes a short int is at least 16 bits...
	292	++in;
	293	if (in != end)
	294	c=hexdigit(*in++) << 12;
	295	if (in != end)
	296	c+=hexdigit(*in++) << 8;
	297	if (in != end)
	298	c+=hexdigit(*in++) << 4;
	299	if (in != end)
	300	c+=hexdigit(*in);
	301	/* BAD!! The following assumes the interface is using utf-8. But
	302	at this point we don't know what encoding we are using, unless
	303	we can parse it out of the string we are currently decoding... */
	304	text_t uni=" ";
	305	uni[0]=c;
	306	text_t utf8=to_utf8(uni);
	307	int last_byte=utf8.size()-1;
[9620]	308	for (int i=0;i<last_byte;++i)
[3151]	309	*out++ = utf8[i];
	310	c=utf8[last_byte];
	311	} else { // convert %HH to hex value
	312	c = hexdigit (*in);
	313	++in;
	314	if (in != end && c < 16) { // sanity check on the previous character
	315	c = c16 + hexdigit (in);
	316	}
	317	}
[108]	318	}
	319	*out = c;
	320	} else out = in;
	321
[9620]	322	if (in != end) ++in;
	323	++out;
[108]	324	}
	325
	326	// remove the excess characters
	327	argstr.erase (out, end);
[13461]	328
[108]	329	}
	330
	331
	332	// split up the cgi arguments
[776]	333	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
	334	cgiargsclass &args) {
[108]	335	args.clear();
	336
[7432]	337	text_t::const_iterator here = argstr.begin();
	338	text_t::const_iterator end = argstr.end();
[108]	339	text_t key, value;
[11998]	340
[108]	341	// extract out the key=value pairs
	342	while (here != end) {
	343	// get the next key and value pair
	344	here = getdelimitstr (here, end, '=', key);
	345	here = getdelimitstr (here, end, '&', value);
	346
	347	// convert %xx and + to their appropriate equivalents
[614]	348	decode_cgi_arg (value);
[108]	349	value.setencoding(1); // other encoding
	350	// store this key=value pair
[764]	351	if (!key.empty()) {
[2426]	352
	353	// if arg occurs multiple times (as is the case with multiple
	354	// checkboxes using the same name) we'll create a comma separated
	355	// list of all the values (this uses a hack that encodes naturally
	356	// occurring commas as %2C - values will therefore need to be decoded
	357	// again before use) - it should use an array instead
[2417]	358	const cgiarginfo *info = argsinfo.getarginfo (key);
[12562]	359	if (info==NULL) {
	360	// If info is NULL, we can't tell if the arg is multiple value or not
	361	// Because we need to have dynamically named arguments multivalued, we
	362	// will always assume multiplevalue = true
	363	// If the arg is not multi valued, then you need to decode the commas.
	364	if (args.getarg(key)==NULL) {
	365	args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
	366	}
	367	else {
	368	text_t newvalue = args[key];
[2417]	369
[12562]	370	newvalue += "," + encode_commas(value);
[22404]	371	newvalue.setencoding(1); // other encoding
[12562]	372	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
	373	}
[2417]	374	}
[12562]	375	else {
	376	if (info->multiplevalue) {
	377
	378	text_t newvalue = args[key];
	379	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
	380	newvalue += encode_commas(value);
[22404]	381	newvalue.setencoding(1); // other encoding
[12562]	382	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
	383
	384	} else {
	385	args.setarg (key, value, cgiarg_t::cgi_arg);
	386	}
	387	}
[764]	388	}
[108]	389	}
	390	}
	391
[2426]	392	text_t encode_commas (const text_t &intext) {
	393
	394	text_t outtext;
	395
	396	text_t::const_iterator here = intext.begin ();
	397	text_t::const_iterator end = intext.end ();
	398
	399	while (here != end) {
	400	if (*here == ',') outtext += "%2C";
	401	else outtext.push_back (*here);
[9620]	402	++here;
[2426]	403	}
	404	return outtext;
	405	}
	406
	407	text_t decode_commas (const text_t &intext) {
	408
	409	text_t outtext;
	410
	411	text_t::const_iterator here = intext.begin ();
	412	text_t::const_iterator end = intext.end ();
	413
[22942]	414	// for loop
	415	int intext_len = intext.size();
	416	for(int i = 0; i < intext_len; i++) {
	417	if ((i+2)<intext_len) {
	418	if(intext[i] == '%' && intext[i+1] == '2'
	419	&& (intext[i+2] == 'C' \|\| intext[i+2] == 'c')) {
	420	i += 2;
	421	outtext.push_back(',');
	422	continue;
	423	}
	424	}
	425	outtext.push_back (intext[i]);
	426	}
	427	return outtext;
[2426]	428	}
	429
[13456]	430	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
	431	text_t minus_safe (const text_t &intext, bool utf8) {
[607]	432
	433	text_t outtext;
	434
	435	text_t::const_iterator here = intext.begin ();
	436	text_t::const_iterator end = intext.end ();
	437
	438	while (here != end) {
[1504]	439	if (*here == '-') outtext += "Zz-";
[607]	440	else outtext.push_back (*here);
[9620]	441	++here;
[607]	442	}
[13456]	443	if (utf8) {
	444	outtext = cgi_safe_utf8 (outtext);
	445	} else {
	446	outtext = cgi_safe_unicode (outtext);
	447	}
[607]	448	return outtext;
	449	}
	450
[13456]	451	// takes utf-8 input
	452	text_t cgi_safe_utf8 (const text_t &intext) {
[108]	453	text_t outtext;
	454
	455	text_t::const_iterator here = intext.begin ();
	456	text_t::const_iterator end = intext.end ();
	457	unsigned short c;
	458	text_t ttmp;
	459
	460	while (here != end) {
	461	c = *here;
	462	if (((c >= 'a') && (c <= 'z')) \|\|
	463	((c >= 'A') && (c <= 'Z')) \|\|
[474]	464	((c >= '0') && (c <= '9')) \|\|
[13456]	465	(c == '%') \|\| (c == '-')) {
[108]	466	// alphanumeric character
	467	outtext.push_back(c);
	468	} else if (c == ' ') {
	469	// space
[150]	470	outtext.push_back('+');
[13456]	471	} else if (c > 255) { // not utf-8 character
	472	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
	473	} else {
	474	// everything else
	475	outtext.push_back('%');
	476	c2hex(c, ttmp);
	477	outtext += ttmp;
	478	}
	479
	480	++here;
	481	}
	482
	483	return outtext;
	484	}
	485	// takes unicode input
	486	text_t cgi_safe_unicode (const text_t &intext) {
	487	text_t outtext;
	488
	489	text_t::const_iterator here = intext.begin ();
	490	text_t::const_iterator end = intext.end ();
	491	unsigned short c;
	492	text_t ttmp;
	493
	494	while (here != end) {
	495	c = *here;
	496	if (((c >= 'a') && (c <= 'z')) \|\|
	497	((c >= 'A') && (c <= 'Z')) \|\|
	498	((c >= '0') && (c <= '9')) \|\|
	499	(c == '%') \|\| (c == '-')) {
	500	// alphanumeric character
	501	outtext.push_back(c);
	502	} else if (c == ' ') {
	503	// space
	504	outtext.push_back('+');
[13103]	505	} else if (c > 127) { // unicode character
[3217]	506	unsigned char buf[3]; // up to 3 bytes
	507	buf[0]='\0';buf[1]='\0';buf[2]='\0';
	508	output_utf8_char(c,buf, buf+2);
	509	outtext.push_back('%');
	510	c2hex(buf[0], ttmp);
	511	outtext += ttmp;
	512	outtext.push_back('%');
	513	c2hex(buf[1], ttmp);
	514	outtext += ttmp;
	515	if (buf[2]) {
	516	outtext.push_back('%');
	517	c2hex(buf[2], ttmp);
	518	outtext += ttmp;
	519	}
[108]	520	} else {
	521	// everything else
	522	outtext.push_back('%');
	523	c2hex(c, ttmp);
	524	outtext += ttmp;
	525	}
	526
[9620]	527	++here;
[108]	528	}
	529
	530	return outtext;
	531	}
[155]	532
	533
	534
	535
	536	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
	537	text_t::const_iterator last,
	538	text_t &argname) {
	539	first = getdelimitstr (first, last, '-', argname);
	540	return first;
	541	}
	542
	543
	544	// check_save_conf_str checks the configuration string for
	545	// the saved args and makes sure it does not conflict with
	546	// the information about the arguments. If an error is encountered
	547	// it will return false and the program should not produce any
	548	// output.
	549	bool check_save_conf_str (const text_t &saveconf,
	550	const cgiargsinfoclass &argsinfo,
	551	ostream &logout) {
	552	outconvertclass text_t2ascii;
	553
	554	text_tset argsset;
	555	text_t::const_iterator saveconfhere = saveconf.begin ();
	556	text_t::const_iterator saveconfend = saveconf.end ();
	557	text_t argname;
	558	const cgiarginfo *info;
	559
	560	// first check to make sure all saved arguments can be saved
	561
	562	while (saveconfhere != saveconfend) {
	563	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	564
	565	if (!argname.empty()) {
	566	// save the argument name for later
	567	argsset.insert (argname);
	568
	569	// check the argument
	570	info = argsinfo.getarginfo (argname);
	571	if (info == NULL) {
	572	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	573	<< "\" is used in the configuration string for the\n"
	574	<< "saved arguments but does not exist as a valid argument.\n\n";
	575	return false;
	576	}
	577	if (info->savedarginfo == cgiarginfo::mustnot) {
	578	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	579	<< "\" is used in the configuration string for the\n"
	580	<< "saved arguments but has been specified as an argument whose\n"
	581	<< "state must not be saved.\n\n";
	582	return false;
	583	}
	584	}
	585	}
	586
	587
	588	// next check that all saved arguments that should be saved
	589	// are saved
	590	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	591	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	592
	593	while (argsinfohere != argsinfoend) {
	594	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
	595	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
	596	logout << text_t2ascii << "Error: the cgi argument \""
	597	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
	598	<< "be save but was not listed in the saved arguments.\n\n";
	599	return false;
	600	}
	601
[9620]	602	++argsinfohere;
[155]	603	}
	604
	605	return true; // made it, no clashes
	606	}
	607
	608
	609	// create_save_conf_str will create a configuration string
	610	// based on the information in argsinfo. This method of configuration
	611	// is not recomended as small changes can produce large changes in
	612	// the resulting configuration string (for instance a totally different
	613	// ordering). Only arguments which "must" be saved are included in
	614	// the resulting string.
	615	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
	616	ostream &/logout/) {
	617	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	618	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	619	text_t saveconf;
	620	bool first = true;
	621
	622	while (argsinfohere != argsinfoend) {
	623	// save this argument if it must be saved
	624	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
	625	if (!first) saveconf.push_back ('-');
	626	else first = false;
	627	saveconf += (*argsinfohere).second.shortname;
	628	}
	629
[9620]	630	++argsinfohere;
[155]	631	}
	632
	633	return saveconf;
	634	}
	635
	636
	637	// expand_save_args will expand the saved arguments based
	638	// on saveconf placing the results in args if they are not
	639	// already defined. If it encounters an error it will return false
	640	// and output more information to logout.
	641	bool expand_save_args (const cgiargsinfoclass &argsinfo,
	642	const text_t &saveconf,
	643	cgiargsclass &args,
	644	ostream &logout) {
	645	outconvertclass text_t2ascii;
	646
	647	text_t *arg_e = args.getarg("e");
	648	if (arg_e == NULL) return true; // no compressed arguments
	649	if (arg_e->empty()) return true; // no compressed arguments
	650
	651	text_t argname, argvalue;
	652	const cgiarginfo *argnameinfo;
	653
	654	text_t::const_iterator saveconfhere = saveconf.begin();
	655	text_t::const_iterator saveconfend = saveconf.end();
	656
[11259]	657	text_t::iterator arg_ebegin = arg_e->begin();
[155]	658	text_t::iterator arg_eend = arg_e->end();
[11259]	659	text_t::iterator arg_ehere = arg_ebegin;
[155]	660	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
	661	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	662
	663	if (!argname.empty()) {
	664	// found another entry
	665	argnameinfo = argsinfo.getarginfo (argname);
	666
	667	if (argnameinfo == NULL) {
	668	// no information about the argument could be found
	669	// we can't keep going because we don't know whether
	670	// this argument is a single or multiple character value
	671	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	672	<< "\" was specified as being a compressed argument\n"
	673	<< "but no information about it could be found within the "
	674	<< "cgiargsinfoclass.\n";
	675	return false;
	676
	677	} else {
[294]	678
[155]	679	// found the argument information
	680	if (argnameinfo->multiplechar) {
[607]	681	text_t::const_iterator sav = arg_ehere;
[155]	682	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
[11259]	683	if (distance(arg_ebegin, arg_ehere) > 2) {
	684	// replace any '-' chars escaped with 'Zz'
	685	bool first = true;
	686	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
	687	if (first) argvalue.clear();
[21997]	688
	689	// Hey, here's a wild idea. Why don't we check that there is
	690	// another hyphen in the cgiarge before we get a pointer to it and
	691	// add one. That way we are far less likely to wander off into
	692	// random memory merrily parsing arguments that are then lovingly
	693	// spewed all over the HTML page returned at the usage logs.
	694	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
	695	if (minus_itr == arg_eend)
	696	{
	697	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
	698	return false;
	699	}
	700	arg_ehere = minus_itr + 1;
	701
[11259]	702	while (sav != (arg_ehere-1)) {
	703	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
	704	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
	705	++sav;
	706	}
	707	first = false;
[607]	708	}
	709	}
[294]	710	argvalue.setencoding(1); // other encoding
[366]	711	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
[155]	712	} else {
[366]	713	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
[9620]	714	++arg_ehere;
[155]	715	}
	716	}
	717	}
	718	}
	719
	720	return true;
	721	}
	722
	723
	724	// adds the default values for those arguments which have not
	725	// been specified
	726	void add_default_args (const cgiargsinfoclass &argsinfo,
	727	cgiargsclass &args,
	728	ostream &/logout/) {
	729	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	730	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	731
	732	while (argsinfohere != argsinfoend) {
	733	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
	734	args.setdefaultarg ((*argsinfohere).second.shortname,
[366]	735	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
[155]	736	}
[9620]	737	++argsinfohere;
[155]	738	}
	739	}
	740
[12513]	741	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
	742	cgiargsclass &args,
	743	fileupload_tmap &fileuploads,
	744	ostream &logout) {
	745
	746	const cgiarginfo *info = argsinfo.getarginfo("a");
	747	fileupload_tmap::const_iterator this_file = fileuploads.begin();
	748	fileupload_tmap::const_iterator end_file = fileuploads.end();
	749	while (this_file != end_file) {
	750	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
	751	if (info != NULL) {
[12794]	752
[12513]	753	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
[12794]	754
[12513]	755	args.setargfile((this_file).first, (this_file).second);
	756	}
	757	}
	758	this_file++;
	759	}
	760	}
[155]	761
	762	// compress_save_args will compress the arguments and return
	763	// them in compressed_args. If an error was encountered
	764	// compressed_args will be set to to "", an error will be
	765	// written to logout, and the function will return false.
	766	bool compress_save_args (const cgiargsinfoclass &argsinfo,
	767	const text_t &saveconf,
	768	cgiargsclass &args,
	769	text_t &compressed_args,
[294]	770	outconvertclass &outconvert,
[155]	771	ostream &logout) {
	772	outconvertclass text_t2ascii;
	773
	774	compressed_args.clear();
	775
	776	text_t argname, argvalue;
	777	const cgiarginfo *argnameinfo;
	778
	779	text_t::const_iterator saveconfhere = saveconf.begin();
	780	text_t::const_iterator saveconfend = saveconf.end();
	781
	782	while (saveconfhere != saveconfend) {
	783	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	784
	785	if (!argname.empty()) {
	786	// found another entry
	787	argnameinfo = argsinfo.getarginfo (argname);
	788
	789	if (argnameinfo == NULL) {
	790	// no information about the argument could be found
	791	// we can't keep going because we don't know whether
	792	// this argument is a single or multiple character value
	793	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	794	<< "\" was specified as being a compressed argument\n"
	795	<< "but no information about it could be found within the "
	796	<< "cgiargsinfoclass.\n";
	797	compressed_args.clear();
	798	return false;
	799
	800	} else {
	801	// found the argument information
	802	if (argnameinfo->multiplechar) {
[607]	803	// multiple character argument -- sort out any '-' chars
[9674]	804	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
[13456]	805	compressed_args += minus_safe (args[argname], false);
[3670]	806	else
[13456]	807	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
	808
[155]	809	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
	810
	811	} else {
	812	// single character argument
	813	if (args[argname].size() == 0) {
	814	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	815	<< "\" was specified as being a compressed argument which\n"
	816	<< "should have a one character value but it was empty.\n\n";
	817	compressed_args.clear ();
	818	return false;
	819
	820	} else if (args[argname].size() > 1) {
	821	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	822	<< "\" was specified as being a compressed argument which\n"
	823	<< "should have a one character value but it had multiple characters.\n\n";
	824	compressed_args.clear ();
	825	return false;
	826	}
	827
	828	// everything is ok
	829	compressed_args += args[argname];
	830	}
	831	}
	832	}
	833	}
	834
	835	return true;
	836	}
	837
	838
	839	// args_tounicode converts any arguments which are not in unicode
	840	// to unicode using inconvert
	841	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
	842	cgiargsclass::iterator here = args.begin();
	843	cgiargsclass::iterator end = args.end();
	844
	845	while (here != end) {
[366]	846	if ((*here).second.value.getencoding() > 0) {
[21961]	847	// Call reset() before converting each argument, to prevent problems when converting the last
	848	// argument left the converter in a bad state
	849	inconvert.reset();
[366]	850	(here).second.value = inconvert.convert((here).second.value);
[155]	851	}
	852
[9620]	853	++here;
[155]	854	}
	855	}
[873]	856
	857	// fcgienv will be loaded with environment name-value pairs
	858	// if using fastcgi (had to do this as getenv doesn't work
	859	// with our implementation of fastcgi). if fcgienv is empty
	860	// we'll simply use getenv
	861	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
	862	if (fcgienv.empty()) {
	863	char *n = name.getcstr();
	864	char *v = getenv(n);
[7432]	865	delete []n;
[873]	866	if (v != NULL) return v;
[7432]	867	return g_EmptyText;
[873]	868
	869	} else return fcgienv[name];
	870	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: