Context Navigation

cgiutils.cpp@ 31425

Last change on this file since 31425 was 30465, checked in by kjdon, 8 years ago
fixes for depositor. when getting the post data and putting it together into form data, need to escape cgi args special characters
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 30.3 KB

Rev	Line
[108]	1	/**********************************************************************
	2	*
	3	* cgiutils.cpp -- general cgi utilities
	4	* Copyright (C) 1999 The New Zealand Digital Library Project
	5	*
[533]	6	* A component of the Greenstone digital library software
	7	* from the New Zealand Digital Library Project at the
	8	* University of Waikato, New Zealand.
[108]	9	*
[533]	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License as published by
	12	* the Free Software Foundation; either version 2 of the License, or
	13	* (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	* GNU General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public License
	21	* along with this program; if not, write to the Free Software
	22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	*
[108]	24	*********************************************************************/
	25
	26	#include "cgiutils.h"
[12513]	27	#include "fileutil.h"
[3151]	28	#include "gsdlunicode.h"
[11998]	29	#include "fileutil.h"
[3217]	30	#include "unitool.h" // in mg, for output_utf8_char
[18882]	31	#include <cstdlib>
[22796]	32	#include <time.h>
[108]	33
[12513]	34	#if defined(GSDL_USE_OBJECTSPACE)
	35	# include <ospace\std\iostream>
	36	# include <ospace\std\fstream>
	37	#elif defined(GSDL_USE_IOS_H)
	38	# include <iostream.h>
	39	# include <fstream.h>
	40	#else
	41	# include <iostream>
	42	# include <fstream>
	43	#endif
[11998]	44
// Master switch for the URL-encoding security changes: while this is false,
// safe_cgi_arg() and unsafe_cgi_arg() return immediately without touching
// their argument.
// set to false to undo security changes (url-encoding arguments)
static bool do_safe_cgi_args = false;
[12513]	47
// Map an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
// value 0-15. Anything that is not a hex digit is handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  if ('0' <= c && c <= '9') {
    return c - '0';
  }
  if ('a' <= c && c <= 'f') {
    return 10 + (c - 'a');
  }
  if ('A' <= c && c <= 'F') {
    return 10 + (c - 'A');
  }

  // not a hex digit
  return c;
}
	54
	55
	56	static void c2hex (unsigned short c, text_t &t) {
	57	t.clear();
	58
	59	if (c >= 256) {
	60	t = "20"; // ' '
	61	return;
	62	}
	63
	64	unsigned short o1, o2;
	65
	66	o1 = (c/16) % 16;
	67	o2 = c % 16;
	68	if (o1 >= 10) o1 += 'a' - 10;
	69	else o1 += '0';
	70	if (o2 >= 10) o2 += 'a' - 10;
	71	else o2 += '0';
	72
	73	t.push_back(o1);
	74	t.push_back(o2);
	75	}
	76
[12513]	77	static text_t::iterator getline (text_t::iterator first,
	78	text_t::iterator last,
	79	bool include_crlf) {
	80	while (first != last) {
	81	if (((first+1) != last) && (first == 13) && ((first+1) == 10)) {
	82	// found <CRLF>
	83	if (include_crlf) first += 2;
	84	break;
	85	}
[12794]	86
[12513]	87	first++;
	88	}
[12794]	89
[12513]	90	return first;
	91	}
	92
	93	static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
	94	text_t &filetype, bool &isfile, text_t &argstr,
	95	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
	96
	97	if (!argname.empty()) {
	98
	99	if (!isfile) {
	100	// argdata includes a trailing <CRLF> that we must remove
	101	if ((argdata.size() > 1) && ((argdata.end()-2) == 13) && ((argdata.end()-1) == 10)) {
	102	argdata.erase(argdata.end()-2, argdata.end());
	103	}
	104	if (!argstr.empty()) argstr += "&";
[30465]	105
	106	// we need to convert arg to cgi safe variant - escape '&' and '%', '+', '=', turn space to +
	107	cgi_safe_post_arg(argdata);
[12513]	108	argstr += argname + "=" + argdata;
	109
	110	} else if (!filename.empty()) {
	111	// filedata includes a trailing <CRLF> that we must remove
	112	if ((filedata.size() > 1) && ((filedata.end()-2) == 13) && ((filedata.end()-1) == 10)) {
	113	filedata.erase(filedata.end()-2, filedata.end());
	114	}
	115
[22791]	116	// create tmp_name for storing the file on disk, using the current timestamp
	117	text_t tmp_name(time(NULL));
[12513]	118	tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
[12794]	119
[12513]	120	char *tmp_name_c = tmp_name.getcstr();
	121
	122	// write the file data to disk
	123	outconvertclass out;
	124	ofstream filestream(tmp_name_c, ios::out \| ios::binary);
	125	filestream << out << filedata;
	126	filestream.close();
	127	delete tmp_name_c;
	128
	129	// populate the fields of a fileupload_t and put it in the
	130	// fileuploads map
	131	fileupload_t fu;
	132	// note that filename currently may or may not include the path since
	133	// some browsers (e.g. IE) include the path while others
	134	// (e.g. mozilla) do not. we should probably remove the path from
	135	// this field here to get a consistent value across all browsers.
	136	text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
	137	if (slash != filename.end()) {
	138	filename = substr(slash+1, filename.end());
	139	}
	140	fu.name = filename;
	141	fu.type = filetype;
	142	// size has yet to be implemented
[12579]	143	fu.size = filedata.size();
[12794]	144
[12513]	145	fu.tmp_name = tmp_name;
	146	fileuploads[argname] = fu;
	147	}
	148	}
	149	isfile = false;
	150	argname.clear();
	151	argdata.clear();
	152	filename.clear();
	153	filedata.clear();
	154	filetype.clear();
	155	}
	156
	157	// parse data obtained through a CGI POST request
	158	text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
	159	fileupload_tmap &fileuploads, const text_t &gsdlhome) {
	160
	161	text_t argstr;
	162
	163	text_t::iterator content_type_begin = content_type.begin();
	164	text_t::iterator content_type_end = content_type.end();
	165	if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
	166	// a simple post request
	167	return raw_post_data;
	168
	169	} else {
	170	// multipart/form data - may contain one or more uploaded files
	171
	172	/*
	173	content_type should look something like the following
	174	multipart/form-data; boundary=---------------------------7d411e1a50330
	175
	176	while raw_post_data will be as follows
	177	-----------------------------7d43e73450330CRLF
	178	Content-Disposition: form-data; name="e"<CRLF>
	179	<CRLF>
	180	d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
	181	-----------------------------7d43e73450330<CRLF>
	182	Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
	183	Content-Type: application/msword<CRLF>
	184	<CRLF>
	185	<Content of file><CRLF>
	186
	187	*/
	188
	189	// first get the boundary from content-type
	190	text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
[12794]	191	if (boundary_begin+9 < content_type_end)
	192	{
	193	// skip over "boundary=" part of string
	194	boundary_begin += 9;
	195	}
[12513]	196	else {
	197	// error
[12796]	198	cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
[12513]	199	return "";
	200	}
	201	text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
	202	int boundary_len = boundary.size();
	203
[12794]	204
[12513]	205	text_t argname, argdata, filename, filedata, filetype;
	206	bool isfile = false;
	207	text_t::iterator data_here = raw_post_data.begin();
	208	text_t::iterator data_end = raw_post_data.end();
	209	while (data_here != data_end) {
	210
	211	// get the next available line (including the trailing <CRLF>
	212	text_t line = substr(data_here, getline(data_here, data_end, true));
[12794]	213
[12513]	214	data_here += line.size();
	215	text_t::iterator line_begin = line.begin();
	216	text_t::iterator line_end = line.end();
	217	if (findword(line_begin, line_end, boundary) != line_end) {
	218	// we've found a boundary
	219	process_post_section(argname, argdata, filename, filedata, filetype,
	220	isfile, argstr, fileuploads, gsdlhome);
	221
	222	} else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
	223	// we've found the the beginning of a new section
	224	argname.clear();
	225	argdata.clear();
	226
	227	// get the name of this piece of form data
	228	text_t::iterator it = findword(line_begin, line_end, "name=\"");
	229	if (it == line_end) break; // error - this shouldn't happen
	230	it = findchar(it, line_end, '"');
	231	if ((it != line_end) && (it+1 != line_end)) {
	232	argname = substr(it+1, findchar(it+1, line_end, '"'));
	233	}
	234
	235	// if this piece of form data contains filename="" it's a file
	236	// upload and needs to be treated special
	237	it = (findword(line_begin, line_end, "filename=\""));
	238	if (it != line_end) {
	239	// we've found a file upload
	240	isfile = true;
	241	it = findchar(it, line_end, '"');
	242	if ((it != line_end) && (it+1 != line_end)) {
	243	filename = substr(it+1, findchar(it+1, line_end, '"'));
	244	}
	245
	246	// the next line is the content-type of this section
	247	line = substr(data_here, getline(data_here, data_end, true));
	248	data_here += line.size();
	249	line_begin = line.begin();
	250	line_end = line.end();
	251	it = (findword(line_begin, line_end, "Content-Type: "));
	252	if (it != line_end) {
	253	filetype = substr(it+14, getline(it, line_end, false));
	254	}
	255	}
	256
	257	// eat up the next line as it's just a <CRLF> on it's own
	258	data_here += 2;
	259
	260	} else {
	261	if (isfile) filedata += line;
	262	else argdata += line;
	263	}
[12794]	264
[12513]	265	}
	266
	267	// process last section
	268	process_post_section(argname, argdata, filename, filedata, filetype,
	269	isfile, argstr, fileuploads, gsdlhome);
	270
	271	return argstr;
	272	}
	273	}
	274
[108]	275	// convert %xx and + to their appropriate equivalents
[3217]	276	// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
[13461]	277	// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
	278	// actually was, then this returns utf-8, and needs to_uni on the
	279	// result to get it back to unicode. If the encoding wasn't utf-8, then the
	280	// output may be crap. Seems to work for 8 bit encodings.
	281	// Really, this should be given the encoding, and should always return unicode.
[108]	282	void decode_cgi_arg (text_t &argstr) {
	283	text_t::iterator in = argstr.begin();
	284	text_t::iterator out = in;
	285	text_t::iterator end = argstr.end();
	286
	287	while (in != end) {
	288	if (in == '+') out = ' ';
	289
	290	else if (*in == '%') {
	291	unsigned short c = '%';
[3151]	292	++in;
	293	if (in != end) { // this is an encoding...
	294	if (*in == 'u') { // convert %uHHHH to unicode then current encoding
	295	// this assumes a short int is at least 16 bits...
	296	++in;
	297	if (in != end)
	298	c=hexdigit(*in++) << 12;
	299	if (in != end)
	300	c+=hexdigit(*in++) << 8;
	301	if (in != end)
	302	c+=hexdigit(*in++) << 4;
	303	if (in != end)
	304	c+=hexdigit(*in);
	305	/* BAD!! The following assumes the interface is using utf-8. But
	306	at this point we don't know what encoding we are using, unless
	307	we can parse it out of the string we are currently decoding... */
	308	text_t uni=" ";
	309	uni[0]=c;
	310	text_t utf8=to_utf8(uni);
	311	int last_byte=utf8.size()-1;
[9620]	312	for (int i=0;i<last_byte;++i)
[3151]	313	*out++ = utf8[i];
	314	c=utf8[last_byte];
	315	} else { // convert %HH to hex value
	316	c = hexdigit (*in);
	317	++in;
	318	if (in != end && c < 16) { // sanity check on the previous character
	319	c = c16 + hexdigit (in);
	320	}
	321	}
[108]	322	}
	323	*out = c;
	324	} else out = in;
	325
[9620]	326	if (in != end) ++in;
	327	++out;
[108]	328	}
	329
	330	// remove the excess characters
	331	argstr.erase (out, end);
[13461]	332
[108]	333	}
	334
[30465]	335	//Need to escape special chars in post data so they don't interfere with arg parsing once its a get style string
	336	void cgi_safe_post_arg(text_t &argstr) {
	337
	338	text_t::iterator in = argstr.begin();
	339	text_t out = "";
	340	text_t::iterator end = argstr.end();
	341
	342	while (in != end) {
	343	if (*in == '&') out += "%26";
	344	else if (*in == '%') out += "%2525";
	345	else if (*in == '+') out += "%2B";
	346	else if (*in == '=') out += "%3D";
	347	else if (*in == ' ') out += "+";
	348	else { // append whatever char is in *in, but as a char, not int
	349	//out += *in; // appends as int
	350	out.push_back(*in);
	351	}
	352	++in;
	353	}
	354
	355	argstr.erase (argstr.begin(), end);
	356	argstr += out;
	357	}
	358
	359
	360
[26539]	361	// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
	362	// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
	363	// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
	364	// apache log and that log file can be included in a local file inclusion (LFI) or
	365	// remote file include (RFI) attack.
	366	// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
	367	// break out of an html/XML/javascript context.
[28841]	368	void safe_cgi_arg (const text_t &key, text_t &argstr) {
	369	if(!do_safe_cgi_args) {
	370	return;
	371	}
	372
[26539]	373	text_t::iterator in = argstr.begin();
	374	text_t out = "";
	375	text_t::iterator end = argstr.end();
	376
	377	while (in != end) {
	378	if (*in == '<') out += "%3C";
	379	else if (*in == '>') out += "%3E";
	380	else if (*in == '&') out += "%26";
	381	else if (*in == '\"') out += "%22";
	382	else if (*in == '\'') out += "%27";
[26560]	383	//else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
[26539]	384	else { // append whatever char is in *in, but as a char, not int
	385	//out += *in; // appends as int
[28841]	386	out.push_back(*in);
[26539]	387	}
	388	++in;
	389	}
	390
	391	argstr.erase (argstr.begin(), end);
	392	argstr += out;
	393	}
[108]	394
[28841]	395
	396	// given a list of characters (or "all") to decode, and given the string, str, where those
	397	// characters are to be decoded, this method replaces any occurrences of the url-encoded
	398	// variants of those characters with their actual characters in the given string str.
	399	void unsafe_cgi_arg(const text_t &chars, text_t &str) {
	400	if(!do_safe_cgi_args) {
	401	return;
	402	}
	403
	404	text_t allchars = "<>&\"\'/";
	405
	406	text_t chars_to_decode = (chars == "all" \|\| chars == "ALL") ? allchars : chars;
	407
	408	text_t::iterator in = chars_to_decode.begin();
	409	text_t::iterator end = chars_to_decode.end();
	410
	411	char hex_char[4];
	412
	413	// using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
	414
	415	while (in != end) {
	416
	417	// *in is a character from the accepted list of chars_to_decode list
	418
	419	// 1. create the url-encoded value of the char *in in variable hex_char
	420	// sprintf adds in a null byte at the end
	421	sprintf(hex_char,"%%%02X",*in);
	422
	423	// 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
	424	text_t tmp = "";
	425	tmp.push_back(*in);
	426
	427	// 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
	428	str.replace(hex_char, tmp);
	429
	430	++in;
	431	}
	432	}
	433
	434
[108]	435	// split up the cgi arguments
[776]	436	void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
	437	cgiargsclass &args) {
[108]	438	args.clear();
	439
[7432]	440	text_t::const_iterator here = argstr.begin();
	441	text_t::const_iterator end = argstr.end();
[30373]	442	// get seems to be not unicode, while post is, so don't want to just assume encoding is 1 (not unicode)
	443	unsigned short args_encoding = argstr.getencoding();
[30465]	444
[108]	445	text_t key, value;
[11998]	446
[108]	447	// extract out the key=value pairs
	448	while (here != end) {
	449	// get the next key and value pair
	450	here = getdelimitstr (here, end, '=', key);
	451	here = getdelimitstr (here, end, '&', value);
	452
	453	// convert %xx and + to their appropriate equivalents
[614]	454	decode_cgi_arg (value);
[26539]	455
[28841]	456	safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
[26539]	457
[30373]	458	value.setencoding(args_encoding); //1 // other encoding
[108]	459	// store this key=value pair
[764]	460	if (!key.empty()) {
[2426]	461
	462	// if arg occurs multiple times (as is the case with multiple
	463	// checkboxes using the same name) we'll create a comma separated
	464	// list of all the values (this uses a hack that encodes naturally
	465	// occurring commas as %2C - values will therefore need to be decoded
	466	// again before use) - it should use an array instead
[2417]	467	const cgiarginfo *info = argsinfo.getarginfo (key);
[12562]	468	if (info==NULL) {
	469	// If info is NULL, we can't tell if the arg is multiple value or not
	470	// Because we need to have dynamically named arguments multivalued, we
	471	// will always assume multiplevalue = true
	472	// If the arg is not multi valued, then you need to decode the commas.
	473	if (args.getarg(key)==NULL) {
[30373]	474	// encode_commas returns a text_t without encoding bit set
	475	text_t newvalue = encode_commas(value);
	476	newvalue.setencoding(args_encoding);
	477	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
[12562]	478	}
	479	else {
	480	text_t newvalue = args[key];
[2417]	481
[12562]	482	newvalue += "," + encode_commas(value);
[30373]	483	newvalue.setencoding(args_encoding); // other encoding
[12562]	484	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
	485	}
[2417]	486	}
[12562]	487	else {
	488	if (info->multiplevalue) {
	489
	490	text_t newvalue = args[key];
	491	if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
	492	newvalue += encode_commas(value);
[30373]	493	newvalue.setencoding(args_encoding); // other encoding
[12562]	494	args.setarg (key, newvalue, cgiarg_t::cgi_arg);
	495
	496	} else {
	497	args.setarg (key, value, cgiarg_t::cgi_arg);
	498	}
	499	}
[764]	500	}
[108]	501	}
	502	}
	503
[2426]	504	text_t encode_commas (const text_t &intext) {
	505
	506	text_t outtext;
	507
	508	text_t::const_iterator here = intext.begin ();
	509	text_t::const_iterator end = intext.end ();
	510
	511	while (here != end) {
	512	if (*here == ',') outtext += "%2C";
	513	else outtext.push_back (*here);
[9620]	514	++here;
[2426]	515	}
	516	return outtext;
	517	}
	518
	519	text_t decode_commas (const text_t &intext) {
	520
	521	text_t outtext;
	522
	523	text_t::const_iterator here = intext.begin ();
	524	text_t::const_iterator end = intext.end ();
	525
[22942]	526	// for loop
	527	int intext_len = intext.size();
	528	for(int i = 0; i < intext_len; i++) {
	529	if ((i+2)<intext_len) {
	530	if(intext[i] == '%' && intext[i+1] == '2'
	531	&& (intext[i+2] == 'C' \|\| intext[i+2] == 'c')) {
	532	i += 2;
	533	outtext.push_back(',');
	534	continue;
	535	}
	536	}
	537	outtext.push_back (intext[i]);
	538	}
	539	return outtext;
[2426]	540	}
	541
[13456]	542	// set utf8 to true if input is in utf-8, otherwise expects input in unicode
	543	text_t minus_safe (const text_t &intext, bool utf8) {
[607]	544
	545	text_t outtext;
	546
	547	text_t::const_iterator here = intext.begin ();
	548	text_t::const_iterator end = intext.end ();
	549
	550	while (here != end) {
[1504]	551	if (*here == '-') outtext += "Zz-";
[607]	552	else outtext.push_back (*here);
[9620]	553	++here;
[607]	554	}
[13456]	555	if (utf8) {
	556	outtext = cgi_safe_utf8 (outtext);
	557	} else {
	558	outtext = cgi_safe_unicode (outtext);
	559	}
[607]	560	return outtext;
	561	}
	562
[13456]	563	// takes utf-8 input
	564	text_t cgi_safe_utf8 (const text_t &intext) {
[108]	565	text_t outtext;
	566
	567	text_t::const_iterator here = intext.begin ();
	568	text_t::const_iterator end = intext.end ();
	569	unsigned short c;
	570	text_t ttmp;
	571
	572	while (here != end) {
	573	c = *here;
	574	if (((c >= 'a') && (c <= 'z')) \|\|
	575	((c >= 'A') && (c <= 'Z')) \|\|
[474]	576	((c >= '0') && (c <= '9')) \|\|
[13456]	577	(c == '%') \|\| (c == '-')) {
[108]	578	// alphanumeric character
	579	outtext.push_back(c);
	580	} else if (c == ' ') {
	581	// space
[150]	582	outtext.push_back('+');
[13456]	583	} else if (c > 255) { // not utf-8 character
	584	cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
	585	} else {
	586	// everything else
	587	outtext.push_back('%');
	588	c2hex(c, ttmp);
	589	outtext += ttmp;
	590	}
	591
	592	++here;
	593	}
	594
	595	return outtext;
	596	}
	597	// takes unicode input
	598	text_t cgi_safe_unicode (const text_t &intext) {
	599	text_t outtext;
	600
	601	text_t::const_iterator here = intext.begin ();
	602	text_t::const_iterator end = intext.end ();
	603	unsigned short c;
	604	text_t ttmp;
	605
	606	while (here != end) {
	607	c = *here;
	608	if (((c >= 'a') && (c <= 'z')) \|\|
	609	((c >= 'A') && (c <= 'Z')) \|\|
	610	((c >= '0') && (c <= '9')) \|\|
	611	(c == '%') \|\| (c == '-')) {
	612	// alphanumeric character
	613	outtext.push_back(c);
	614	} else if (c == ' ') {
	615	// space
	616	outtext.push_back('+');
[13103]	617	} else if (c > 127) { // unicode character
[3217]	618	unsigned char buf[3]; // up to 3 bytes
	619	buf[0]='\0';buf[1]='\0';buf[2]='\0';
	620	output_utf8_char(c,buf, buf+2);
	621	outtext.push_back('%');
	622	c2hex(buf[0], ttmp);
	623	outtext += ttmp;
	624	outtext.push_back('%');
	625	c2hex(buf[1], ttmp);
	626	outtext += ttmp;
	627	if (buf[2]) {
	628	outtext.push_back('%');
	629	c2hex(buf[2], ttmp);
	630	outtext += ttmp;
	631	}
[108]	632	} else {
	633	// everything else
	634	outtext.push_back('%');
	635	c2hex(c, ttmp);
	636	outtext += ttmp;
	637	}
	638
[9620]	639	++here;
[108]	640	}
	641
	642	return outtext;
	643	}
[155]	644
	645
	646
	647
	648	static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
	649	text_t::const_iterator last,
	650	text_t &argname) {
	651	first = getdelimitstr (first, last, '-', argname);
	652	return first;
	653	}
	654
	655
	656	// check_save_conf_str checks the configuration string for
	657	// the saved args and makes sure it does not conflict with
	658	// the information about the arguments. If an error is encountered
	659	// it will return false and the program should not produce any
	660	// output.
	661	bool check_save_conf_str (const text_t &saveconf,
	662	const cgiargsinfoclass &argsinfo,
	663	ostream &logout) {
	664	outconvertclass text_t2ascii;
	665
	666	text_tset argsset;
	667	text_t::const_iterator saveconfhere = saveconf.begin ();
	668	text_t::const_iterator saveconfend = saveconf.end ();
	669	text_t argname;
	670	const cgiarginfo *info;
	671
	672	// first check to make sure all saved arguments can be saved
	673
	674	while (saveconfhere != saveconfend) {
	675	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	676
	677	if (!argname.empty()) {
	678	// save the argument name for later
	679	argsset.insert (argname);
	680
	681	// check the argument
	682	info = argsinfo.getarginfo (argname);
	683	if (info == NULL) {
	684	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	685	<< "\" is used in the configuration string for the\n"
	686	<< "saved arguments but does not exist as a valid argument.\n\n";
	687	return false;
	688	}
	689	if (info->savedarginfo == cgiarginfo::mustnot) {
	690	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	691	<< "\" is used in the configuration string for the\n"
	692	<< "saved arguments but has been specified as an argument whose\n"
	693	<< "state must not be saved.\n\n";
	694	return false;
	695	}
	696	}
	697	}
	698
	699
	700	// next check that all saved arguments that should be saved
	701	// are saved
	702	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	703	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	704
	705	while (argsinfohere != argsinfoend) {
	706	if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
	707	(argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
	708	logout << text_t2ascii << "Error: the cgi argument \""
	709	<< (*argsinfohere).second.shortname << "\" was specified as needing to\n"
	710	<< "be save but was not listed in the saved arguments.\n\n";
	711	return false;
	712	}
	713
[9620]	714	++argsinfohere;
[155]	715	}
	716
	717	return true; // made it, no clashes
	718	}
	719
	720
	721	// create_save_conf_str will create a configuration string
	722	// based on the information in argsinfo. This method of configuration
	723	// is not recomended as small changes can produce large changes in
	724	// the resulting configuration string (for instance a totally different
	725	// ordering). Only arguments which "must" be saved are included in
	726	// the resulting string.
	727	text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
	728	ostream &/logout/) {
	729	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	730	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	731	text_t saveconf;
	732	bool first = true;
	733
	734	while (argsinfohere != argsinfoend) {
	735	// save this argument if it must be saved
	736	if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
	737	if (!first) saveconf.push_back ('-');
	738	else first = false;
	739	saveconf += (*argsinfohere).second.shortname;
	740	}
	741
[9620]	742	++argsinfohere;
[155]	743	}
	744
	745	return saveconf;
	746	}
	747
	748
	749	// expand_save_args will expand the saved arguments based
	750	// on saveconf placing the results in args if they are not
	751	// already defined. If it encounters an error it will return false
	752	// and output more information to logout.
	753	bool expand_save_args (const cgiargsinfoclass &argsinfo,
	754	const text_t &saveconf,
	755	cgiargsclass &args,
	756	ostream &logout) {
	757	outconvertclass text_t2ascii;
	758
	759	text_t *arg_e = args.getarg("e");
	760	if (arg_e == NULL) return true; // no compressed arguments
	761	if (arg_e->empty()) return true; // no compressed arguments
	762
	763	text_t argname, argvalue;
	764	const cgiarginfo *argnameinfo;
	765
	766	text_t::const_iterator saveconfhere = saveconf.begin();
	767	text_t::const_iterator saveconfend = saveconf.end();
	768
[11259]	769	text_t::iterator arg_ebegin = arg_e->begin();
[155]	770	text_t::iterator arg_eend = arg_e->end();
[11259]	771	text_t::iterator arg_ehere = arg_ebegin;
[155]	772	while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
	773	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	774
	775	if (!argname.empty()) {
	776	// found another entry
	777	argnameinfo = argsinfo.getarginfo (argname);
	778
	779	if (argnameinfo == NULL) {
	780	// no information about the argument could be found
	781	// we can't keep going because we don't know whether
	782	// this argument is a single or multiple character value
	783	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	784	<< "\" was specified as being a compressed argument\n"
	785	<< "but no information about it could be found within the "
	786	<< "cgiargsinfoclass.\n";
	787	return false;
	788
	789	} else {
[294]	790
[155]	791	// found the argument information
	792	if (argnameinfo->multiplechar) {
[607]	793	text_t::const_iterator sav = arg_ehere;
[155]	794	arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
[11259]	795	if (distance(arg_ebegin, arg_ehere) > 2) {
	796	// replace any '-' chars escaped with 'Zz'
	797	bool first = true;
	798	while (((arg_ehere-3) == 'Z') && ((arg_ehere-2) == 'z')) {
	799	if (first) argvalue.clear();
[21997]	800
	801	// Hey, here's a wild idea. Why don't we check that there is
	802	// another hyphen in the cgiarge before we get a pointer to it and
	803	// add one. That way we are far less likely to wander off into
	804	// random memory merrily parsing arguments that are then lovingly
	805	// spewed all over the HTML page returned at the usage logs.
	806	text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
	807	if (minus_itr == arg_eend)
	808	{
	809	logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
	810	return false;
	811	}
	812	arg_ehere = minus_itr + 1;
	813
[11259]	814	while (sav != (arg_ehere-1)) {
	815	if (!((sav == 'Z') && ((sav+1) == 'z') && (*(sav+2) == '-')) &&
	816	!(((sav-1) == 'Z') && (sav == 'z') && ((sav+1) == '-'))) argvalue.push_back (sav);
	817	++sav;
	818	}
	819	first = false;
[607]	820	}
	821	}
[294]	822	argvalue.setencoding(1); // other encoding
[366]	823	if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
[155]	824	} else {
[366]	825	args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
[9620]	826	++arg_ehere;
[155]	827	}
	828	}
	829	}
	830	}
	831
	832	return true;
	833	}
	834
	835
	836	// adds the default values for those arguments which have not
	837	// been specified
	838	void add_default_args (const cgiargsinfoclass &argsinfo,
	839	cgiargsclass &args,
	840	ostream &/logout/) {
	841	cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
	842	cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
	843
	844	while (argsinfohere != argsinfoend) {
	845	if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
	846	args.setdefaultarg ((*argsinfohere).second.shortname,
[366]	847	(*argsinfohere).second.argdefault, cgiarg_t::default_arg);
[155]	848	}
[9620]	849	++argsinfohere;
[155]	850	}
	851	}
	852
[12513]	853	void add_fileupload_args (const cgiargsinfoclass &argsinfo,
	854	cgiargsclass &args,
	855	fileupload_tmap &fileuploads,
	856	ostream &logout) {
	857
	858	const cgiarginfo *info = argsinfo.getarginfo("a");
	859	fileupload_tmap::const_iterator this_file = fileuploads.begin();
	860	fileupload_tmap::const_iterator end_file = fileuploads.end();
	861	while (this_file != end_file) {
	862	const cgiarginfo info = argsinfo.getarginfo((this_file).first);
	863	if (info != NULL) {
[12794]	864
[12513]	865	if ((info).fileupload && (file_exists((this_file).second.tmp_name))) {
[12794]	866
[12513]	867	args.setargfile((this_file).first, (this_file).second);
	868	}
	869	}
	870	this_file++;
	871	}
	872	}
[155]	873
	874	// compress_save_args will compress the arguments and return
	875	// them in compressed_args. If an error was encountered
	876	// compressed_args will be set to to "", an error will be
	877	// written to logout, and the function will return false.
	878	bool compress_save_args (const cgiargsinfoclass &argsinfo,
	879	const text_t &saveconf,
	880	cgiargsclass &args,
	881	text_t &compressed_args,
[294]	882	outconvertclass &outconvert,
[155]	883	ostream &logout) {
	884	outconvertclass text_t2ascii;
	885
	886	compressed_args.clear();
	887
	888	text_t argname, argvalue;
	889	const cgiarginfo *argnameinfo;
	890
	891	text_t::const_iterator saveconfhere = saveconf.begin();
	892	text_t::const_iterator saveconfend = saveconf.end();
	893
	894	while (saveconfhere != saveconfend) {
	895	saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
	896
	897	if (!argname.empty()) {
	898	// found another entry
	899	argnameinfo = argsinfo.getarginfo (argname);
	900
	901	if (argnameinfo == NULL) {
	902	// no information about the argument could be found
	903	// we can't keep going because we don't know whether
	904	// this argument is a single or multiple character value
	905	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	906	<< "\" was specified as being a compressed argument\n"
	907	<< "but no information about it could be found within the "
	908	<< "cgiargsinfoclass.\n";
	909	compressed_args.clear();
	910	return false;
	911
	912	} else {
	913	// found the argument information
	914	if (argnameinfo->multiplechar) {
[607]	915	// multiple character argument -- sort out any '-' chars
[9674]	916	if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
[13456]	917	compressed_args += minus_safe (args[argname], false);
[3670]	918	else
[13456]	919	compressed_args += minus_safe (outconvert.convert(args[argname]), true);
	920
[155]	921	if (saveconfhere != saveconfend) compressed_args.push_back ('-');
	922
	923	} else {
	924	// single character argument
	925	if (args[argname].size() == 0) {
	926	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	927	<< "\" was specified as being a compressed argument which\n"
	928	<< "should have a one character value but it was empty.\n\n";
	929	compressed_args.clear ();
	930	return false;
	931
	932	} else if (args[argname].size() > 1) {
	933	logout << text_t2ascii << "Error: the cgi argument \"" << argname
	934	<< "\" was specified as being a compressed argument which\n"
	935	<< "should have a one character value but it had multiple characters.\n\n";
	936	compressed_args.clear ();
	937	return false;
	938	}
	939
	940	// everything is ok
	941	compressed_args += args[argname];
	942	}
	943	}
	944	}
	945	}
	946
	947	return true;
	948	}
	949
	950
	951	// args_tounicode converts any arguments which are not in unicode
	952	// to unicode using inconvert
	953	void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
	954	cgiargsclass::iterator here = args.begin();
	955	cgiargsclass::iterator end = args.end();
	956
	957	while (here != end) {
[366]	958	if ((*here).second.value.getencoding() > 0) {
[21961]	959	// Call reset() before converting each argument, to prevent problems when converting the last
	960	// argument left the converter in a bad state
	961	inconvert.reset();
[366]	962	(here).second.value = inconvert.convert((here).second.value);
[155]	963	}
	964
[9620]	965	++here;
[155]	966	}
	967	}
[873]	968
	969	// fcgienv will be loaded with environment name-value pairs
	970	// if using fastcgi (had to do this as getenv doesn't work
	971	// with our implementation of fastcgi). if fcgienv is empty
	972	// we'll simply use getenv
	973	text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
	974	if (fcgienv.empty()) {
	975	char *n = name.getcstr();
	976	char *v = getenv(n);
[7432]	977	delete []n;
[873]	978	if (v != NULL) return v;
[7432]	979	return g_EmptyText;
[873]	980
	981	} else return fcgienv[name];
	982	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 31425

Download in other formats: