source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 28888

Last change on this file since 28888 was 28888, checked in by ak19, 10 years ago

First security commit. 1. Introducing the new securitools.h and .cpp files, which port the functions necessary to implement security in Greenstone from OWASP-ESAPI for Java, since OWASP's C++ version is largely not yet implemented, even though their code compiles. The newly added runtime-src/packages/security which contains OWASP ESAPI for C++ will therefore be removed again shortly. 2. receptionist.cpp now sets various web-encoded variants for each cgiarg macro, such as HTML entity encoded, attr encoded, javascript encoded (and css encoded variants). These are now used in the macro files based on which variant is suited to the context. 3. This commit further contains the minimum changes to protect the c, d, and p cgi variables.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 29.2 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "fileutil.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "unitool.h" // in mg, for output_utf8_char
31#include <cstdlib>
32#include <time.h>
33
34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\iostream>
36# include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38# include <iostream.h>
39# include <fstream.h>
40#else
41# include <iostream>
42# include <fstream>
43#endif
44
// Master switch for the url-encoding security measures in this file.
// While it is false, safe_cgi_arg() and unsafe_cgi_arg() return immediately
// without touching their argument; set it to true to enable them.
static bool do_safe_cgi_args = false;
47
// Maps a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0..15. Any other character is handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  if ('0' <= c && c <= '9') return c - '0';
  if ('a' <= c && c <= 'f') return 10 + (c - 'a');
  if ('A' <= c && c <= 'F') return 10 + (c - 'A');
  return c;
}
54
55
56static void c2hex (unsigned short c, text_t &t) {
57 t.clear();
58
59 if (c >= 256) {
60 t = "20"; // ' '
61 return;
62 }
63
64 unsigned short o1, o2;
65
66 o1 = (c/16) % 16;
67 o2 = c % 16;
68 if (o1 >= 10) o1 += 'a' - 10;
69 else o1 += '0';
70 if (o2 >= 10) o2 += 'a' - 10;
71 else o2 += '0';
72
73 t.push_back(o1);
74 t.push_back(o2);
75}
76
77static text_t::iterator getline (text_t::iterator first,
78 text_t::iterator last,
79 bool include_crlf) {
80 while (first != last) {
81 if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
82 // found <CRLF>
83 if (include_crlf) first += 2;
84 break;
85 }
86
87 first++;
88 }
89
90 return first;
91}
92
93static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
94 text_t &filetype, bool &isfile, text_t &argstr,
95 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
96
97 if (!argname.empty()) {
98
99 if (!isfile) {
100 // argdata includes a trailing <CRLF> that we must remove
101 if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
102 argdata.erase(argdata.end()-2, argdata.end());
103 }
104 if (!argstr.empty()) argstr += "&";
105 argstr += argname + "=" + argdata;
106
107 } else if (!filename.empty()) {
108 // filedata includes a trailing <CRLF> that we must remove
109 if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
110 filedata.erase(filedata.end()-2, filedata.end());
111 }
112
113 // create tmp_name for storing the file on disk, using the current timestamp
114 text_t tmp_name(time(NULL));
115 tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
116
117 char *tmp_name_c = tmp_name.getcstr();
118
119 // write the file data to disk
120 outconvertclass out;
121 ofstream filestream(tmp_name_c, ios::out | ios::binary);
122 filestream << out << filedata;
123 filestream.close();
124 delete tmp_name_c;
125
126 // populate the fields of a fileupload_t and put it in the
127 // fileuploads map
128 fileupload_t fu;
129 // note that filename currently may or may not include the path since
130 // some browsers (e.g. IE) include the path while others
131 // (e.g. mozilla) do not. we should probably remove the path from
132 // this field here to get a consistent value across all browsers.
133 text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
134 if (slash != filename.end()) {
135 filename = substr(slash+1, filename.end());
136 }
137 fu.name = filename;
138 fu.type = filetype;
139 // size has yet to be implemented
140 fu.size = filedata.size();
141
142 fu.tmp_name = tmp_name;
143 fileuploads[argname] = fu;
144 }
145 }
146 isfile = false;
147 argname.clear();
148 argdata.clear();
149 filename.clear();
150 filedata.clear();
151 filetype.clear();
152}
153
154// parse data obtained through a CGI POST request
155text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
156 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
157
158 text_t argstr;
159
160 text_t::iterator content_type_begin = content_type.begin();
161 text_t::iterator content_type_end = content_type.end();
162 if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
163 // a simple post request
164
165 return raw_post_data;
166
167 } else {
168 // multipart/form data - may contain one or more uploaded files
169
170 /*
171 content_type should look something like the following
172 multipart/form-data; boundary=---------------------------7d411e1a50330
173
174 while raw_post_data will be as follows
175 -----------------------------7d43e73450330CRLF
176 Content-Disposition: form-data; name="e"<CRLF>
177 <CRLF>
178 d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
179 -----------------------------7d43e73450330<CRLF>
180 Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
181 Content-Type: application/msword<CRLF>
182 <CRLF>
183 <Content of file><CRLF>
184
185 */
186
187 // first get the boundary from content-type
188 text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
189 if (boundary_begin+9 < content_type_end)
190 {
191 // skip over "boundary=" part of string
192 boundary_begin += 9;
193 }
194 else {
195 // error
196 cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
197 return "";
198 }
199 text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
200 int boundary_len = boundary.size();
201
202
203 text_t argname, argdata, filename, filedata, filetype;
204 bool isfile = false;
205 text_t::iterator data_here = raw_post_data.begin();
206 text_t::iterator data_end = raw_post_data.end();
207 while (data_here != data_end) {
208
209 // get the next available line (including the trailing <CRLF>
210 text_t line = substr(data_here, getline(data_here, data_end, true));
211
212 data_here += line.size();
213 text_t::iterator line_begin = line.begin();
214 text_t::iterator line_end = line.end();
215 if (findword(line_begin, line_end, boundary) != line_end) {
216 // we've found a boundary
217 process_post_section(argname, argdata, filename, filedata, filetype,
218 isfile, argstr, fileuploads, gsdlhome);
219
220 } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
221 // we've found the the beginning of a new section
222 argname.clear();
223 argdata.clear();
224
225 // get the name of this piece of form data
226 text_t::iterator it = findword(line_begin, line_end, "name=\"");
227 if (it == line_end) break; // error - this shouldn't happen
228 it = findchar(it, line_end, '"');
229 if ((it != line_end) && (it+1 != line_end)) {
230 argname = substr(it+1, findchar(it+1, line_end, '"'));
231 }
232
233 // if this piece of form data contains filename="" it's a file
234 // upload and needs to be treated special
235 it = (findword(line_begin, line_end, "filename=\""));
236 if (it != line_end) {
237 // we've found a file upload
238 isfile = true;
239 it = findchar(it, line_end, '"');
240 if ((it != line_end) && (it+1 != line_end)) {
241 filename = substr(it+1, findchar(it+1, line_end, '"'));
242 }
243
244 // the next line is the content-type of this section
245 line = substr(data_here, getline(data_here, data_end, true));
246 data_here += line.size();
247 line_begin = line.begin();
248 line_end = line.end();
249 it = (findword(line_begin, line_end, "Content-Type: "));
250 if (it != line_end) {
251 filetype = substr(it+14, getline(it, line_end, false));
252 }
253 }
254
255 // eat up the next line as it's just a <CRLF> on it's own
256 data_here += 2;
257
258 } else {
259 if (isfile) filedata += line;
260 else argdata += line;
261 }
262
263 }
264
265 // process last section
266 process_post_section(argname, argdata, filename, filedata, filetype,
267 isfile, argstr, fileuploads, gsdlhome);
268
269 return argstr;
270 }
271}
272
273// convert %xx and + to their appropriate equivalents
274// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
275// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
276// actually was, then this returns utf-8, and needs to_uni on the
277// result to get it back to unicode. If the encoding wasn't utf-8, then the
278// output may be crap. Seems to work for 8 bit encodings.
279// Really, this should be given the encoding, and should always return unicode.
280void decode_cgi_arg (text_t &argstr) {
281 text_t::iterator in = argstr.begin();
282 text_t::iterator out = in;
283 text_t::iterator end = argstr.end();
284
285 while (in != end) {
286 if (*in == '+') *out = ' ';
287
288 else if (*in == '%') {
289 unsigned short c = '%';
290 ++in;
291 if (in != end) { // this is an encoding...
292 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
293 // this assumes a short int is at least 16 bits...
294 ++in;
295 if (in != end)
296 c=hexdigit(*in++) << 12;
297 if (in != end)
298 c+=hexdigit(*in++) << 8;
299 if (in != end)
300 c+=hexdigit(*in++) << 4;
301 if (in != end)
302 c+=hexdigit(*in);
303 /* BAD!! The following assumes the interface is using utf-8. But
304 at this point we don't know what encoding we are using, unless
305 we can parse it out of the string we are currently decoding... */
306 text_t uni=" ";
307 uni[0]=c;
308 text_t utf8=to_utf8(uni);
309 int last_byte=utf8.size()-1;
310 for (int i=0;i<last_byte;++i)
311 *out++ = utf8[i];
312 c=utf8[last_byte];
313 } else { // convert %HH to hex value
314 c = hexdigit (*in);
315 ++in;
316 if (in != end && c < 16) { // sanity check on the previous character
317 c = c*16 + hexdigit (*in);
318 }
319 }
320 }
321 *out = c;
322 } else *out = *in;
323
324 if (in != end) ++in;
325 ++out;
326 }
327
328 // remove the excess characters
329 argstr.erase (out, end);
330
331}
332
333// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
334// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
335// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
336// apache log and that log file can be included in a local file inclusion (LFI) or
337// remote file include (RFI) attack.
338// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
339// break out of an html/XML/javascript context.
340void safe_cgi_arg (const text_t &key, text_t &argstr) {
341 if(!do_safe_cgi_args) {
342 return;
343 }
344
345 text_t::iterator in = argstr.begin();
346 text_t out = "";
347 text_t::iterator end = argstr.end();
348
349 while (in != end) {
350 if (*in == '<') out += "%3C";
351 else if (*in == '>') out += "%3E";
352 else if (*in == '&') out += "%26";
353 else if (*in == '\"') out += "%22";
354 else if (*in == '\'') out += "%27";
355 //else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
356 else { // append whatever char is in *in, but as a char, not int
357 //out += *in; // appends as int
358 out.push_back(*in);
359 }
360 ++in;
361 }
362
363 argstr.erase (argstr.begin(), end);
364 argstr += out;
365}
366
367
368// given a list of characters (or "all") to decode, and given the string, str, where those
369// characters are to be decoded, this method replaces any occurrences of the url-encoded
370// variants of those characters with their actual characters in the given string str.
371void unsafe_cgi_arg(const text_t &chars, text_t &str) {
372 if(!do_safe_cgi_args) {
373 return;
374 }
375
376 text_t allchars = "<>&\"\'/";
377
378 text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars;
379
380 text_t::iterator in = chars_to_decode.begin();
381 text_t::iterator end = chars_to_decode.end();
382
383 char hex_char[4];
384
385 // using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
386
387 while (in != end) {
388
389 // *in is a character from the accepted list of chars_to_decode list
390
391 // 1. create the url-encoded value of the char *in in variable hex_char
392 // sprintf adds in a null byte at the end
393 sprintf(hex_char,"%%%02X",*in);
394
395 // 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
396 text_t tmp = "";
397 tmp.push_back(*in);
398
399 // 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
400 str.replace(hex_char, tmp);
401
402 ++in;
403 }
404}
405
406
407// split up the cgi arguments
408void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
409 cgiargsclass &args) {
410 args.clear();
411
412 text_t::const_iterator here = argstr.begin();
413 text_t::const_iterator end = argstr.end();
414 text_t key, value;
415
416 // extract out the key=value pairs
417 while (here != end) {
418 // get the next key and value pair
419 here = getdelimitstr (here, end, '=', key);
420 here = getdelimitstr (here, end, '&', value);
421
422 // convert %xx and + to their appropriate equivalents
423 decode_cgi_arg (value);
424
425 safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
426
427 value.setencoding(1); // other encoding
428 // store this key=value pair
429 if (!key.empty()) {
430
431 // if arg occurs multiple times (as is the case with multiple
432 // checkboxes using the same name) we'll create a comma separated
433 // list of all the values (this uses a hack that encodes naturally
434 // occurring commas as %2C - values will therefore need to be decoded
435 // again before use) - it should use an array instead
436 const cgiarginfo *info = argsinfo.getarginfo (key);
437 if (info==NULL) {
438 // If info is NULL, we can't tell if the arg is multiple value or not
439 // Because we need to have dynamically named arguments multivalued, we
440 // will always assume multiplevalue = true
441 // If the arg is not multi valued, then you need to decode the commas.
442 if (args.getarg(key)==NULL) {
443 args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
444 }
445 else {
446 text_t newvalue = args[key];
447
448 newvalue += "," + encode_commas(value);
449 newvalue.setencoding(1); // other encoding
450 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
451 }
452 }
453 else {
454 if (info->multiplevalue) {
455
456 text_t newvalue = args[key];
457 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
458 newvalue += encode_commas(value);
459 newvalue.setencoding(1); // other encoding
460 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
461
462 } else {
463 args.setarg (key, value, cgiarg_t::cgi_arg);
464 }
465 }
466 }
467 }
468}
469
470text_t encode_commas (const text_t &intext) {
471
472 text_t outtext;
473
474 text_t::const_iterator here = intext.begin ();
475 text_t::const_iterator end = intext.end ();
476
477 while (here != end) {
478 if (*here == ',') outtext += "%2C";
479 else outtext.push_back (*here);
480 ++here;
481 }
482 return outtext;
483}
484
485text_t decode_commas (const text_t &intext) {
486
487 text_t outtext;
488
489 text_t::const_iterator here = intext.begin ();
490 text_t::const_iterator end = intext.end ();
491
492 // for loop
493 int intext_len = intext.size();
494 for(int i = 0; i < intext_len; i++) {
495 if ((i+2)<intext_len) {
496 if(intext[i] == '%' && intext[i+1] == '2'
497 && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
498 i += 2;
499 outtext.push_back(',');
500 continue;
501 }
502 }
503 outtext.push_back (intext[i]);
504 }
505 return outtext;
506}
507
508// set utf8 to true if input is in utf-8, otherwise expects input in unicode
509text_t minus_safe (const text_t &intext, bool utf8) {
510
511 text_t outtext;
512
513 text_t::const_iterator here = intext.begin ();
514 text_t::const_iterator end = intext.end ();
515
516 while (here != end) {
517 if (*here == '-') outtext += "Zz-";
518 else outtext.push_back (*here);
519 ++here;
520 }
521 if (utf8) {
522 outtext = cgi_safe_utf8 (outtext);
523 } else {
524 outtext = cgi_safe_unicode (outtext);
525 }
526 return outtext;
527}
528
529// takes utf-8 input
530text_t cgi_safe_utf8 (const text_t &intext) {
531 text_t outtext;
532
533 text_t::const_iterator here = intext.begin ();
534 text_t::const_iterator end = intext.end ();
535 unsigned short c;
536 text_t ttmp;
537
538 while (here != end) {
539 c = *here;
540 if (((c >= 'a') && (c <= 'z')) ||
541 ((c >= 'A') && (c <= 'Z')) ||
542 ((c >= '0') && (c <= '9')) ||
543 (c == '%') || (c == '-')) {
544 // alphanumeric character
545 outtext.push_back(c);
546 } else if (c == ' ') {
547 // space
548 outtext.push_back('+');
549 } else if (c > 255) { // not utf-8 character
550 cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
551 } else {
552 // everything else
553 outtext.push_back('%');
554 c2hex(c, ttmp);
555 outtext += ttmp;
556 }
557
558 ++here;
559 }
560
561 return outtext;
562}
563// takes unicode input
564text_t cgi_safe_unicode (const text_t &intext) {
565 text_t outtext;
566
567 text_t::const_iterator here = intext.begin ();
568 text_t::const_iterator end = intext.end ();
569 unsigned short c;
570 text_t ttmp;
571
572 while (here != end) {
573 c = *here;
574 if (((c >= 'a') && (c <= 'z')) ||
575 ((c >= 'A') && (c <= 'Z')) ||
576 ((c >= '0') && (c <= '9')) ||
577 (c == '%') || (c == '-')) {
578 // alphanumeric character
579 outtext.push_back(c);
580 } else if (c == ' ') {
581 // space
582 outtext.push_back('+');
583 } else if (c > 127) { // unicode character
584 unsigned char buf[3]; // up to 3 bytes
585 buf[0]='\0';buf[1]='\0';buf[2]='\0';
586 output_utf8_char(c,buf, buf+2);
587 outtext.push_back('%');
588 c2hex(buf[0], ttmp);
589 outtext += ttmp;
590 outtext.push_back('%');
591 c2hex(buf[1], ttmp);
592 outtext += ttmp;
593 if (buf[2]) {
594 outtext.push_back('%');
595 c2hex(buf[2], ttmp);
596 outtext += ttmp;
597 }
598 } else {
599 // everything else
600 outtext.push_back('%');
601 c2hex(c, ttmp);
602 outtext += ttmp;
603 }
604
605 ++here;
606 }
607
608 return outtext;
609}
610
611
612
613
614static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
615 text_t::const_iterator last,
616 text_t &argname) {
617 first = getdelimitstr (first, last, '-', argname);
618 return first;
619}
620
621
622// check_save_conf_str checks the configuration string for
623// the saved args and makes sure it does not conflict with
624// the information about the arguments. If an error is encountered
625// it will return false and the program should not produce any
626// output.
627bool check_save_conf_str (const text_t &saveconf,
628 const cgiargsinfoclass &argsinfo,
629 ostream &logout) {
630 outconvertclass text_t2ascii;
631
632 text_tset argsset;
633 text_t::const_iterator saveconfhere = saveconf.begin ();
634 text_t::const_iterator saveconfend = saveconf.end ();
635 text_t argname;
636 const cgiarginfo *info;
637
638 // first check to make sure all saved arguments can be saved
639
640 while (saveconfhere != saveconfend) {
641 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
642
643 if (!argname.empty()) {
644 // save the argument name for later
645 argsset.insert (argname);
646
647 // check the argument
648 info = argsinfo.getarginfo (argname);
649 if (info == NULL) {
650 logout << text_t2ascii << "Error: the cgi argument \"" << argname
651 << "\" is used in the configuration string for the\n"
652 << "saved arguments but does not exist as a valid argument.\n\n";
653 return false;
654 }
655 if (info->savedarginfo == cgiarginfo::mustnot) {
656 logout << text_t2ascii << "Error: the cgi argument \"" << argname
657 << "\" is used in the configuration string for the\n"
658 << "saved arguments but has been specified as an argument whose\n"
659 << "state must not be saved.\n\n";
660 return false;
661 }
662 }
663 }
664
665
666 // next check that all saved arguments that should be saved
667 // are saved
668 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
669 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
670
671 while (argsinfohere != argsinfoend) {
672 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
673 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
674 logout << text_t2ascii << "Error: the cgi argument \""
675 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
676 << "be save but was not listed in the saved arguments.\n\n";
677 return false;
678 }
679
680 ++argsinfohere;
681 }
682
683 return true; // made it, no clashes
684}
685
686
687// create_save_conf_str will create a configuration string
688// based on the information in argsinfo. This method of configuration
689// is not recomended as small changes can produce large changes in
690// the resulting configuration string (for instance a totally different
691// ordering). Only arguments which "must" be saved are included in
692// the resulting string.
693text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
694 ostream &/*logout*/) {
695 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
696 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
697 text_t saveconf;
698 bool first = true;
699
700 while (argsinfohere != argsinfoend) {
701 // save this argument if it must be saved
702 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
703 if (!first) saveconf.push_back ('-');
704 else first = false;
705 saveconf += (*argsinfohere).second.shortname;
706 }
707
708 ++argsinfohere;
709 }
710
711 return saveconf;
712}
713
714
715// expand_save_args will expand the saved arguments based
716// on saveconf placing the results in args if they are not
717// already defined. If it encounters an error it will return false
718// and output more information to logout.
719bool expand_save_args (const cgiargsinfoclass &argsinfo,
720 const text_t &saveconf,
721 cgiargsclass &args,
722 ostream &logout) {
723 outconvertclass text_t2ascii;
724
725 text_t *arg_e = args.getarg("e");
726 if (arg_e == NULL) return true; // no compressed arguments
727 if (arg_e->empty()) return true; // no compressed arguments
728
729 text_t argname, argvalue;
730 const cgiarginfo *argnameinfo;
731
732 text_t::const_iterator saveconfhere = saveconf.begin();
733 text_t::const_iterator saveconfend = saveconf.end();
734
735 text_t::iterator arg_ebegin = arg_e->begin();
736 text_t::iterator arg_eend = arg_e->end();
737 text_t::iterator arg_ehere = arg_ebegin;
738 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
739 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
740
741 if (!argname.empty()) {
742 // found another entry
743 argnameinfo = argsinfo.getarginfo (argname);
744
745 if (argnameinfo == NULL) {
746 // no information about the argument could be found
747 // we can't keep going because we don't know whether
748 // this argument is a single or multiple character value
749 logout << text_t2ascii << "Error: the cgi argument \"" << argname
750 << "\" was specified as being a compressed argument\n"
751 << "but no information about it could be found within the "
752 << "cgiargsinfoclass.\n";
753 return false;
754
755 } else {
756
757 // found the argument information
758 if (argnameinfo->multiplechar) {
759 text_t::const_iterator sav = arg_ehere;
760 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
761 if (distance(arg_ebegin, arg_ehere) > 2) {
762 // replace any '-' chars escaped with 'Zz'
763 bool first = true;
764 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
765 if (first) argvalue.clear();
766
767 // Hey, here's a wild idea. Why don't we check that there is
768 // another hyphen in the cgiarge before we get a pointer to it and
769 // add one. That way we are far less likely to wander off into
770 // random memory merrily parsing arguments that are then lovingly
771 // spewed all over the HTML page returned at the usage logs.
772 text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
773 if (minus_itr == arg_eend)
774 {
775 logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
776 return false;
777 }
778 arg_ehere = minus_itr + 1;
779
780 while (sav != (arg_ehere-1)) {
781 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
782 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
783 ++sav;
784 }
785 first = false;
786 }
787 }
788 argvalue.setencoding(1); // other encoding
789 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
790 } else {
791 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
792 ++arg_ehere;
793 }
794 }
795 }
796 }
797
798 return true;
799}
800
801
802// adds the default values for those arguments which have not
803// been specified
804void add_default_args (const cgiargsinfoclass &argsinfo,
805 cgiargsclass &args,
806 ostream &/*logout*/) {
807 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
808 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
809
810 while (argsinfohere != argsinfoend) {
811 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
812 args.setdefaultarg ((*argsinfohere).second.shortname,
813 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
814 }
815 ++argsinfohere;
816 }
817}
818
819void add_fileupload_args (const cgiargsinfoclass &argsinfo,
820 cgiargsclass &args,
821 fileupload_tmap &fileuploads,
822 ostream &logout) {
823
824 const cgiarginfo *info = argsinfo.getarginfo("a");
825 fileupload_tmap::const_iterator this_file = fileuploads.begin();
826 fileupload_tmap::const_iterator end_file = fileuploads.end();
827 while (this_file != end_file) {
828 const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
829 if (info != NULL) {
830
831 if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
832
833 args.setargfile((*this_file).first, (*this_file).second);
834 }
835 }
836 this_file++;
837 }
838}
839
840// compress_save_args will compress the arguments and return
841// them in compressed_args. If an error was encountered
842// compressed_args will be set to to "", an error will be
843// written to logout, and the function will return false.
844bool compress_save_args (const cgiargsinfoclass &argsinfo,
845 const text_t &saveconf,
846 cgiargsclass &args,
847 text_t &compressed_args,
848 outconvertclass &outconvert,
849 ostream &logout) {
850 outconvertclass text_t2ascii;
851
852 compressed_args.clear();
853
854 text_t argname, argvalue;
855 const cgiarginfo *argnameinfo;
856
857 text_t::const_iterator saveconfhere = saveconf.begin();
858 text_t::const_iterator saveconfend = saveconf.end();
859
860 while (saveconfhere != saveconfend) {
861 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
862
863 if (!argname.empty()) {
864 // found another entry
865 argnameinfo = argsinfo.getarginfo (argname);
866
867 if (argnameinfo == NULL) {
868 // no information about the argument could be found
869 // we can't keep going because we don't know whether
870 // this argument is a single or multiple character value
871 logout << text_t2ascii << "Error: the cgi argument \"" << argname
872 << "\" was specified as being a compressed argument\n"
873 << "but no information about it could be found within the "
874 << "cgiargsinfoclass.\n";
875 compressed_args.clear();
876 return false;
877
878 } else {
879 // found the argument information
880 if (argnameinfo->multiplechar) {
881 // multiple character argument -- sort out any '-' chars
882 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
883 compressed_args += minus_safe (args[argname], false);
884 else
885 compressed_args += minus_safe (outconvert.convert(args[argname]), true);
886
887 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
888
889 } else {
890 // single character argument
891 if (args[argname].size() == 0) {
892 logout << text_t2ascii << "Error: the cgi argument \"" << argname
893 << "\" was specified as being a compressed argument which\n"
894 << "should have a one character value but it was empty.\n\n";
895 compressed_args.clear ();
896 return false;
897
898 } else if (args[argname].size() > 1) {
899 logout << text_t2ascii << "Error: the cgi argument \"" << argname
900 << "\" was specified as being a compressed argument which\n"
901 << "should have a one character value but it had multiple characters.\n\n";
902 compressed_args.clear ();
903 return false;
904 }
905
906 // everything is ok
907 compressed_args += args[argname];
908 }
909 }
910 }
911 }
912
913 return true;
914}
915
916
917// args_tounicode converts any arguments which are not in unicode
918// to unicode using inconvert
919void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
920 cgiargsclass::iterator here = args.begin();
921 cgiargsclass::iterator end = args.end();
922
923 while (here != end) {
924 if ((*here).second.value.getencoding() > 0) {
925 // Call reset() before converting each argument, to prevent problems when converting the last
926 // argument left the converter in a bad state
927 inconvert.reset();
928 (*here).second.value = inconvert.convert((*here).second.value);
929 }
930
931 ++here;
932 }
933}
934
935// fcgienv will be loaded with environment name-value pairs
936// if using fastcgi (had to do this as getenv doesn't work
937// with our implementation of fastcgi). if fcgienv is empty
938// we'll simply use getenv
939text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
940 if (fcgienv.empty()) {
941 char *n = name.getcstr();
942 char *v = getenv(n);
943 delete []n;
944 if (v != NULL) return v;
945 return g_EmptyText;
946
947 } else return fcgienv[name];
948}
Note: See TracBrowser for help on using the repository browser.