source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 30465

Last change on this file since 30465 was 30465, checked in by kjdon, 8 years ago

Fixes for depositor. When getting the post data and putting it together into form data, we need to escape the special characters in cgi args.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 30.3 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "fileutil.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "unitool.h" // in mg, for output_utf8_char
31#include <cstdlib>
32#include <time.h>
33
34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\iostream>
36# include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38# include <iostream.h>
39# include <fstream.h>
40#else
41# include <iostream>
42# include <fstream>
43#endif
44
// Switch for the URL-encoding security changes made to cgi arguments.
// While it is false, safe_cgi_arg() and unsafe_cgi_arg() in this file
// return immediately and leave their argument untouched.
// set to false to undo security changes (url-encoding arguments)
static bool do_safe_cgi_args = false;
47
// Maps a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0..15. Any other input is handed back unchanged, so for
// ordinary printable characters a result of 16 or more means "not a hex
// digit".
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'a' && c <= 'f') {
    value = c - 'a' + 10;
  } else if (c >= 'A' && c <= 'F') {
    value = c - 'A' + 10;
  }

  return value;
}
54
55
56static void c2hex (unsigned short c, text_t &t) {
57 t.clear();
58
59 if (c >= 256) {
60 t = "20"; // ' '
61 return;
62 }
63
64 unsigned short o1, o2;
65
66 o1 = (c/16) % 16;
67 o2 = c % 16;
68 if (o1 >= 10) o1 += 'a' - 10;
69 else o1 += '0';
70 if (o2 >= 10) o2 += 'a' - 10;
71 else o2 += '0';
72
73 t.push_back(o1);
74 t.push_back(o2);
75}
76
77static text_t::iterator getline (text_t::iterator first,
78 text_t::iterator last,
79 bool include_crlf) {
80 while (first != last) {
81 if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
82 // found <CRLF>
83 if (include_crlf) first += 2;
84 break;
85 }
86
87 first++;
88 }
89
90 return first;
91}
92
93static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
94 text_t &filetype, bool &isfile, text_t &argstr,
95 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
96
97 if (!argname.empty()) {
98
99 if (!isfile) {
100 // argdata includes a trailing <CRLF> that we must remove
101 if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
102 argdata.erase(argdata.end()-2, argdata.end());
103 }
104 if (!argstr.empty()) argstr += "&";
105
106 // we need to convert arg to cgi safe variant - escape '&' and '%', '+', '=', turn space to +
107 cgi_safe_post_arg(argdata);
108 argstr += argname + "=" + argdata;
109
110 } else if (!filename.empty()) {
111 // filedata includes a trailing <CRLF> that we must remove
112 if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
113 filedata.erase(filedata.end()-2, filedata.end());
114 }
115
116 // create tmp_name for storing the file on disk, using the current timestamp
117 text_t tmp_name(time(NULL));
118 tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
119
120 char *tmp_name_c = tmp_name.getcstr();
121
122 // write the file data to disk
123 outconvertclass out;
124 ofstream filestream(tmp_name_c, ios::out | ios::binary);
125 filestream << out << filedata;
126 filestream.close();
127 delete tmp_name_c;
128
129 // populate the fields of a fileupload_t and put it in the
130 // fileuploads map
131 fileupload_t fu;
132 // note that filename currently may or may not include the path since
133 // some browsers (e.g. IE) include the path while others
134 // (e.g. mozilla) do not. we should probably remove the path from
135 // this field here to get a consistent value across all browsers.
136 text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
137 if (slash != filename.end()) {
138 filename = substr(slash+1, filename.end());
139 }
140 fu.name = filename;
141 fu.type = filetype;
142 // size has yet to be implemented
143 fu.size = filedata.size();
144
145 fu.tmp_name = tmp_name;
146 fileuploads[argname] = fu;
147 }
148 }
149 isfile = false;
150 argname.clear();
151 argdata.clear();
152 filename.clear();
153 filedata.clear();
154 filetype.clear();
155}
156
157// parse data obtained through a CGI POST request
158text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
159 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
160
161 text_t argstr;
162
163 text_t::iterator content_type_begin = content_type.begin();
164 text_t::iterator content_type_end = content_type.end();
165 if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
166 // a simple post request
167 return raw_post_data;
168
169 } else {
170 // multipart/form data - may contain one or more uploaded files
171
172 /*
173 content_type should look something like the following
174 multipart/form-data; boundary=---------------------------7d411e1a50330
175
176 while raw_post_data will be as follows
177 -----------------------------7d43e73450330CRLF
178 Content-Disposition: form-data; name="e"<CRLF>
179 <CRLF>
180 d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
181 -----------------------------7d43e73450330<CRLF>
182 Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
183 Content-Type: application/msword<CRLF>
184 <CRLF>
185 <Content of file><CRLF>
186
187 */
188
189 // first get the boundary from content-type
190 text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
191 if (boundary_begin+9 < content_type_end)
192 {
193 // skip over "boundary=" part of string
194 boundary_begin += 9;
195 }
196 else {
197 // error
198 cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
199 return "";
200 }
201 text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
202 int boundary_len = boundary.size();
203
204
205 text_t argname, argdata, filename, filedata, filetype;
206 bool isfile = false;
207 text_t::iterator data_here = raw_post_data.begin();
208 text_t::iterator data_end = raw_post_data.end();
209 while (data_here != data_end) {
210
211 // get the next available line (including the trailing <CRLF>
212 text_t line = substr(data_here, getline(data_here, data_end, true));
213
214 data_here += line.size();
215 text_t::iterator line_begin = line.begin();
216 text_t::iterator line_end = line.end();
217 if (findword(line_begin, line_end, boundary) != line_end) {
218 // we've found a boundary
219 process_post_section(argname, argdata, filename, filedata, filetype,
220 isfile, argstr, fileuploads, gsdlhome);
221
222 } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
223 // we've found the the beginning of a new section
224 argname.clear();
225 argdata.clear();
226
227 // get the name of this piece of form data
228 text_t::iterator it = findword(line_begin, line_end, "name=\"");
229 if (it == line_end) break; // error - this shouldn't happen
230 it = findchar(it, line_end, '"');
231 if ((it != line_end) && (it+1 != line_end)) {
232 argname = substr(it+1, findchar(it+1, line_end, '"'));
233 }
234
235 // if this piece of form data contains filename="" it's a file
236 // upload and needs to be treated special
237 it = (findword(line_begin, line_end, "filename=\""));
238 if (it != line_end) {
239 // we've found a file upload
240 isfile = true;
241 it = findchar(it, line_end, '"');
242 if ((it != line_end) && (it+1 != line_end)) {
243 filename = substr(it+1, findchar(it+1, line_end, '"'));
244 }
245
246 // the next line is the content-type of this section
247 line = substr(data_here, getline(data_here, data_end, true));
248 data_here += line.size();
249 line_begin = line.begin();
250 line_end = line.end();
251 it = (findword(line_begin, line_end, "Content-Type: "));
252 if (it != line_end) {
253 filetype = substr(it+14, getline(it, line_end, false));
254 }
255 }
256
257 // eat up the next line as it's just a <CRLF> on it's own
258 data_here += 2;
259
260 } else {
261 if (isfile) filedata += line;
262 else argdata += line;
263 }
264
265 }
266
267 // process last section
268 process_post_section(argname, argdata, filename, filedata, filetype,
269 isfile, argstr, fileuploads, gsdlhome);
270
271 return argstr;
272 }
273}
274
275// convert %xx and + to their appropriate equivalents
276// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
277// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
278// actually was, then this returns utf-8, and needs to_uni on the
279// result to get it back to unicode. If the encoding wasn't utf-8, then the
280// output may be crap. Seems to work for 8 bit encodings.
281// Really, this should be given the encoding, and should always return unicode.
282void decode_cgi_arg (text_t &argstr) {
283 text_t::iterator in = argstr.begin();
284 text_t::iterator out = in;
285 text_t::iterator end = argstr.end();
286
287 while (in != end) {
288 if (*in == '+') *out = ' ';
289
290 else if (*in == '%') {
291 unsigned short c = '%';
292 ++in;
293 if (in != end) { // this is an encoding...
294 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
295 // this assumes a short int is at least 16 bits...
296 ++in;
297 if (in != end)
298 c=hexdigit(*in++) << 12;
299 if (in != end)
300 c+=hexdigit(*in++) << 8;
301 if (in != end)
302 c+=hexdigit(*in++) << 4;
303 if (in != end)
304 c+=hexdigit(*in);
305 /* BAD!! The following assumes the interface is using utf-8. But
306 at this point we don't know what encoding we are using, unless
307 we can parse it out of the string we are currently decoding... */
308 text_t uni=" ";
309 uni[0]=c;
310 text_t utf8=to_utf8(uni);
311 int last_byte=utf8.size()-1;
312 for (int i=0;i<last_byte;++i)
313 *out++ = utf8[i];
314 c=utf8[last_byte];
315 } else { // convert %HH to hex value
316 c = hexdigit (*in);
317 ++in;
318 if (in != end && c < 16) { // sanity check on the previous character
319 c = c*16 + hexdigit (*in);
320 }
321 }
322 }
323 *out = c;
324 } else *out = *in;
325
326 if (in != end) ++in;
327 ++out;
328 }
329
330 // remove the excess characters
331 argstr.erase (out, end);
332
333}
334
335//Need to escape special chars in post data so they don't interfere with arg parsing once its a get style string
336void cgi_safe_post_arg(text_t &argstr) {
337
338 text_t::iterator in = argstr.begin();
339 text_t out = "";
340 text_t::iterator end = argstr.end();
341
342 while (in != end) {
343 if (*in == '&') out += "%26";
344 else if (*in == '%') out += "%2525";
345 else if (*in == '+') out += "%2B";
346 else if (*in == '=') out += "%3D";
347 else if (*in == ' ') out += "+";
348 else { // append whatever char is in *in, but as a char, not int
349 //out += *in; // appends as int
350 out.push_back(*in);
351 }
352 ++in;
353 }
354
355 argstr.erase (argstr.begin(), end);
356 argstr += out;
357}
358
359
360
361// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
362// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
363// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
364// apache log and that log file can be included in a local file inclusion (LFI) or
365// remote file include (RFI) attack.
366// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
367// break out of an html/XML/javascript context.
368void safe_cgi_arg (const text_t &key, text_t &argstr) {
369 if(!do_safe_cgi_args) {
370 return;
371 }
372
373 text_t::iterator in = argstr.begin();
374 text_t out = "";
375 text_t::iterator end = argstr.end();
376
377 while (in != end) {
378 if (*in == '<') out += "%3C";
379 else if (*in == '>') out += "%3E";
380 else if (*in == '&') out += "%26";
381 else if (*in == '\"') out += "%22";
382 else if (*in == '\'') out += "%27";
383 //else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
384 else { // append whatever char is in *in, but as a char, not int
385 //out += *in; // appends as int
386 out.push_back(*in);
387 }
388 ++in;
389 }
390
391 argstr.erase (argstr.begin(), end);
392 argstr += out;
393}
394
395
396// given a list of characters (or "all") to decode, and given the string, str, where those
397// characters are to be decoded, this method replaces any occurrences of the url-encoded
398// variants of those characters with their actual characters in the given string str.
399void unsafe_cgi_arg(const text_t &chars, text_t &str) {
400 if(!do_safe_cgi_args) {
401 return;
402 }
403
404 text_t allchars = "<>&\"\'/";
405
406 text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars;
407
408 text_t::iterator in = chars_to_decode.begin();
409 text_t::iterator end = chars_to_decode.end();
410
411 char hex_char[4];
412
413 // using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
414
415 while (in != end) {
416
417 // *in is a character from the accepted list of chars_to_decode list
418
419 // 1. create the url-encoded value of the char *in in variable hex_char
420 // sprintf adds in a null byte at the end
421 sprintf(hex_char,"%%%02X",*in);
422
423 // 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
424 text_t tmp = "";
425 tmp.push_back(*in);
426
427 // 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
428 str.replace(hex_char, tmp);
429
430 ++in;
431 }
432}
433
434
435// split up the cgi arguments
436void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
437 cgiargsclass &args) {
438 args.clear();
439
440 text_t::const_iterator here = argstr.begin();
441 text_t::const_iterator end = argstr.end();
442 // get seems to be not unicode, while post is, so don't want to just assume encoding is 1 (not unicode)
443 unsigned short args_encoding = argstr.getencoding();
444
445 text_t key, value;
446
447 // extract out the key=value pairs
448 while (here != end) {
449 // get the next key and value pair
450 here = getdelimitstr (here, end, '=', key);
451 here = getdelimitstr (here, end, '&', value);
452
453 // convert %xx and + to their appropriate equivalents
454 decode_cgi_arg (value);
455
456 safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
457
458 value.setencoding(args_encoding); //1 // other encoding
459 // store this key=value pair
460 if (!key.empty()) {
461
462 // if arg occurs multiple times (as is the case with multiple
463 // checkboxes using the same name) we'll create a comma separated
464 // list of all the values (this uses a hack that encodes naturally
465 // occurring commas as %2C - values will therefore need to be decoded
466 // again before use) - it should use an array instead
467 const cgiarginfo *info = argsinfo.getarginfo (key);
468 if (info==NULL) {
469 // If info is NULL, we can't tell if the arg is multiple value or not
470 // Because we need to have dynamically named arguments multivalued, we
471 // will always assume multiplevalue = true
472 // If the arg is not multi valued, then you need to decode the commas.
473 if (args.getarg(key)==NULL) {
474 // encode_commas returns a text_t without encoding bit set
475 text_t newvalue = encode_commas(value);
476 newvalue.setencoding(args_encoding);
477 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
478 }
479 else {
480 text_t newvalue = args[key];
481
482 newvalue += "," + encode_commas(value);
483 newvalue.setencoding(args_encoding); // other encoding
484 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
485 }
486 }
487 else {
488 if (info->multiplevalue) {
489
490 text_t newvalue = args[key];
491 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
492 newvalue += encode_commas(value);
493 newvalue.setencoding(args_encoding); // other encoding
494 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
495
496 } else {
497 args.setarg (key, value, cgiarg_t::cgi_arg);
498 }
499 }
500 }
501 }
502}
503
504text_t encode_commas (const text_t &intext) {
505
506 text_t outtext;
507
508 text_t::const_iterator here = intext.begin ();
509 text_t::const_iterator end = intext.end ();
510
511 while (here != end) {
512 if (*here == ',') outtext += "%2C";
513 else outtext.push_back (*here);
514 ++here;
515 }
516 return outtext;
517}
518
519text_t decode_commas (const text_t &intext) {
520
521 text_t outtext;
522
523 text_t::const_iterator here = intext.begin ();
524 text_t::const_iterator end = intext.end ();
525
526 // for loop
527 int intext_len = intext.size();
528 for(int i = 0; i < intext_len; i++) {
529 if ((i+2)<intext_len) {
530 if(intext[i] == '%' && intext[i+1] == '2'
531 && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
532 i += 2;
533 outtext.push_back(',');
534 continue;
535 }
536 }
537 outtext.push_back (intext[i]);
538 }
539 return outtext;
540}
541
542// set utf8 to true if input is in utf-8, otherwise expects input in unicode
543text_t minus_safe (const text_t &intext, bool utf8) {
544
545 text_t outtext;
546
547 text_t::const_iterator here = intext.begin ();
548 text_t::const_iterator end = intext.end ();
549
550 while (here != end) {
551 if (*here == '-') outtext += "Zz-";
552 else outtext.push_back (*here);
553 ++here;
554 }
555 if (utf8) {
556 outtext = cgi_safe_utf8 (outtext);
557 } else {
558 outtext = cgi_safe_unicode (outtext);
559 }
560 return outtext;
561}
562
563// takes utf-8 input
564text_t cgi_safe_utf8 (const text_t &intext) {
565 text_t outtext;
566
567 text_t::const_iterator here = intext.begin ();
568 text_t::const_iterator end = intext.end ();
569 unsigned short c;
570 text_t ttmp;
571
572 while (here != end) {
573 c = *here;
574 if (((c >= 'a') && (c <= 'z')) ||
575 ((c >= 'A') && (c <= 'Z')) ||
576 ((c >= '0') && (c <= '9')) ||
577 (c == '%') || (c == '-')) {
578 // alphanumeric character
579 outtext.push_back(c);
580 } else if (c == ' ') {
581 // space
582 outtext.push_back('+');
583 } else if (c > 255) { // not utf-8 character
584 cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
585 } else {
586 // everything else
587 outtext.push_back('%');
588 c2hex(c, ttmp);
589 outtext += ttmp;
590 }
591
592 ++here;
593 }
594
595 return outtext;
596}
597// takes unicode input
598text_t cgi_safe_unicode (const text_t &intext) {
599 text_t outtext;
600
601 text_t::const_iterator here = intext.begin ();
602 text_t::const_iterator end = intext.end ();
603 unsigned short c;
604 text_t ttmp;
605
606 while (here != end) {
607 c = *here;
608 if (((c >= 'a') && (c <= 'z')) ||
609 ((c >= 'A') && (c <= 'Z')) ||
610 ((c >= '0') && (c <= '9')) ||
611 (c == '%') || (c == '-')) {
612 // alphanumeric character
613 outtext.push_back(c);
614 } else if (c == ' ') {
615 // space
616 outtext.push_back('+');
617 } else if (c > 127) { // unicode character
618 unsigned char buf[3]; // up to 3 bytes
619 buf[0]='\0';buf[1]='\0';buf[2]='\0';
620 output_utf8_char(c,buf, buf+2);
621 outtext.push_back('%');
622 c2hex(buf[0], ttmp);
623 outtext += ttmp;
624 outtext.push_back('%');
625 c2hex(buf[1], ttmp);
626 outtext += ttmp;
627 if (buf[2]) {
628 outtext.push_back('%');
629 c2hex(buf[2], ttmp);
630 outtext += ttmp;
631 }
632 } else {
633 // everything else
634 outtext.push_back('%');
635 c2hex(c, ttmp);
636 outtext += ttmp;
637 }
638
639 ++here;
640 }
641
642 return outtext;
643}
644
645
646
647
648static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
649 text_t::const_iterator last,
650 text_t &argname) {
651 first = getdelimitstr (first, last, '-', argname);
652 return first;
653}
654
655
656// check_save_conf_str checks the configuration string for
657// the saved args and makes sure it does not conflict with
658// the information about the arguments. If an error is encountered
659// it will return false and the program should not produce any
660// output.
661bool check_save_conf_str (const text_t &saveconf,
662 const cgiargsinfoclass &argsinfo,
663 ostream &logout) {
664 outconvertclass text_t2ascii;
665
666 text_tset argsset;
667 text_t::const_iterator saveconfhere = saveconf.begin ();
668 text_t::const_iterator saveconfend = saveconf.end ();
669 text_t argname;
670 const cgiarginfo *info;
671
672 // first check to make sure all saved arguments can be saved
673
674 while (saveconfhere != saveconfend) {
675 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
676
677 if (!argname.empty()) {
678 // save the argument name for later
679 argsset.insert (argname);
680
681 // check the argument
682 info = argsinfo.getarginfo (argname);
683 if (info == NULL) {
684 logout << text_t2ascii << "Error: the cgi argument \"" << argname
685 << "\" is used in the configuration string for the\n"
686 << "saved arguments but does not exist as a valid argument.\n\n";
687 return false;
688 }
689 if (info->savedarginfo == cgiarginfo::mustnot) {
690 logout << text_t2ascii << "Error: the cgi argument \"" << argname
691 << "\" is used in the configuration string for the\n"
692 << "saved arguments but has been specified as an argument whose\n"
693 << "state must not be saved.\n\n";
694 return false;
695 }
696 }
697 }
698
699
700 // next check that all saved arguments that should be saved
701 // are saved
702 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
703 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
704
705 while (argsinfohere != argsinfoend) {
706 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
707 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
708 logout << text_t2ascii << "Error: the cgi argument \""
709 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
710 << "be save but was not listed in the saved arguments.\n\n";
711 return false;
712 }
713
714 ++argsinfohere;
715 }
716
717 return true; // made it, no clashes
718}
719
720
721// create_save_conf_str will create a configuration string
722// based on the information in argsinfo. This method of configuration
723// is not recomended as small changes can produce large changes in
724// the resulting configuration string (for instance a totally different
725// ordering). Only arguments which "must" be saved are included in
726// the resulting string.
727text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
728 ostream &/*logout*/) {
729 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
730 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
731 text_t saveconf;
732 bool first = true;
733
734 while (argsinfohere != argsinfoend) {
735 // save this argument if it must be saved
736 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
737 if (!first) saveconf.push_back ('-');
738 else first = false;
739 saveconf += (*argsinfohere).second.shortname;
740 }
741
742 ++argsinfohere;
743 }
744
745 return saveconf;
746}
747
748
749// expand_save_args will expand the saved arguments based
750// on saveconf placing the results in args if they are not
751// already defined. If it encounters an error it will return false
752// and output more information to logout.
753bool expand_save_args (const cgiargsinfoclass &argsinfo,
754 const text_t &saveconf,
755 cgiargsclass &args,
756 ostream &logout) {
757 outconvertclass text_t2ascii;
758
759 text_t *arg_e = args.getarg("e");
760 if (arg_e == NULL) return true; // no compressed arguments
761 if (arg_e->empty()) return true; // no compressed arguments
762
763 text_t argname, argvalue;
764 const cgiarginfo *argnameinfo;
765
766 text_t::const_iterator saveconfhere = saveconf.begin();
767 text_t::const_iterator saveconfend = saveconf.end();
768
769 text_t::iterator arg_ebegin = arg_e->begin();
770 text_t::iterator arg_eend = arg_e->end();
771 text_t::iterator arg_ehere = arg_ebegin;
772 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
773 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
774
775 if (!argname.empty()) {
776 // found another entry
777 argnameinfo = argsinfo.getarginfo (argname);
778
779 if (argnameinfo == NULL) {
780 // no information about the argument could be found
781 // we can't keep going because we don't know whether
782 // this argument is a single or multiple character value
783 logout << text_t2ascii << "Error: the cgi argument \"" << argname
784 << "\" was specified as being a compressed argument\n"
785 << "but no information about it could be found within the "
786 << "cgiargsinfoclass.\n";
787 return false;
788
789 } else {
790
791 // found the argument information
792 if (argnameinfo->multiplechar) {
793 text_t::const_iterator sav = arg_ehere;
794 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
795 if (distance(arg_ebegin, arg_ehere) > 2) {
796 // replace any '-' chars escaped with 'Zz'
797 bool first = true;
798 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
799 if (first) argvalue.clear();
800
801 // Hey, here's a wild idea. Why don't we check that there is
802 // another hyphen in the cgiarge before we get a pointer to it and
803 // add one. That way we are far less likely to wander off into
804 // random memory merrily parsing arguments that are then lovingly
805 // spewed all over the HTML page returned at the usage logs.
806 text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
807 if (minus_itr == arg_eend)
808 {
809 logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
810 return false;
811 }
812 arg_ehere = minus_itr + 1;
813
814 while (sav != (arg_ehere-1)) {
815 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
816 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
817 ++sav;
818 }
819 first = false;
820 }
821 }
822 argvalue.setencoding(1); // other encoding
823 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
824 } else {
825 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
826 ++arg_ehere;
827 }
828 }
829 }
830 }
831
832 return true;
833}
834
835
836// adds the default values for those arguments which have not
837// been specified
838void add_default_args (const cgiargsinfoclass &argsinfo,
839 cgiargsclass &args,
840 ostream &/*logout*/) {
841 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
842 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
843
844 while (argsinfohere != argsinfoend) {
845 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
846 args.setdefaultarg ((*argsinfohere).second.shortname,
847 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
848 }
849 ++argsinfohere;
850 }
851}
852
853void add_fileupload_args (const cgiargsinfoclass &argsinfo,
854 cgiargsclass &args,
855 fileupload_tmap &fileuploads,
856 ostream &logout) {
857
858 const cgiarginfo *info = argsinfo.getarginfo("a");
859 fileupload_tmap::const_iterator this_file = fileuploads.begin();
860 fileupload_tmap::const_iterator end_file = fileuploads.end();
861 while (this_file != end_file) {
862 const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
863 if (info != NULL) {
864
865 if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
866
867 args.setargfile((*this_file).first, (*this_file).second);
868 }
869 }
870 this_file++;
871 }
872}
873
874// compress_save_args will compress the arguments and return
875// them in compressed_args. If an error was encountered
876// compressed_args will be set to to "", an error will be
877// written to logout, and the function will return false.
878bool compress_save_args (const cgiargsinfoclass &argsinfo,
879 const text_t &saveconf,
880 cgiargsclass &args,
881 text_t &compressed_args,
882 outconvertclass &outconvert,
883 ostream &logout) {
884 outconvertclass text_t2ascii;
885
886 compressed_args.clear();
887
888 text_t argname, argvalue;
889 const cgiarginfo *argnameinfo;
890
891 text_t::const_iterator saveconfhere = saveconf.begin();
892 text_t::const_iterator saveconfend = saveconf.end();
893
894 while (saveconfhere != saveconfend) {
895 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
896
897 if (!argname.empty()) {
898 // found another entry
899 argnameinfo = argsinfo.getarginfo (argname);
900
901 if (argnameinfo == NULL) {
902 // no information about the argument could be found
903 // we can't keep going because we don't know whether
904 // this argument is a single or multiple character value
905 logout << text_t2ascii << "Error: the cgi argument \"" << argname
906 << "\" was specified as being a compressed argument\n"
907 << "but no information about it could be found within the "
908 << "cgiargsinfoclass.\n";
909 compressed_args.clear();
910 return false;
911
912 } else {
913 // found the argument information
914 if (argnameinfo->multiplechar) {
915 // multiple character argument -- sort out any '-' chars
916 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
917 compressed_args += minus_safe (args[argname], false);
918 else
919 compressed_args += minus_safe (outconvert.convert(args[argname]), true);
920
921 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
922
923 } else {
924 // single character argument
925 if (args[argname].size() == 0) {
926 logout << text_t2ascii << "Error: the cgi argument \"" << argname
927 << "\" was specified as being a compressed argument which\n"
928 << "should have a one character value but it was empty.\n\n";
929 compressed_args.clear ();
930 return false;
931
932 } else if (args[argname].size() > 1) {
933 logout << text_t2ascii << "Error: the cgi argument \"" << argname
934 << "\" was specified as being a compressed argument which\n"
935 << "should have a one character value but it had multiple characters.\n\n";
936 compressed_args.clear ();
937 return false;
938 }
939
940 // everything is ok
941 compressed_args += args[argname];
942 }
943 }
944 }
945 }
946
947 return true;
948}
949
950
951// args_tounicode converts any arguments which are not in unicode
952// to unicode using inconvert
953void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
954 cgiargsclass::iterator here = args.begin();
955 cgiargsclass::iterator end = args.end();
956
957 while (here != end) {
958 if ((*here).second.value.getencoding() > 0) {
959 // Call reset() before converting each argument, to prevent problems when converting the last
960 // argument left the converter in a bad state
961 inconvert.reset();
962 (*here).second.value = inconvert.convert((*here).second.value);
963 }
964
965 ++here;
966 }
967}
968
969// fcgienv will be loaded with environment name-value pairs
970// if using fastcgi (had to do this as getenv doesn't work
971// with our implementation of fastcgi). if fcgienv is empty
972// we'll simply use getenv
973text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
974 if (fcgienv.empty()) {
975 char *n = name.getcstr();
976 char *v = getenv(n);
977 delete []n;
978 if (v != NULL) return v;
979 return g_EmptyText;
980
981 } else return fcgienv[name];
982}
Note: See TracBrowser for help on using the repository browser.