source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 30373

Last change on this file since 30373 was 30373, checked in by kjdon, 8 years ago

First stab at fixing Diego's bug where you can't add accented values from a select in the depositor. The problem seems to be that the argstr from a GET is not unicode, but from a POST it is?

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 29.6 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "fileutil.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "unitool.h" // in mg, for output_utf8_char
31#include <cstdlib>
32#include <time.h>
33
34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\iostream>
36# include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38# include <iostream.h>
39# include <fstream.h>
40#else
41# include <iostream>
42# include <fstream>
43#endif
44
45// set to false to undo security changes (url-encoding arguments)
// While this flag is false, safe_cgi_arg() and unsafe_cgi_arg() both return
// immediately without touching their argument.
46static bool do_safe_cgi_args = false;
47
// Map a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Anything else is handed back unchanged, so a caller can
// treat a result of 16 or more as "not a hex digit" (characters whose own
// code is below 16 cannot be told apart from valid digits this way).
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'a' && c <= 'f') {
    value = c - 'a' + 10;
  } else if (c >= 'A' && c <= 'F') {
    value = c - 'A' + 10;
  }

  return value;
}
54
55
56static void c2hex (unsigned short c, text_t &t) {
57 t.clear();
58
59 if (c >= 256) {
60 t = "20"; // ' '
61 return;
62 }
63
64 unsigned short o1, o2;
65
66 o1 = (c/16) % 16;
67 o2 = c % 16;
68 if (o1 >= 10) o1 += 'a' - 10;
69 else o1 += '0';
70 if (o2 >= 10) o2 += 'a' - 10;
71 else o2 += '0';
72
73 t.push_back(o1);
74 t.push_back(o2);
75}
76
77static text_t::iterator getline (text_t::iterator first,
78 text_t::iterator last,
79 bool include_crlf) {
80 while (first != last) {
81 if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
82 // found <CRLF>
83 if (include_crlf) first += 2;
84 break;
85 }
86
87 first++;
88 }
89
90 return first;
91}
92
93static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
94 text_t &filetype, bool &isfile, text_t &argstr,
95 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
96
97 if (!argname.empty()) {
98
99 if (!isfile) {
100 // argdata includes a trailing <CRLF> that we must remove
101 if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
102 argdata.erase(argdata.end()-2, argdata.end());
103 }
104 if (!argstr.empty()) argstr += "&";
105 argstr += argname + "=" + argdata;
106
107 } else if (!filename.empty()) {
108 // filedata includes a trailing <CRLF> that we must remove
109 if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
110 filedata.erase(filedata.end()-2, filedata.end());
111 }
112
113 // create tmp_name for storing the file on disk, using the current timestamp
114 text_t tmp_name(time(NULL));
115 tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
116
117 char *tmp_name_c = tmp_name.getcstr();
118
119 // write the file data to disk
120 outconvertclass out;
121 ofstream filestream(tmp_name_c, ios::out | ios::binary);
122 filestream << out << filedata;
123 filestream.close();
124 delete tmp_name_c;
125
126 // populate the fields of a fileupload_t and put it in the
127 // fileuploads map
128 fileupload_t fu;
129 // note that filename currently may or may not include the path since
130 // some browsers (e.g. IE) include the path while others
131 // (e.g. mozilla) do not. we should probably remove the path from
132 // this field here to get a consistent value across all browsers.
133 text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
134 if (slash != filename.end()) {
135 filename = substr(slash+1, filename.end());
136 }
137 fu.name = filename;
138 fu.type = filetype;
139 // size has yet to be implemented
140 fu.size = filedata.size();
141
142 fu.tmp_name = tmp_name;
143 fileuploads[argname] = fu;
144 }
145 }
146 isfile = false;
147 argname.clear();
148 argdata.clear();
149 filename.clear();
150 filedata.clear();
151 filetype.clear();
152}
153
154// parse data obtained through a CGI POST request
155text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
156 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
157
158 text_t argstr;
159
160 text_t::iterator content_type_begin = content_type.begin();
161 text_t::iterator content_type_end = content_type.end();
162 if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
163 // a simple post request
164
165 return raw_post_data;
166
167 } else {
168 // multipart/form data - may contain one or more uploaded files
169
170 /*
171 content_type should look something like the following
172 multipart/form-data; boundary=---------------------------7d411e1a50330
173
174 while raw_post_data will be as follows
175 -----------------------------7d43e73450330CRLF
176 Content-Disposition: form-data; name="e"<CRLF>
177 <CRLF>
178 d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
179 -----------------------------7d43e73450330<CRLF>
180 Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
181 Content-Type: application/msword<CRLF>
182 <CRLF>
183 <Content of file><CRLF>
184
185 */
186
187 // first get the boundary from content-type
188 text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
189 if (boundary_begin+9 < content_type_end)
190 {
191 // skip over "boundary=" part of string
192 boundary_begin += 9;
193 }
194 else {
195 // error
196 cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
197 return "";
198 }
199 text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
200 int boundary_len = boundary.size();
201
202
203 text_t argname, argdata, filename, filedata, filetype;
204 bool isfile = false;
205 text_t::iterator data_here = raw_post_data.begin();
206 text_t::iterator data_end = raw_post_data.end();
207 while (data_here != data_end) {
208
209 // get the next available line (including the trailing <CRLF>
210 text_t line = substr(data_here, getline(data_here, data_end, true));
211
212 data_here += line.size();
213 text_t::iterator line_begin = line.begin();
214 text_t::iterator line_end = line.end();
215 if (findword(line_begin, line_end, boundary) != line_end) {
216 // we've found a boundary
217 process_post_section(argname, argdata, filename, filedata, filetype,
218 isfile, argstr, fileuploads, gsdlhome);
219
220 } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
221 // we've found the the beginning of a new section
222 argname.clear();
223 argdata.clear();
224
225 // get the name of this piece of form data
226 text_t::iterator it = findword(line_begin, line_end, "name=\"");
227 if (it == line_end) break; // error - this shouldn't happen
228 it = findchar(it, line_end, '"');
229 if ((it != line_end) && (it+1 != line_end)) {
230 argname = substr(it+1, findchar(it+1, line_end, '"'));
231 }
232
233 // if this piece of form data contains filename="" it's a file
234 // upload and needs to be treated special
235 it = (findword(line_begin, line_end, "filename=\""));
236 if (it != line_end) {
237 // we've found a file upload
238 isfile = true;
239 it = findchar(it, line_end, '"');
240 if ((it != line_end) && (it+1 != line_end)) {
241 filename = substr(it+1, findchar(it+1, line_end, '"'));
242 }
243
244 // the next line is the content-type of this section
245 line = substr(data_here, getline(data_here, data_end, true));
246 data_here += line.size();
247 line_begin = line.begin();
248 line_end = line.end();
249 it = (findword(line_begin, line_end, "Content-Type: "));
250 if (it != line_end) {
251 filetype = substr(it+14, getline(it, line_end, false));
252 }
253 }
254
255 // eat up the next line as it's just a <CRLF> on it's own
256 data_here += 2;
257
258 } else {
259 if (isfile) filedata += line;
260 else argdata += line;
261 }
262
263 }
264
265 // process last section
266 process_post_section(argname, argdata, filename, filedata, filetype,
267 isfile, argstr, fileuploads, gsdlhome);
268
269 return argstr;
270 }
271}
272
273// convert %xx and + to their appropriate equivalents
274// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
275// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
276// actually was, then this returns utf-8, and needs to_uni on the
277// result to get it back to unicode. If the encoding wasn't utf-8, then the
278// output may be crap. Seems to work for 8 bit encodings.
279// Really, this should be given the encoding, and should always return unicode.
280void decode_cgi_arg (text_t &argstr) {
281 text_t::iterator in = argstr.begin();
282 text_t::iterator out = in;
283 text_t::iterator end = argstr.end();
284
285 while (in != end) {
286 if (*in == '+') *out = ' ';
287
288 else if (*in == '%') {
289 unsigned short c = '%';
290 ++in;
291 if (in != end) { // this is an encoding...
292 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
293 // this assumes a short int is at least 16 bits...
294 ++in;
295 if (in != end)
296 c=hexdigit(*in++) << 12;
297 if (in != end)
298 c+=hexdigit(*in++) << 8;
299 if (in != end)
300 c+=hexdigit(*in++) << 4;
301 if (in != end)
302 c+=hexdigit(*in);
303 /* BAD!! The following assumes the interface is using utf-8. But
304 at this point we don't know what encoding we are using, unless
305 we can parse it out of the string we are currently decoding... */
306 text_t uni=" ";
307 uni[0]=c;
308 text_t utf8=to_utf8(uni);
309 int last_byte=utf8.size()-1;
310 for (int i=0;i<last_byte;++i)
311 *out++ = utf8[i];
312 c=utf8[last_byte];
313 } else { // convert %HH to hex value
314 c = hexdigit (*in);
315 ++in;
316 if (in != end && c < 16) { // sanity check on the previous character
317 c = c*16 + hexdigit (*in);
318 }
319 }
320 }
321 *out = c;
322 } else *out = *in;
323
324 if (in != end) ++in;
325 ++out;
326 }
327
328 // remove the excess characters
329 argstr.erase (out, end);
330
331}
332
333// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
334// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
335// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
336// apache log and that log file can be included in a local file inclusion (LFI) or
337// remote file include (RFI) attack.
338// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
339// break out of an html/XML/javascript context.
340void safe_cgi_arg (const text_t &key, text_t &argstr) {
341 if(!do_safe_cgi_args) {
342 return;
343 }
344
345 text_t::iterator in = argstr.begin();
346 text_t out = "";
347 text_t::iterator end = argstr.end();
348
349 while (in != end) {
350 if (*in == '<') out += "%3C";
351 else if (*in == '>') out += "%3E";
352 else if (*in == '&') out += "%26";
353 else if (*in == '\"') out += "%22";
354 else if (*in == '\'') out += "%27";
355 //else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
356 else { // append whatever char is in *in, but as a char, not int
357 //out += *in; // appends as int
358 out.push_back(*in);
359 }
360 ++in;
361 }
362
363 argstr.erase (argstr.begin(), end);
364 argstr += out;
365}
366
367
368// given a list of characters (or "all") to decode, and given the string, str, where those
369// characters are to be decoded, this method replaces any occurrences of the url-encoded
370// variants of those characters with their actual characters in the given string str.
371void unsafe_cgi_arg(const text_t &chars, text_t &str) {
372 if(!do_safe_cgi_args) {
373 return;
374 }
375
376 text_t allchars = "<>&\"\'/";
377
378 text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars;
379
380 text_t::iterator in = chars_to_decode.begin();
381 text_t::iterator end = chars_to_decode.end();
382
383 char hex_char[4];
384
385 // using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html
386
387 while (in != end) {
388
389 // *in is a character from the accepted list of chars_to_decode list
390
391 // 1. create the url-encoded value of the char *in in variable hex_char
392 // sprintf adds in a null byte at the end
393 sprintf(hex_char,"%%%02X",*in);
394
395 // 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it
396 text_t tmp = "";
397 tmp.push_back(*in);
398
399 // 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version
400 str.replace(hex_char, tmp);
401
402 ++in;
403 }
404}
405
406
407// split up the cgi arguments
408void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
409 cgiargsclass &args) {
410 args.clear();
411
412 text_t::const_iterator here = argstr.begin();
413 text_t::const_iterator end = argstr.end();
414 // get seems to be not unicode, while post is, so don't want to just assume encoding is 1 (not unicode)
415 unsigned short args_encoding = argstr.getencoding();
416 cerr << "args enc = "<< args_encoding<<endl;
417 text_t key, value;
418
419 // extract out the key=value pairs
420 while (here != end) {
421 // get the next key and value pair
422 here = getdelimitstr (here, end, '=', key);
423 here = getdelimitstr (here, end, '&', value);
424
425 // convert %xx and + to their appropriate equivalents
426 decode_cgi_arg (value);
427
428 safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params
429
430 value.setencoding(args_encoding); //1 // other encoding
431 // store this key=value pair
432 if (!key.empty()) {
433
434 // if arg occurs multiple times (as is the case with multiple
435 // checkboxes using the same name) we'll create a comma separated
436 // list of all the values (this uses a hack that encodes naturally
437 // occurring commas as %2C - values will therefore need to be decoded
438 // again before use) - it should use an array instead
439 const cgiarginfo *info = argsinfo.getarginfo (key);
440 if (info==NULL) {
441 // If info is NULL, we can't tell if the arg is multiple value or not
442 // Because we need to have dynamically named arguments multivalued, we
443 // will always assume multiplevalue = true
444 // If the arg is not multi valued, then you need to decode the commas.
445 if (args.getarg(key)==NULL) {
446 // encode_commas returns a text_t without encoding bit set
447 text_t newvalue = encode_commas(value);
448 newvalue.setencoding(args_encoding);
449 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
450 }
451 else {
452 text_t newvalue = args[key];
453
454 newvalue += "," + encode_commas(value);
455 newvalue.setencoding(args_encoding); // other encoding
456 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
457 }
458 }
459 else {
460 if (info->multiplevalue) {
461
462 text_t newvalue = args[key];
463 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
464 newvalue += encode_commas(value);
465 newvalue.setencoding(args_encoding); // other encoding
466 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
467
468 } else {
469 args.setarg (key, value, cgiarg_t::cgi_arg);
470 }
471 }
472 }
473 }
474}
475
476text_t encode_commas (const text_t &intext) {
477
478 text_t outtext;
479
480 text_t::const_iterator here = intext.begin ();
481 text_t::const_iterator end = intext.end ();
482
483 while (here != end) {
484 if (*here == ',') outtext += "%2C";
485 else outtext.push_back (*here);
486 ++here;
487 }
488 return outtext;
489}
490
491text_t decode_commas (const text_t &intext) {
492
493 text_t outtext;
494
495 text_t::const_iterator here = intext.begin ();
496 text_t::const_iterator end = intext.end ();
497
498 // for loop
499 int intext_len = intext.size();
500 for(int i = 0; i < intext_len; i++) {
501 if ((i+2)<intext_len) {
502 if(intext[i] == '%' && intext[i+1] == '2'
503 && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
504 i += 2;
505 outtext.push_back(',');
506 continue;
507 }
508 }
509 outtext.push_back (intext[i]);
510 }
511 return outtext;
512}
513
514// set utf8 to true if input is in utf-8, otherwise expects input in unicode
515text_t minus_safe (const text_t &intext, bool utf8) {
516
517 text_t outtext;
518
519 text_t::const_iterator here = intext.begin ();
520 text_t::const_iterator end = intext.end ();
521
522 while (here != end) {
523 if (*here == '-') outtext += "Zz-";
524 else outtext.push_back (*here);
525 ++here;
526 }
527 if (utf8) {
528 outtext = cgi_safe_utf8 (outtext);
529 } else {
530 outtext = cgi_safe_unicode (outtext);
531 }
532 return outtext;
533}
534
535// takes utf-8 input
536text_t cgi_safe_utf8 (const text_t &intext) {
537 text_t outtext;
538
539 text_t::const_iterator here = intext.begin ();
540 text_t::const_iterator end = intext.end ();
541 unsigned short c;
542 text_t ttmp;
543
544 while (here != end) {
545 c = *here;
546 if (((c >= 'a') && (c <= 'z')) ||
547 ((c >= 'A') && (c <= 'Z')) ||
548 ((c >= '0') && (c <= '9')) ||
549 (c == '%') || (c == '-')) {
550 // alphanumeric character
551 outtext.push_back(c);
552 } else if (c == ' ') {
553 // space
554 outtext.push_back('+');
555 } else if (c > 255) { // not utf-8 character
556 cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
557 } else {
558 // everything else
559 outtext.push_back('%');
560 c2hex(c, ttmp);
561 outtext += ttmp;
562 }
563
564 ++here;
565 }
566
567 return outtext;
568}
569// takes unicode input
570text_t cgi_safe_unicode (const text_t &intext) {
571 text_t outtext;
572
573 text_t::const_iterator here = intext.begin ();
574 text_t::const_iterator end = intext.end ();
575 unsigned short c;
576 text_t ttmp;
577
578 while (here != end) {
579 c = *here;
580 if (((c >= 'a') && (c <= 'z')) ||
581 ((c >= 'A') && (c <= 'Z')) ||
582 ((c >= '0') && (c <= '9')) ||
583 (c == '%') || (c == '-')) {
584 // alphanumeric character
585 outtext.push_back(c);
586 } else if (c == ' ') {
587 // space
588 outtext.push_back('+');
589 } else if (c > 127) { // unicode character
590 unsigned char buf[3]; // up to 3 bytes
591 buf[0]='\0';buf[1]='\0';buf[2]='\0';
592 output_utf8_char(c,buf, buf+2);
593 outtext.push_back('%');
594 c2hex(buf[0], ttmp);
595 outtext += ttmp;
596 outtext.push_back('%');
597 c2hex(buf[1], ttmp);
598 outtext += ttmp;
599 if (buf[2]) {
600 outtext.push_back('%');
601 c2hex(buf[2], ttmp);
602 outtext += ttmp;
603 }
604 } else {
605 // everything else
606 outtext.push_back('%');
607 c2hex(c, ttmp);
608 outtext += ttmp;
609 }
610
611 ++here;
612 }
613
614 return outtext;
615}
616
617
618
619
620static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
621 text_t::const_iterator last,
622 text_t &argname) {
623 first = getdelimitstr (first, last, '-', argname);
624 return first;
625}
626
627
628// check_save_conf_str checks the configuration string for
629// the saved args and makes sure it does not conflict with
630// the information about the arguments. If an error is encountered
631// it will return false and the program should not produce any
632// output.
633bool check_save_conf_str (const text_t &saveconf,
634 const cgiargsinfoclass &argsinfo,
635 ostream &logout) {
636 outconvertclass text_t2ascii;
637
638 text_tset argsset;
639 text_t::const_iterator saveconfhere = saveconf.begin ();
640 text_t::const_iterator saveconfend = saveconf.end ();
641 text_t argname;
642 const cgiarginfo *info;
643
644 // first check to make sure all saved arguments can be saved
645
646 while (saveconfhere != saveconfend) {
647 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
648
649 if (!argname.empty()) {
650 // save the argument name for later
651 argsset.insert (argname);
652
653 // check the argument
654 info = argsinfo.getarginfo (argname);
655 if (info == NULL) {
656 logout << text_t2ascii << "Error: the cgi argument \"" << argname
657 << "\" is used in the configuration string for the\n"
658 << "saved arguments but does not exist as a valid argument.\n\n";
659 return false;
660 }
661 if (info->savedarginfo == cgiarginfo::mustnot) {
662 logout << text_t2ascii << "Error: the cgi argument \"" << argname
663 << "\" is used in the configuration string for the\n"
664 << "saved arguments but has been specified as an argument whose\n"
665 << "state must not be saved.\n\n";
666 return false;
667 }
668 }
669 }
670
671
672 // next check that all saved arguments that should be saved
673 // are saved
674 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
675 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
676
677 while (argsinfohere != argsinfoend) {
678 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
679 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
680 logout << text_t2ascii << "Error: the cgi argument \""
681 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
682 << "be save but was not listed in the saved arguments.\n\n";
683 return false;
684 }
685
686 ++argsinfohere;
687 }
688
689 return true; // made it, no clashes
690}
691
692
693// create_save_conf_str will create a configuration string
694// based on the information in argsinfo. This method of configuration
695// is not recomended as small changes can produce large changes in
696// the resulting configuration string (for instance a totally different
697// ordering). Only arguments which "must" be saved are included in
698// the resulting string.
699text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
700 ostream &/*logout*/) {
701 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
702 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
703 text_t saveconf;
704 bool first = true;
705
706 while (argsinfohere != argsinfoend) {
707 // save this argument if it must be saved
708 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
709 if (!first) saveconf.push_back ('-');
710 else first = false;
711 saveconf += (*argsinfohere).second.shortname;
712 }
713
714 ++argsinfohere;
715 }
716
717 return saveconf;
718}
719
720
721// expand_save_args will expand the saved arguments based
722// on saveconf placing the results in args if they are not
723// already defined. If it encounters an error it will return false
724// and output more information to logout.
725bool expand_save_args (const cgiargsinfoclass &argsinfo,
726 const text_t &saveconf,
727 cgiargsclass &args,
728 ostream &logout) {
729 outconvertclass text_t2ascii;
730
731 text_t *arg_e = args.getarg("e");
732 if (arg_e == NULL) return true; // no compressed arguments
733 if (arg_e->empty()) return true; // no compressed arguments
734
735 text_t argname, argvalue;
736 const cgiarginfo *argnameinfo;
737
738 text_t::const_iterator saveconfhere = saveconf.begin();
739 text_t::const_iterator saveconfend = saveconf.end();
740
741 text_t::iterator arg_ebegin = arg_e->begin();
742 text_t::iterator arg_eend = arg_e->end();
743 text_t::iterator arg_ehere = arg_ebegin;
744 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
745 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
746
747 if (!argname.empty()) {
748 // found another entry
749 argnameinfo = argsinfo.getarginfo (argname);
750
751 if (argnameinfo == NULL) {
752 // no information about the argument could be found
753 // we can't keep going because we don't know whether
754 // this argument is a single or multiple character value
755 logout << text_t2ascii << "Error: the cgi argument \"" << argname
756 << "\" was specified as being a compressed argument\n"
757 << "but no information about it could be found within the "
758 << "cgiargsinfoclass.\n";
759 return false;
760
761 } else {
762
763 // found the argument information
764 if (argnameinfo->multiplechar) {
765 text_t::const_iterator sav = arg_ehere;
766 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
767 if (distance(arg_ebegin, arg_ehere) > 2) {
768 // replace any '-' chars escaped with 'Zz'
769 bool first = true;
770 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
771 if (first) argvalue.clear();
772
773 // Hey, here's a wild idea. Why don't we check that there is
774 // another hyphen in the cgiarge before we get a pointer to it and
775 // add one. That way we are far less likely to wander off into
776 // random memory merrily parsing arguments that are then lovingly
777 // spewed all over the HTML page returned at the usage logs.
778 text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
779 if (minus_itr == arg_eend)
780 {
781 logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
782 return false;
783 }
784 arg_ehere = minus_itr + 1;
785
786 while (sav != (arg_ehere-1)) {
787 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
788 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
789 ++sav;
790 }
791 first = false;
792 }
793 }
794 argvalue.setencoding(1); // other encoding
795 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
796 } else {
797 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
798 ++arg_ehere;
799 }
800 }
801 }
802 }
803
804 return true;
805}
806
807
808// adds the default values for those arguments which have not
809// been specified
810void add_default_args (const cgiargsinfoclass &argsinfo,
811 cgiargsclass &args,
812 ostream &/*logout*/) {
813 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
814 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
815
816 while (argsinfohere != argsinfoend) {
817 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
818 args.setdefaultarg ((*argsinfohere).second.shortname,
819 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
820 }
821 ++argsinfohere;
822 }
823}
824
825void add_fileupload_args (const cgiargsinfoclass &argsinfo,
826 cgiargsclass &args,
827 fileupload_tmap &fileuploads,
828 ostream &logout) {
829
830 const cgiarginfo *info = argsinfo.getarginfo("a");
831 fileupload_tmap::const_iterator this_file = fileuploads.begin();
832 fileupload_tmap::const_iterator end_file = fileuploads.end();
833 while (this_file != end_file) {
834 const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
835 if (info != NULL) {
836
837 if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
838
839 args.setargfile((*this_file).first, (*this_file).second);
840 }
841 }
842 this_file++;
843 }
844}
845
846// compress_save_args will compress the arguments and return
847// them in compressed_args. If an error was encountered
848// compressed_args will be set to to "", an error will be
849// written to logout, and the function will return false.
850bool compress_save_args (const cgiargsinfoclass &argsinfo,
851 const text_t &saveconf,
852 cgiargsclass &args,
853 text_t &compressed_args,
854 outconvertclass &outconvert,
855 ostream &logout) {
856 outconvertclass text_t2ascii;
857
858 compressed_args.clear();
859
860 text_t argname, argvalue;
861 const cgiarginfo *argnameinfo;
862
863 text_t::const_iterator saveconfhere = saveconf.begin();
864 text_t::const_iterator saveconfend = saveconf.end();
865
866 while (saveconfhere != saveconfend) {
867 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
868
869 if (!argname.empty()) {
870 // found another entry
871 argnameinfo = argsinfo.getarginfo (argname);
872
873 if (argnameinfo == NULL) {
874 // no information about the argument could be found
875 // we can't keep going because we don't know whether
876 // this argument is a single or multiple character value
877 logout << text_t2ascii << "Error: the cgi argument \"" << argname
878 << "\" was specified as being a compressed argument\n"
879 << "but no information about it could be found within the "
880 << "cgiargsinfoclass.\n";
881 compressed_args.clear();
882 return false;
883
884 } else {
885 // found the argument information
886 if (argnameinfo->multiplechar) {
887 // multiple character argument -- sort out any '-' chars
888 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
889 compressed_args += minus_safe (args[argname], false);
890 else
891 compressed_args += minus_safe (outconvert.convert(args[argname]), true);
892
893 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
894
895 } else {
896 // single character argument
897 if (args[argname].size() == 0) {
898 logout << text_t2ascii << "Error: the cgi argument \"" << argname
899 << "\" was specified as being a compressed argument which\n"
900 << "should have a one character value but it was empty.\n\n";
901 compressed_args.clear ();
902 return false;
903
904 } else if (args[argname].size() > 1) {
905 logout << text_t2ascii << "Error: the cgi argument \"" << argname
906 << "\" was specified as being a compressed argument which\n"
907 << "should have a one character value but it had multiple characters.\n\n";
908 compressed_args.clear ();
909 return false;
910 }
911
912 // everything is ok
913 compressed_args += args[argname];
914 }
915 }
916 }
917 }
918
919 return true;
920}
921
922
923// args_tounicode converts any arguments which are not in unicode
924// to unicode using inconvert
925void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
926 cgiargsclass::iterator here = args.begin();
927 cgiargsclass::iterator end = args.end();
928
929 while (here != end) {
930 if ((*here).second.value.getencoding() > 0) {
931 // Call reset() before converting each argument, to prevent problems when converting the last
932 // argument left the converter in a bad state
933 inconvert.reset();
934 (*here).second.value = inconvert.convert((*here).second.value);
935 }
936
937 ++here;
938 }
939}
940
941// fcgienv will be loaded with environment name-value pairs
942// if using fastcgi (had to do this as getenv doesn't work
943// with our implementation of fastcgi). if fcgienv is empty
944// we'll simply use getenv
945text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
946 if (fcgienv.empty()) {
947 char *n = name.getcstr();
948 char *v = getenv(n);
949 delete []n;
950 if (v != NULL) return v;
951 return g_EmptyText;
952
953 } else return fcgienv[name];
954}
Note: See TracBrowser for help on using the repository browser.