source: main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp@ 22942

Last change on this file since 22942 was 22942, checked in by ak19, 14 years ago

For ticket no 712 again. Tidier code in decode_commas function. Related to previous commit to fix a crash that occurred when using a combination of advanced and fielded searching - in an MGPP collection (server.exe and library.cgi would crash depending on which web server was used): 1. When parsing cgi args, arrays stem and fold contained the URL encodings percent-2-C rather than commas for delimiters and weren't split properly resulting in arrays of unexpected lengths (and values). Need to decode the percent-2-C to commas by calling decode_commas() in cgiutils.cpp before splitting. 2. decode_commas in cgiutils.cpp was performing an illegal iterator operation by attempting to peek PAST the end of the iterator which doesn't seem to be allowed by the STL code. When the iteration really got past the end, the iteration operation causes a problem resulting in a server.exe crash of its own.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 26.5 KB
RevLine 
[108]1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[108]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[108]24 *********************************************************************/
25
26#include "cgiutils.h"
[12513]27#include "fileutil.h"
[3151]28#include "gsdlunicode.h"
[11998]29#include "fileutil.h"
[3217]30#include "unitool.h" // in mg, for output_utf8_char
[18882]31#include <cstdlib>
[22796]32#include <time.h>
[108]33
[12513]34#if defined(GSDL_USE_OBJECTSPACE)
35# include <ospace\std\iostream>
36# include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38# include <iostream.h>
39# include <fstream.h>
40#else
41# include <iostream>
42# include <fstream>
43#endif
[11998]44
[12513]45
// Translate one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// into its numeric value 0..15.  Anything that is not a hex digit is
// returned untouched.
static unsigned short hexdigit (unsigned short c) {
  const bool is_decimal = (c >= '0') && (c <= '9');
  const bool is_lower   = (c >= 'a') && (c <= 'f');
  const bool is_upper   = (c >= 'A') && (c <= 'F');

  if (is_decimal) return c - '0';
  if (is_lower)   return c - 'a' + 10;
  if (is_upper)   return c - 'A' + 10;

  // not a hex digit: hand the input straight back
  return c;
}
52
53
54static void c2hex (unsigned short c, text_t &t) {
55 t.clear();
56
57 if (c >= 256) {
58 t = "20"; // ' '
59 return;
60 }
61
62 unsigned short o1, o2;
63
64 o1 = (c/16) % 16;
65 o2 = c % 16;
66 if (o1 >= 10) o1 += 'a' - 10;
67 else o1 += '0';
68 if (o2 >= 10) o2 += 'a' - 10;
69 else o2 += '0';
70
71 t.push_back(o1);
72 t.push_back(o2);
73}
74
[12513]75static text_t::iterator getline (text_t::iterator first,
76 text_t::iterator last,
77 bool include_crlf) {
78 while (first != last) {
79 if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
80 // found <CRLF>
81 if (include_crlf) first += 2;
82 break;
83 }
[12794]84
[12513]85 first++;
86 }
[12794]87
[12513]88 return first;
89}
90
91static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92 text_t &filetype, bool &isfile, text_t &argstr,
93 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95 if (!argname.empty()) {
96
97 if (!isfile) {
98 // argdata includes a trailing <CRLF> that we must remove
99 if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
100 argdata.erase(argdata.end()-2, argdata.end());
101 }
102 if (!argstr.empty()) argstr += "&";
103 argstr += argname + "=" + argdata;
104
105 } else if (!filename.empty()) {
106 // filedata includes a trailing <CRLF> that we must remove
107 if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
108 filedata.erase(filedata.end()-2, filedata.end());
109 }
110
[22791]111 // create tmp_name for storing the file on disk, using the current timestamp
112 text_t tmp_name(time(NULL));
[12513]113 tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
[12794]114
[12513]115 char *tmp_name_c = tmp_name.getcstr();
116
117 // write the file data to disk
118 outconvertclass out;
119 ofstream filestream(tmp_name_c, ios::out | ios::binary);
120 filestream << out << filedata;
121 filestream.close();
122 delete tmp_name_c;
123
124 // populate the fields of a fileupload_t and put it in the
125 // fileuploads map
126 fileupload_t fu;
127 // note that filename currently may or may not include the path since
128 // some browsers (e.g. IE) include the path while others
129 // (e.g. mozilla) do not. we should probably remove the path from
130 // this field here to get a consistent value across all browsers.
131 text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132 if (slash != filename.end()) {
133 filename = substr(slash+1, filename.end());
134 }
135 fu.name = filename;
136 fu.type = filetype;
137 // size has yet to be implemented
[12579]138 fu.size = filedata.size();
[12794]139
[12513]140 fu.tmp_name = tmp_name;
141 fileuploads[argname] = fu;
142 }
143 }
144 isfile = false;
145 argname.clear();
146 argdata.clear();
147 filename.clear();
148 filedata.clear();
149 filetype.clear();
150}
151
152// parse data obtained through a CGI POST request
153text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154 fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156 text_t argstr;
157
158 text_t::iterator content_type_begin = content_type.begin();
159 text_t::iterator content_type_end = content_type.end();
160 if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161 // a simple post request
[12794]162
[12513]163 return raw_post_data;
164
165 } else {
166 // multipart/form data - may contain one or more uploaded files
167
168 /*
169 content_type should look something like the following
170 multipart/form-data; boundary=---------------------------7d411e1a50330
171
172 while raw_post_data will be as follows
173 -----------------------------7d43e73450330CRLF
174 Content-Disposition: form-data; name="e"<CRLF>
175 <CRLF>
176 d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177 -----------------------------7d43e73450330<CRLF>
178 Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179 Content-Type: application/msword<CRLF>
180 <CRLF>
181 <Content of file><CRLF>
182
183 */
184
185 // first get the boundary from content-type
186 text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
[12794]187 if (boundary_begin+9 < content_type_end)
188 {
189 // skip over "boundary=" part of string
190 boundary_begin += 9;
191 }
[12513]192 else {
193 // error
[12796]194 cerr << "Error: malformed boundary? '" << content_type << "'" << endl;
[12513]195 return "";
196 }
197 text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198 int boundary_len = boundary.size();
199
[12794]200
[12513]201 text_t argname, argdata, filename, filedata, filetype;
202 bool isfile = false;
203 text_t::iterator data_here = raw_post_data.begin();
204 text_t::iterator data_end = raw_post_data.end();
205 while (data_here != data_end) {
206
207 // get the next available line (including the trailing <CRLF>
208 text_t line = substr(data_here, getline(data_here, data_end, true));
[12794]209
[12513]210 data_here += line.size();
211 text_t::iterator line_begin = line.begin();
212 text_t::iterator line_end = line.end();
213 if (findword(line_begin, line_end, boundary) != line_end) {
214 // we've found a boundary
215 process_post_section(argname, argdata, filename, filedata, filetype,
216 isfile, argstr, fileuploads, gsdlhome);
217
218 } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219 // we've found the the beginning of a new section
220 argname.clear();
221 argdata.clear();
222
223 // get the name of this piece of form data
224 text_t::iterator it = findword(line_begin, line_end, "name=\"");
225 if (it == line_end) break; // error - this shouldn't happen
226 it = findchar(it, line_end, '"');
227 if ((it != line_end) && (it+1 != line_end)) {
228 argname = substr(it+1, findchar(it+1, line_end, '"'));
229 }
230
231 // if this piece of form data contains filename="" it's a file
232 // upload and needs to be treated special
233 it = (findword(line_begin, line_end, "filename=\""));
234 if (it != line_end) {
235 // we've found a file upload
236 isfile = true;
237 it = findchar(it, line_end, '"');
238 if ((it != line_end) && (it+1 != line_end)) {
239 filename = substr(it+1, findchar(it+1, line_end, '"'));
240 }
241
242 // the next line is the content-type of this section
243 line = substr(data_here, getline(data_here, data_end, true));
244 data_here += line.size();
245 line_begin = line.begin();
246 line_end = line.end();
247 it = (findword(line_begin, line_end, "Content-Type: "));
248 if (it != line_end) {
249 filetype = substr(it+14, getline(it, line_end, false));
250 }
251 }
252
253 // eat up the next line as it's just a <CRLF> on it's own
254 data_here += 2;
255
256 } else {
257 if (isfile) filedata += line;
258 else argdata += line;
259 }
[12794]260
[12513]261 }
262
263 // process last section
264 process_post_section(argname, argdata, filename, filedata, filetype,
265 isfile, argstr, fileuploads, gsdlhome);
266
267 return argstr;
268 }
269}
270
[108]271// convert %xx and + to their appropriate equivalents
[3217]272// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
[13461]273// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274// actually was, then this returns utf-8, and needs to_uni on the
275// result to get it back to unicode. If the encoding wasn't utf-8, then the
276// output may be crap. Seems to work for 8 bit encodings.
277// Really, this should be given the encoding, and should always return unicode.
[108]278void decode_cgi_arg (text_t &argstr) {
279 text_t::iterator in = argstr.begin();
280 text_t::iterator out = in;
281 text_t::iterator end = argstr.end();
282
283 while (in != end) {
284 if (*in == '+') *out = ' ';
285
286 else if (*in == '%') {
287 unsigned short c = '%';
[3151]288 ++in;
289 if (in != end) { // this is an encoding...
290 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291 // this assumes a short int is at least 16 bits...
292 ++in;
293 if (in != end)
294 c=hexdigit(*in++) << 12;
295 if (in != end)
296 c+=hexdigit(*in++) << 8;
297 if (in != end)
298 c+=hexdigit(*in++) << 4;
299 if (in != end)
300 c+=hexdigit(*in);
301 /* BAD!! The following assumes the interface is using utf-8. But
302 at this point we don't know what encoding we are using, unless
303 we can parse it out of the string we are currently decoding... */
304 text_t uni=" ";
305 uni[0]=c;
306 text_t utf8=to_utf8(uni);
307 int last_byte=utf8.size()-1;
[9620]308 for (int i=0;i<last_byte;++i)
[3151]309 *out++ = utf8[i];
310 c=utf8[last_byte];
311 } else { // convert %HH to hex value
312 c = hexdigit (*in);
313 ++in;
314 if (in != end && c < 16) { // sanity check on the previous character
315 c = c*16 + hexdigit (*in);
316 }
317 }
[108]318 }
319 *out = c;
320 } else *out = *in;
321
[9620]322 if (in != end) ++in;
323 ++out;
[108]324 }
325
326 // remove the excess characters
327 argstr.erase (out, end);
[13461]328
[108]329}
330
331
332// split up the cgi arguments
[776]333void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
334 cgiargsclass &args) {
[108]335 args.clear();
336
[7432]337 text_t::const_iterator here = argstr.begin();
338 text_t::const_iterator end = argstr.end();
[108]339 text_t key, value;
[11998]340
[108]341 // extract out the key=value pairs
342 while (here != end) {
343 // get the next key and value pair
344 here = getdelimitstr (here, end, '=', key);
345 here = getdelimitstr (here, end, '&', value);
346
347 // convert %xx and + to their appropriate equivalents
[614]348 decode_cgi_arg (value);
[108]349 value.setencoding(1); // other encoding
350 // store this key=value pair
[764]351 if (!key.empty()) {
[2426]352
353 // if arg occurs multiple times (as is the case with multiple
354 // checkboxes using the same name) we'll create a comma separated
355 // list of all the values (this uses a hack that encodes naturally
356 // occurring commas as %2C - values will therefore need to be decoded
357 // again before use) - it should use an array instead
[2417]358 const cgiarginfo *info = argsinfo.getarginfo (key);
[12562]359 if (info==NULL) {
360 // If info is NULL, we can't tell if the arg is multiple value or not
361 // Because we need to have dynamically named arguments multivalued, we
362 // will always assume multiplevalue = true
363 // If the arg is not multi valued, then you need to decode the commas.
364 if (args.getarg(key)==NULL) {
365 args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
366 }
367 else {
368 text_t newvalue = args[key];
[2417]369
[12562]370 newvalue += "," + encode_commas(value);
[22404]371 newvalue.setencoding(1); // other encoding
[12562]372 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
373 }
[2417]374 }
[12562]375 else {
376 if (info->multiplevalue) {
377
378 text_t newvalue = args[key];
379 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
380 newvalue += encode_commas(value);
[22404]381 newvalue.setencoding(1); // other encoding
[12562]382 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
383
384 } else {
385 args.setarg (key, value, cgiarg_t::cgi_arg);
386 }
387 }
[764]388 }
[108]389 }
390}
391
[2426]392text_t encode_commas (const text_t &intext) {
393
394 text_t outtext;
395
396 text_t::const_iterator here = intext.begin ();
397 text_t::const_iterator end = intext.end ();
398
399 while (here != end) {
400 if (*here == ',') outtext += "%2C";
401 else outtext.push_back (*here);
[9620]402 ++here;
[2426]403 }
404 return outtext;
405}
406
407text_t decode_commas (const text_t &intext) {
408
409 text_t outtext;
410
411 text_t::const_iterator here = intext.begin ();
412 text_t::const_iterator end = intext.end ();
413
[22942]414 // for loop
415 int intext_len = intext.size();
416 for(int i = 0; i < intext_len; i++) {
417 if ((i+2)<intext_len) {
418 if(intext[i] == '%' && intext[i+1] == '2'
419 && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
420 i += 2;
421 outtext.push_back(',');
422 continue;
423 }
424 }
425 outtext.push_back (intext[i]);
426 }
427 return outtext;
[2426]428}
429
[13456]430// set utf8 to true if input is in utf-8, otherwise expects input in unicode
431text_t minus_safe (const text_t &intext, bool utf8) {
[607]432
433 text_t outtext;
434
435 text_t::const_iterator here = intext.begin ();
436 text_t::const_iterator end = intext.end ();
437
438 while (here != end) {
[1504]439 if (*here == '-') outtext += "Zz-";
[607]440 else outtext.push_back (*here);
[9620]441 ++here;
[607]442 }
[13456]443 if (utf8) {
444 outtext = cgi_safe_utf8 (outtext);
445 } else {
446 outtext = cgi_safe_unicode (outtext);
447 }
[607]448 return outtext;
449}
450
[13456]451// takes utf-8 input
452text_t cgi_safe_utf8 (const text_t &intext) {
[108]453 text_t outtext;
454
455 text_t::const_iterator here = intext.begin ();
456 text_t::const_iterator end = intext.end ();
457 unsigned short c;
458 text_t ttmp;
459
460 while (here != end) {
461 c = *here;
462 if (((c >= 'a') && (c <= 'z')) ||
463 ((c >= 'A') && (c <= 'Z')) ||
[474]464 ((c >= '0') && (c <= '9')) ||
[13456]465 (c == '%') || (c == '-')) {
[108]466 // alphanumeric character
467 outtext.push_back(c);
468 } else if (c == ' ') {
469 // space
[150]470 outtext.push_back('+');
[13456]471 } else if (c > 255) { // not utf-8 character
472 cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
473 } else {
474 // everything else
475 outtext.push_back('%');
476 c2hex(c, ttmp);
477 outtext += ttmp;
478 }
479
480 ++here;
481 }
482
483 return outtext;
484}
485// takes unicode input
486text_t cgi_safe_unicode (const text_t &intext) {
487 text_t outtext;
488
489 text_t::const_iterator here = intext.begin ();
490 text_t::const_iterator end = intext.end ();
491 unsigned short c;
492 text_t ttmp;
493
494 while (here != end) {
495 c = *here;
496 if (((c >= 'a') && (c <= 'z')) ||
497 ((c >= 'A') && (c <= 'Z')) ||
498 ((c >= '0') && (c <= '9')) ||
499 (c == '%') || (c == '-')) {
500 // alphanumeric character
501 outtext.push_back(c);
502 } else if (c == ' ') {
503 // space
504 outtext.push_back('+');
[13103]505 } else if (c > 127) { // unicode character
[3217]506 unsigned char buf[3]; // up to 3 bytes
507 buf[0]='\0';buf[1]='\0';buf[2]='\0';
508 output_utf8_char(c,buf, buf+2);
509 outtext.push_back('%');
510 c2hex(buf[0], ttmp);
511 outtext += ttmp;
512 outtext.push_back('%');
513 c2hex(buf[1], ttmp);
514 outtext += ttmp;
515 if (buf[2]) {
516 outtext.push_back('%');
517 c2hex(buf[2], ttmp);
518 outtext += ttmp;
519 }
[108]520 } else {
521 // everything else
522 outtext.push_back('%');
523 c2hex(c, ttmp);
524 outtext += ttmp;
525 }
526
[9620]527 ++here;
[108]528 }
529
530 return outtext;
531}
[155]532
533
534
535
536static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
537 text_t::const_iterator last,
538 text_t &argname) {
539 first = getdelimitstr (first, last, '-', argname);
540 return first;
541}
542
543
544// check_save_conf_str checks the configuration string for
545// the saved args and makes sure it does not conflict with
546// the information about the arguments. If an error is encountered
547// it will return false and the program should not produce any
548// output.
549bool check_save_conf_str (const text_t &saveconf,
550 const cgiargsinfoclass &argsinfo,
551 ostream &logout) {
552 outconvertclass text_t2ascii;
553
554 text_tset argsset;
555 text_t::const_iterator saveconfhere = saveconf.begin ();
556 text_t::const_iterator saveconfend = saveconf.end ();
557 text_t argname;
558 const cgiarginfo *info;
559
560 // first check to make sure all saved arguments can be saved
561
562 while (saveconfhere != saveconfend) {
563 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
564
565 if (!argname.empty()) {
566 // save the argument name for later
567 argsset.insert (argname);
568
569 // check the argument
570 info = argsinfo.getarginfo (argname);
571 if (info == NULL) {
572 logout << text_t2ascii << "Error: the cgi argument \"" << argname
573 << "\" is used in the configuration string for the\n"
574 << "saved arguments but does not exist as a valid argument.\n\n";
575 return false;
576 }
577 if (info->savedarginfo == cgiarginfo::mustnot) {
578 logout << text_t2ascii << "Error: the cgi argument \"" << argname
579 << "\" is used in the configuration string for the\n"
580 << "saved arguments but has been specified as an argument whose\n"
581 << "state must not be saved.\n\n";
582 return false;
583 }
584 }
585 }
586
587
588 // next check that all saved arguments that should be saved
589 // are saved
590 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
591 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
592
593 while (argsinfohere != argsinfoend) {
594 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
595 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
596 logout << text_t2ascii << "Error: the cgi argument \""
597 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
598 << "be save but was not listed in the saved arguments.\n\n";
599 return false;
600 }
601
[9620]602 ++argsinfohere;
[155]603 }
604
605 return true; // made it, no clashes
606}
607
608
609// create_save_conf_str will create a configuration string
610// based on the information in argsinfo. This method of configuration
611// is not recomended as small changes can produce large changes in
612// the resulting configuration string (for instance a totally different
613// ordering). Only arguments which "must" be saved are included in
614// the resulting string.
615text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
616 ostream &/*logout*/) {
617 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
618 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
619 text_t saveconf;
620 bool first = true;
621
622 while (argsinfohere != argsinfoend) {
623 // save this argument if it must be saved
624 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
625 if (!first) saveconf.push_back ('-');
626 else first = false;
627 saveconf += (*argsinfohere).second.shortname;
628 }
629
[9620]630 ++argsinfohere;
[155]631 }
632
633 return saveconf;
634}
635
636
637// expand_save_args will expand the saved arguments based
638// on saveconf placing the results in args if they are not
639// already defined. If it encounters an error it will return false
640// and output more information to logout.
641bool expand_save_args (const cgiargsinfoclass &argsinfo,
642 const text_t &saveconf,
643 cgiargsclass &args,
644 ostream &logout) {
645 outconvertclass text_t2ascii;
646
647 text_t *arg_e = args.getarg("e");
648 if (arg_e == NULL) return true; // no compressed arguments
649 if (arg_e->empty()) return true; // no compressed arguments
650
651 text_t argname, argvalue;
652 const cgiarginfo *argnameinfo;
653
654 text_t::const_iterator saveconfhere = saveconf.begin();
655 text_t::const_iterator saveconfend = saveconf.end();
656
[11259]657 text_t::iterator arg_ebegin = arg_e->begin();
[155]658 text_t::iterator arg_eend = arg_e->end();
[11259]659 text_t::iterator arg_ehere = arg_ebegin;
[155]660 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
661 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
662
663 if (!argname.empty()) {
664 // found another entry
665 argnameinfo = argsinfo.getarginfo (argname);
666
667 if (argnameinfo == NULL) {
668 // no information about the argument could be found
669 // we can't keep going because we don't know whether
670 // this argument is a single or multiple character value
671 logout << text_t2ascii << "Error: the cgi argument \"" << argname
672 << "\" was specified as being a compressed argument\n"
673 << "but no information about it could be found within the "
674 << "cgiargsinfoclass.\n";
675 return false;
676
677 } else {
[294]678
[155]679 // found the argument information
680 if (argnameinfo->multiplechar) {
[607]681 text_t::const_iterator sav = arg_ehere;
[155]682 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
[11259]683 if (distance(arg_ebegin, arg_ehere) > 2) {
684 // replace any '-' chars escaped with 'Zz'
685 bool first = true;
686 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
687 if (first) argvalue.clear();
[21997]688
689 // Hey, here's a wild idea. Why don't we check that there is
690 // another hyphen in the cgiarge before we get a pointer to it and
691 // add one. That way we are far less likely to wander off into
692 // random memory merrily parsing arguments that are then lovingly
693 // spewed all over the HTML page returned at the usage logs.
694 text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
695 if (minus_itr == arg_eend)
696 {
697 logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
698 return false;
699 }
700 arg_ehere = minus_itr + 1;
701
[11259]702 while (sav != (arg_ehere-1)) {
703 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
704 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
705 ++sav;
706 }
707 first = false;
[607]708 }
709 }
[294]710 argvalue.setencoding(1); // other encoding
[366]711 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
[155]712 } else {
[366]713 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
[9620]714 ++arg_ehere;
[155]715 }
716 }
717 }
718 }
719
720 return true;
721}
722
723
724// adds the default values for those arguments which have not
725// been specified
726void add_default_args (const cgiargsinfoclass &argsinfo,
727 cgiargsclass &args,
728 ostream &/*logout*/) {
729 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
730 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
731
732 while (argsinfohere != argsinfoend) {
733 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
734 args.setdefaultarg ((*argsinfohere).second.shortname,
[366]735 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
[155]736 }
[9620]737 ++argsinfohere;
[155]738 }
739}
740
[12513]741void add_fileupload_args (const cgiargsinfoclass &argsinfo,
742 cgiargsclass &args,
743 fileupload_tmap &fileuploads,
744 ostream &logout) {
745
746 const cgiarginfo *info = argsinfo.getarginfo("a");
747 fileupload_tmap::const_iterator this_file = fileuploads.begin();
748 fileupload_tmap::const_iterator end_file = fileuploads.end();
749 while (this_file != end_file) {
750 const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
751 if (info != NULL) {
[12794]752
[12513]753 if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
[12794]754
[12513]755 args.setargfile((*this_file).first, (*this_file).second);
756 }
757 }
758 this_file++;
759 }
760}
[155]761
762// compress_save_args will compress the arguments and return
763// them in compressed_args. If an error was encountered
764// compressed_args will be set to to "", an error will be
765// written to logout, and the function will return false.
766bool compress_save_args (const cgiargsinfoclass &argsinfo,
767 const text_t &saveconf,
768 cgiargsclass &args,
769 text_t &compressed_args,
[294]770 outconvertclass &outconvert,
[155]771 ostream &logout) {
772 outconvertclass text_t2ascii;
773
774 compressed_args.clear();
775
776 text_t argname, argvalue;
777 const cgiarginfo *argnameinfo;
778
779 text_t::const_iterator saveconfhere = saveconf.begin();
780 text_t::const_iterator saveconfend = saveconf.end();
781
782 while (saveconfhere != saveconfend) {
783 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
784
785 if (!argname.empty()) {
786 // found another entry
787 argnameinfo = argsinfo.getarginfo (argname);
788
789 if (argnameinfo == NULL) {
790 // no information about the argument could be found
791 // we can't keep going because we don't know whether
792 // this argument is a single or multiple character value
793 logout << text_t2ascii << "Error: the cgi argument \"" << argname
794 << "\" was specified as being a compressed argument\n"
795 << "but no information about it could be found within the "
796 << "cgiargsinfoclass.\n";
797 compressed_args.clear();
798 return false;
799
800 } else {
801 // found the argument information
802 if (argnameinfo->multiplechar) {
[607]803 // multiple character argument -- sort out any '-' chars
[9674]804 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
[13456]805 compressed_args += minus_safe (args[argname], false);
[3670]806 else
[13456]807 compressed_args += minus_safe (outconvert.convert(args[argname]), true);
808
[155]809 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
810
811 } else {
812 // single character argument
813 if (args[argname].size() == 0) {
814 logout << text_t2ascii << "Error: the cgi argument \"" << argname
815 << "\" was specified as being a compressed argument which\n"
816 << "should have a one character value but it was empty.\n\n";
817 compressed_args.clear ();
818 return false;
819
820 } else if (args[argname].size() > 1) {
821 logout << text_t2ascii << "Error: the cgi argument \"" << argname
822 << "\" was specified as being a compressed argument which\n"
823 << "should have a one character value but it had multiple characters.\n\n";
824 compressed_args.clear ();
825 return false;
826 }
827
828 // everything is ok
829 compressed_args += args[argname];
830 }
831 }
832 }
833 }
834
835 return true;
836}
837
838
839// args_tounicode converts any arguments which are not in unicode
840// to unicode using inconvert
841void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
842 cgiargsclass::iterator here = args.begin();
843 cgiargsclass::iterator end = args.end();
844
845 while (here != end) {
[366]846 if ((*here).second.value.getencoding() > 0) {
[21961]847 // Call reset() before converting each argument, to prevent problems when converting the last
848 // argument left the converter in a bad state
849 inconvert.reset();
[366]850 (*here).second.value = inconvert.convert((*here).second.value);
[155]851 }
852
[9620]853 ++here;
[155]854 }
855}
[873]856
857// fcgienv will be loaded with environment name-value pairs
858// if using fastcgi (had to do this as getenv doesn't work
859// with our implementation of fastcgi). if fcgienv is empty
860// we'll simply use getenv
861text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
862 if (fcgienv.empty()) {
863 char *n = name.getcstr();
864 char *v = getenv(n);
[7432]865 delete []n;
[873]866 if (v != NULL) return v;
[7432]867 return g_EmptyText;
[873]868
869 } else return fcgienv[name];
870}
Note: See TracBrowser for help on using the repository browser.