root/main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp @ 26560

Revision 26560, 27.9 KB (checked in by ak19, 8 years ago)

URL encoding forward-slash breaks subcollections, as subcollections are designated with forward slash. Not encoding forward slash anymore.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "fileutil.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "unitool.h" // in mg, for output_utf8_char
31#include <cstdlib>
32#include <time.h>
33
34#if defined(GSDL_USE_OBJECTSPACE)
35#  include <ospace\std\iostream>
36#  include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38#  include <iostream.h>
39#  include <fstream.h>
40#else
41#  include <iostream>
42#  include <fstream>
43#endif
44
45
// Maps an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other input is handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
52
53
54static void c2hex (unsigned short c, text_t &t) {
55  t.clear();
56 
57  if (c >= 256) {
58    t = "20"; // ' '
59    return;
60  }
61 
62  unsigned short o1, o2;
63 
64  o1 = (c/16) % 16;
65  o2 = c % 16;
66  if (o1 >= 10) o1 += 'a' - 10;
67  else o1 += '0';
68  if (o2 >= 10) o2 += 'a' - 10;
69  else o2 += '0';
70 
71  t.push_back(o1);
72  t.push_back(o2);
73}
74
75static text_t::iterator getline (text_t::iterator first,
76                 text_t::iterator last,
77                 bool include_crlf) {
78  while (first != last) {
79    if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
80      // found <CRLF>
81      if (include_crlf) first += 2;
82      break;
83    }
84
85    first++;
86  }
87
88  return first;
89}
90
91static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92                  text_t &filetype, bool &isfile, text_t &argstr,
93                  fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95  if (!argname.empty()) {
96
97    if (!isfile) {
98      // argdata includes a trailing <CRLF> that we must remove
99      if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
100    argdata.erase(argdata.end()-2, argdata.end());
101      }
102      if (!argstr.empty()) argstr += "&";
103      argstr += argname + "=" + argdata;
104
105    } else if (!filename.empty()) {
106      // filedata includes a trailing <CRLF> that we must remove
107      if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
108    filedata.erase(filedata.end()-2, filedata.end());
109      }
110
111      // create tmp_name for storing the file on disk, using the current timestamp
112      text_t tmp_name(time(NULL));
113      tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
114
115      char *tmp_name_c = tmp_name.getcstr();
116
117      // write the file data to disk
118      outconvertclass out;
119      ofstream filestream(tmp_name_c, ios::out | ios::binary);
120      filestream << out << filedata;
121      filestream.close();
122      delete tmp_name_c;
123
124      // populate the fields of a fileupload_t and put it in the
125      // fileuploads map
126      fileupload_t fu;
127      // note that filename currently may or may not include the path since
128      // some browsers (e.g. IE) include the path while others
129      // (e.g. mozilla) do not. we should probably remove the path from
130      // this field here to get a consistent value across all browsers.
131      text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132      if (slash != filename.end()) {
133    filename = substr(slash+1, filename.end());
134      }
135      fu.name = filename;
136      fu.type = filetype;
137      // size has yet to be implemented
138      fu.size = filedata.size();
139
140      fu.tmp_name = tmp_name;
141      fileuploads[argname] = fu;
142    }
143  }
144  isfile = false;
145  argname.clear();
146  argdata.clear();
147  filename.clear();
148  filedata.clear();
149  filetype.clear();
150}
151
152// parse data obtained through a CGI POST request
153text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154            fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156  text_t argstr;
157
158  text_t::iterator content_type_begin = content_type.begin();
159  text_t::iterator content_type_end = content_type.end();
160  if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161    // a simple post request
162
163    return raw_post_data;
164
165  } else {
166    // multipart/form data - may contain one or more uploaded files
167
168    /*
169      content_type should look something like the following
170        multipart/form-data; boundary=---------------------------7d411e1a50330
171   
172      while raw_post_data will be as follows
173        -----------------------------7d43e73450330CRLF
174    Content-Disposition: form-data; name="e"<CRLF>
175    <CRLF>
176    d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177    -----------------------------7d43e73450330<CRLF>
178    Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179    Content-Type: application/msword<CRLF>
180    <CRLF>
181    <Content of file><CRLF>
182   
183    */
184
185    // first get the boundary from content-type
186    text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
187    if (boundary_begin+9 < content_type_end)
188      {
189    // skip over "boundary=" part of string
190    boundary_begin += 9;
191      }
192    else {
193      // error
194      cerr << "Error: malformed boundary? '" <<  content_type << "'" << endl;
195      return "";
196    }
197    text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198    int boundary_len = boundary.size();
199
200
201    text_t argname, argdata, filename, filedata, filetype;
202    bool isfile = false;
203    text_t::iterator data_here = raw_post_data.begin();
204    text_t::iterator data_end = raw_post_data.end();
205    while (data_here != data_end) {
206     
207      // get the next available line (including the trailing <CRLF>
208      text_t line = substr(data_here, getline(data_here, data_end, true));
209
210      data_here += line.size();
211      text_t::iterator line_begin = line.begin();
212      text_t::iterator line_end = line.end();
213      if (findword(line_begin, line_end, boundary) != line_end) {
214    // we've found a boundary
215    process_post_section(argname, argdata, filename, filedata, filetype,
216                 isfile, argstr, fileuploads, gsdlhome);
217
218      } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219    // we've found the the beginning of a new section
220    argname.clear();
221    argdata.clear();
222
223    // get the name of this piece of form data
224    text_t::iterator it = findword(line_begin, line_end, "name=\"");
225    if (it == line_end) break; // error - this shouldn't happen
226    it = findchar(it, line_end, '"');
227    if ((it != line_end) && (it+1 != line_end)) {
228      argname = substr(it+1, findchar(it+1, line_end, '"'));
229    }
230
231    // if this piece of form data contains filename="" it's a file
232    // upload and needs to be treated special
233    it = (findword(line_begin, line_end, "filename=\""));
234    if (it != line_end) {
235      // we've found a file upload
236      isfile = true;
237      it = findchar(it, line_end, '"');
238      if ((it != line_end) && (it+1 != line_end)) {
239        filename = substr(it+1, findchar(it+1, line_end, '"'));
240      }
241
242      // the next line is the content-type of this section
243      line = substr(data_here, getline(data_here, data_end, true));
244      data_here += line.size();
245      line_begin = line.begin();
246      line_end = line.end();
247      it = (findword(line_begin, line_end, "Content-Type: "));
248      if (it != line_end) {
249        filetype = substr(it+14, getline(it, line_end, false));
250      }
251    }
252
253    // eat up the next line as it's just a <CRLF> on it's own
254    data_here += 2;
255
256      } else {
257    if (isfile) filedata += line;
258    else argdata += line;
259      }
260
261    }
262
263    // process last section
264    process_post_section(argname, argdata, filename, filedata, filetype,
265             isfile, argstr, fileuploads, gsdlhome);
266
267    return argstr;
268  }
269}
270
271// convert %xx and + to their appropriate equivalents
272// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
273// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274// actually was, then this returns utf-8, and needs to_uni on the
275// result to get it back to unicode. If the encoding wasn't utf-8, then the
276// output may be crap. Seems to work for 8 bit encodings.
277// Really, this should be given the encoding, and should always return unicode.
278void decode_cgi_arg (text_t &argstr) {
279  text_t::iterator in = argstr.begin();
280  text_t::iterator out = in;
281  text_t::iterator end = argstr.end();
282 
283  while (in != end) {
284    if (*in == '+') *out = ' ';
285   
286    else if (*in == '%') {
287      unsigned short c = '%';
288      ++in;
289      if (in != end) { // this is an encoding...
290    if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291      // this assumes a short int is at least 16 bits...
292      ++in; 
293      if (in != end)
294        c=hexdigit(*in++) << 12;
295      if (in != end)
296        c+=hexdigit(*in++) << 8;
297      if (in != end)
298        c+=hexdigit(*in++) << 4;
299      if (in != end)
300        c+=hexdigit(*in);
301      /* BAD!! The following assumes the interface is using utf-8. But
302         at this point we don't know what encoding we are using, unless
303         we can parse it out of the string we are currently decoding... */
304      text_t uni=" ";
305      uni[0]=c;
306      text_t utf8=to_utf8(uni);
307      int last_byte=utf8.size()-1;
308      for (int i=0;i<last_byte;++i)
309        *out++ = utf8[i];
310      c=utf8[last_byte];
311    } else {  // convert %HH to hex value
312      c = hexdigit (*in);
313      ++in;
314      if (in != end && c < 16) { // sanity check on the previous character
315        c = c*16 + hexdigit (*in);
316      }
317    }
318      }
319      *out = c;
320    } else *out = *in;
321   
322    if (in != end) ++in;
323    ++out;
324  }
325 
326  // remove the excess characters
327  argstr.erase (out, end);
328 
329}
330
331// Ensure dangerous tags and chars in cgi-args are URL encoded, to prevent obvious XSS attempts
332// (e.g. c=<script>alert("hacked")</script>) and log poisoning (apache writes unrecognised URLs
333// into log. If the user entered c=garbage <?php ...> in the URL, it gets written out into the
334// apache log and that log file can be included in a local file inclusion (LFI) or
335// remote file include (RFI) attack.
336// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to
337// break out of an html/XML/javascript context.
338void safe_cgi_arg (text_t &argstr) {
339  text_t::iterator in = argstr.begin();
340  text_t out = "";
341  text_t::iterator end = argstr.end();
342 
343  while (in != end) {
344    if (*in == '<') out += "%3C";
345    else if (*in == '>') out += "%3E";
346    else if (*in == '&') out += "%26";
347    else if (*in == '\"') out += "%22";
348    else if (*in == '\'') out += "%27";
349    //else if (*in == '/') out += "%2F"; //unfortunately URL-encoding / breaks subcollections, as this uses /
350    else { // append whatever char is in *in, but as a char, not int
351            //out += *in; // appends as int
352      out += " "; // append placeholder character
353      out[out.size()-1] = *in; // now set location containing placeholder to what's in *in
354    }
355    ++in;
356  }
357 
358  argstr.erase (argstr.begin(), end);
359  argstr += out; 
360}
361
362// split up the cgi arguments
363void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
364             cgiargsclass &args) {
365  args.clear();
366 
367  text_t::const_iterator here = argstr.begin();
368  text_t::const_iterator end = argstr.end();
369  text_t key, value;
370 
371  // extract out the key=value pairs
372  while (here != end) {
373    // get the next key and value pair
374    here = getdelimitstr (here, end, '=', key);
375    here = getdelimitstr (here, end, '&', value);
376   
377    // convert %xx and + to their appropriate equivalents
378    decode_cgi_arg (value);
379
380    safe_cgi_arg(value); // mitigate obvious cross-site scripting hacks in URL cgi-params
381
382    value.setencoding(1); // other encoding
383    // store this key=value pair
384    if (!key.empty()) {
385
386      // if arg occurs multiple times (as is the case with multiple
387      // checkboxes using the same name) we'll create a comma separated
388      // list of all the values (this uses a hack that encodes naturally
389      // occurring commas as %2C - values will therefore need to be decoded
390      // again before use) - it should use an array instead
391      const cgiarginfo *info = argsinfo.getarginfo (key);
392      if (info==NULL) {
393    // If info is NULL, we can't tell if the arg is multiple value or not
394    // Because we need to have dynamically named arguments multivalued, we
395    // will always assume multiplevalue = true
396    // If the arg is not multi valued, then you need to decode the commas.
397    if (args.getarg(key)==NULL) {
398      args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
399    }
400    else {
401      text_t newvalue = args[key];
402
403      newvalue += "," + encode_commas(value);
404      newvalue.setencoding(1); // other encoding
405      args.setarg (key, newvalue, cgiarg_t::cgi_arg);
406    }
407      }
408      else {
409    if (info->multiplevalue) {
410
411      text_t newvalue = args[key];
412      if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
413      newvalue += encode_commas(value);
414      newvalue.setencoding(1); // other encoding
415      args.setarg (key, newvalue, cgiarg_t::cgi_arg);
416
417    } else {
418      args.setarg (key, value, cgiarg_t::cgi_arg);
419    }
420      }
421    }
422  }
423}
424
425text_t encode_commas (const text_t &intext) {
426
427  text_t outtext;
428
429  text_t::const_iterator here = intext.begin ();
430  text_t::const_iterator end = intext.end ();
431
432  while (here != end) {
433    if (*here == ',') outtext += "%2C";
434    else outtext.push_back (*here);
435    ++here;
436  }
437  return outtext;
438}
439
440text_t decode_commas (const text_t &intext) {
441
442  text_t outtext;
443 
444  text_t::const_iterator here = intext.begin ();
445  text_t::const_iterator end = intext.end ();
446 
447  // for loop
448  int intext_len = intext.size();
449  for(int i = 0; i < intext_len; i++) {
450      if ((i+2)<intext_len) {
451          if(intext[i] == '%' && intext[i+1] == '2'
452            && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
453                i += 2;
454                outtext.push_back(',');
455                continue;
456          }
457      }
458      outtext.push_back (intext[i]);
459   }
460  return outtext;
461}
462
463// set utf8 to true if input is in utf-8, otherwise expects input in unicode
464text_t minus_safe (const text_t &intext, bool utf8) {
465
466  text_t outtext;
467
468  text_t::const_iterator here = intext.begin ();
469  text_t::const_iterator end = intext.end ();
470
471  while (here != end) {
472    if (*here == '-') outtext += "Zz-";
473    else outtext.push_back (*here);
474    ++here;
475  }
476  if (utf8) {
477    outtext = cgi_safe_utf8 (outtext);
478  } else {
479    outtext = cgi_safe_unicode (outtext);
480  }
481  return outtext;
482}
483
484// takes utf-8 input
485text_t cgi_safe_utf8 (const text_t &intext) {
486  text_t outtext;
487 
488  text_t::const_iterator here = intext.begin ();
489  text_t::const_iterator end = intext.end ();
490  unsigned short c;
491  text_t ttmp;
492 
493  while (here != end) {
494    c = *here;
495    if (((c >= 'a') && (c <= 'z')) ||
496    ((c >= 'A') && (c <= 'Z')) ||
497    ((c >= '0') && (c <= '9')) ||
498    (c == '%') || (c == '-')) {
499      // alphanumeric character
500      outtext.push_back(c);
501    } else if (c == ' ') {
502      // space
503      outtext.push_back('+');
504    } else if (c > 255) { // not utf-8 character
505      cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
506    } else {
507      // everything else
508      outtext.push_back('%');
509      c2hex(c, ttmp);
510      outtext += ttmp;
511    }
512   
513    ++here;
514  }
515 
516  return outtext;
517}
518// takes unicode input
519text_t cgi_safe_unicode (const text_t &intext) {
520  text_t outtext;
521 
522  text_t::const_iterator here = intext.begin ();
523  text_t::const_iterator end = intext.end ();
524  unsigned short c;
525  text_t ttmp;
526 
527  while (here != end) {
528    c = *here;
529    if (((c >= 'a') && (c <= 'z')) ||
530    ((c >= 'A') && (c <= 'Z')) ||
531    ((c >= '0') && (c <= '9')) ||
532    (c == '%') || (c == '-')) {
533      // alphanumeric character
534      outtext.push_back(c);
535    } else if (c == ' ') {
536      // space
537      outtext.push_back('+');
538    } else if (c > 127) { // unicode character
539      unsigned char buf[3]; // up to 3 bytes
540      buf[0]='\0';buf[1]='\0';buf[2]='\0';
541      output_utf8_char(c,buf, buf+2);
542      outtext.push_back('%');
543      c2hex(buf[0], ttmp);
544      outtext += ttmp;
545      outtext.push_back('%');
546      c2hex(buf[1], ttmp);
547      outtext += ttmp;
548      if (buf[2]) {
549      outtext.push_back('%');
550      c2hex(buf[2], ttmp);
551      outtext += ttmp;
552      }
553    } else {
554      // everything else
555      outtext.push_back('%');
556      c2hex(c, ttmp);
557      outtext += ttmp;
558    }
559   
560    ++here;
561  }
562 
563  return outtext;
564}
565
566
567
568
569static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
570                       text_t::const_iterator last,
571                       text_t &argname) {
572  first = getdelimitstr (first, last, '-', argname);
573  return first;
574}
575
576
577// check_save_conf_str checks the configuration string for
578// the saved args and makes sure it does not conflict with
579// the information about the arguments. If an error is encountered
580// it will return false and the program should not produce any
581// output.
582bool check_save_conf_str (const text_t &saveconf,
583              const cgiargsinfoclass &argsinfo,
584              ostream &logout) {
585  outconvertclass text_t2ascii;
586
587  text_tset argsset;
588  text_t::const_iterator saveconfhere = saveconf.begin ();
589  text_t::const_iterator saveconfend = saveconf.end ();
590  text_t argname;
591  const cgiarginfo *info;
592
593  // first check to make sure all saved arguments can be saved
594 
595  while (saveconfhere != saveconfend) {
596    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
597
598    if (!argname.empty()) {
599      // save the argument name for later
600      argsset.insert (argname);
601
602      // check the argument
603      info =  argsinfo.getarginfo (argname);
604      if (info == NULL) {
605    logout << text_t2ascii << "Error: the cgi argument \"" << argname
606           << "\" is used in the configuration string for the\n"
607           << "saved arguments but does not exist as a valid argument.\n\n";
608    return false;
609      }
610      if (info->savedarginfo == cgiarginfo::mustnot) {
611    logout << text_t2ascii << "Error: the cgi argument \"" << argname
612           << "\" is used in the configuration string for the\n"
613           << "saved arguments but has been specified as an argument whose\n"
614           << "state must not be saved.\n\n";
615    return false;
616      }
617    }
618  }
619
620
621  // next check that all saved arguments that should be saved
622  // are saved
623  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
624  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
625
626  while (argsinfohere != argsinfoend) {
627    if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
628    (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
629      logout << text_t2ascii << "Error: the cgi argument \""
630         << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
631         << "be save but was not listed in the saved arguments.\n\n";
632      return false;
633    }
634
635    ++argsinfohere;
636  }
637 
638  return true; // made it, no clashes
639}
640
641
642// create_save_conf_str will create a configuration string
643// based on the information in argsinfo. This method of configuration
644// is not recomended as small changes can produce large changes in
645// the resulting configuration string (for instance a totally different
646// ordering). Only arguments which "must" be saved are included in
647// the resulting string.
648text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
649                 ostream &/*logout*/) {
650  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
651  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
652  text_t saveconf;
653  bool first = true;
654
655  while (argsinfohere != argsinfoend) {
656    // save this argument if it must be saved
657    if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
658      if (!first) saveconf.push_back ('-');
659      else first = false;
660      saveconf += (*argsinfohere).second.shortname;
661    }
662
663    ++argsinfohere;
664  }
665 
666  return saveconf;
667}
668
669
670// expand_save_args will expand the saved arguments based
671// on saveconf placing the results in args if they are not
672// already defined. If it encounters an error it will return false
673// and output more information to logout.
674bool expand_save_args (const cgiargsinfoclass &argsinfo,
675               const text_t &saveconf,
676               cgiargsclass &args,
677               ostream &logout) {
678  outconvertclass text_t2ascii;
679
680  text_t *arg_e = args.getarg("e");
681  if (arg_e == NULL) return true; // no compressed arguments
682  if (arg_e->empty()) return true; // no compressed arguments
683
684  text_t argname, argvalue;
685  const cgiarginfo *argnameinfo;
686 
687  text_t::const_iterator saveconfhere = saveconf.begin();
688  text_t::const_iterator saveconfend = saveconf.end();
689 
690  text_t::iterator arg_ebegin = arg_e->begin();
691  text_t::iterator arg_eend = arg_e->end();
692  text_t::iterator arg_ehere = arg_ebegin;
693  while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
694    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
695
696    if (!argname.empty()) {
697      // found another entry
698      argnameinfo = argsinfo.getarginfo (argname);
699
700      if (argnameinfo == NULL) {
701    // no information about the argument could be found
702    // we can't keep going because we don't know whether
703    // this argument is a single or multiple character value
704    logout << text_t2ascii << "Error: the cgi argument \"" << argname
705           << "\" was specified as being a compressed argument\n"
706           << "but no information about it could be found within the "
707           << "cgiargsinfoclass.\n";
708    return false;
709
710      } else {
711
712    // found the argument information
713    if (argnameinfo->multiplechar) {
714      text_t::const_iterator sav = arg_ehere;
715      arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
716      if (distance(arg_ebegin, arg_ehere) > 2) {
717        // replace any '-' chars escaped with 'Zz'
718        bool first = true;
719        while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
720          if (first) argvalue.clear();
721
722          // Hey, here's a wild idea. Why don't we check that there is
723          // another hyphen in the cgiarge before we get a pointer to it and
724          // add one. That way we are far less likely to wander off into
725          // random memory merrily parsing arguments that are then lovingly
726          // spewed all over the HTML page returned at the usage logs.
727          text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
728          if (minus_itr == arg_eend)
729          {
730        logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
731        return false;             
732          }
733          arg_ehere = minus_itr + 1;
734
735          while (sav != (arg_ehere-1)) {
736        if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
737            !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
738        ++sav;
739          }
740          first = false;
741        }
742      }
743      argvalue.setencoding(1); // other encoding
744      if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
745    } else {
746      args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
747      ++arg_ehere;
748    }
749      }
750    }
751  }
752
753  return true;
754}
755
756
757// adds the default values for those arguments which have not
758// been specified
759void add_default_args (const cgiargsinfoclass &argsinfo,
760               cgiargsclass &args,
761               ostream &/*logout*/) {
762  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
763  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
764
765  while (argsinfohere != argsinfoend) {
766    if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
767      args.setdefaultarg ((*argsinfohere).second.shortname,
768              (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
769    }
770    ++argsinfohere;
771  }
772}
773
774void add_fileupload_args (const cgiargsinfoclass &argsinfo,
775              cgiargsclass &args,
776              fileupload_tmap &fileuploads,
777              ostream &logout) {
778 
779  const cgiarginfo *info = argsinfo.getarginfo("a");
780  fileupload_tmap::const_iterator this_file = fileuploads.begin();
781  fileupload_tmap::const_iterator end_file = fileuploads.end();
782  while (this_file != end_file) {
783    const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
784    if (info != NULL) {
785
786      if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
787
788    args.setargfile((*this_file).first, (*this_file).second);
789      }
790    }
791    this_file++;
792  }
793}
794
795// compress_save_args will compress the arguments and return
796// them in compressed_args. If an error was encountered
797// compressed_args will be set to to "", an error will be
798// written to logout, and the function will return false.
799bool compress_save_args (const cgiargsinfoclass &argsinfo,
800             const text_t &saveconf,
801             cgiargsclass &args,
802             text_t &compressed_args,
803             outconvertclass &outconvert,
804             ostream &logout) {
805  outconvertclass text_t2ascii;
806
807  compressed_args.clear();
808
809  text_t argname, argvalue;
810  const cgiarginfo *argnameinfo;
811 
812  text_t::const_iterator saveconfhere = saveconf.begin();
813  text_t::const_iterator saveconfend = saveconf.end();
814 
815  while (saveconfhere != saveconfend) {
816    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
817
818    if (!argname.empty()) {
819      // found another entry
820      argnameinfo = argsinfo.getarginfo (argname);
821
822      if (argnameinfo == NULL) {
823    // no information about the argument could be found
824    // we can't keep going because we don't know whether
825    // this argument is a single or multiple character value
826    logout << text_t2ascii << "Error: the cgi argument \"" << argname
827           << "\" was specified as being a compressed argument\n"
828           << "but no information about it could be found within the "
829           << "cgiargsinfoclass.\n";
830    compressed_args.clear();
831    return false;
832
833      } else {
834    // found the argument information
835    if (argnameinfo->multiplechar) {
836      // multiple character argument -- sort out any '-' chars
837      if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
838        compressed_args += minus_safe (args[argname], false);
839      else
840        compressed_args += minus_safe (outconvert.convert(args[argname]), true);
841     
842      if (saveconfhere != saveconfend) compressed_args.push_back ('-');
843
844    } else {
845      // single character argument
846      if (args[argname].size() == 0) {
847        logout << text_t2ascii << "Error: the cgi argument \"" << argname
848           << "\" was specified as being a compressed argument which\n"
849           << "should have a one character value but it was empty.\n\n";
850        compressed_args.clear ();
851        return false;
852
853      } else if (args[argname].size() > 1) {
854        logout << text_t2ascii << "Error: the cgi argument \"" << argname
855           << "\" was specified as being a compressed argument which\n"
856           << "should have a one character value but it had multiple characters.\n\n";
857        compressed_args.clear ();
858        return false;
859      }
860
861      // everything is ok
862      compressed_args += args[argname];
863    }
864      }
865    }
866  }
867
868  return true;
869}
870
871
872// args_tounicode converts any arguments which are not in unicode
873// to unicode using inconvert
874void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
875  cgiargsclass::iterator here = args.begin();
876  cgiargsclass::iterator end = args.end();
877
878  while (here != end) {
879    if ((*here).second.value.getencoding() > 0) {
880      // Call reset() before converting each argument, to prevent problems when converting the last
881      //   argument left the converter in a bad state
882      inconvert.reset();
883      (*here).second.value = inconvert.convert((*here).second.value);
884    }
885   
886    ++here;
887  }
888}
889
890// fcgienv will be loaded with environment name-value pairs
891// if using fastcgi (had to do this as getenv doesn't work
892// with our implementation of fastcgi). if fcgienv is empty
893// we'll simply use getenv
894text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
895  if (fcgienv.empty()) {
896    char *n = name.getcstr();
897    char *v = getenv(n);
898    delete []n;
899    if (v != NULL) return v;
900    return g_EmptyText;
901
902  } else return fcgienv[name];
903}
Note: See TracBrowser for help on using the browser.