root/main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp @ 22942

Revision 22942, 26.5 KB (checked in by ak19, 9 years ago)

For ticket no 712 again. Tidier code in decode_commas function. Related to previous commit to fix a crash that occurred when using a combination of advanced and fielded searching - in an MGPP collection (server.exe and library.cgi would crash depending on which web server was used): 1. When parsing cgi args, arrays stem and fold contained the URL encodings percent-2-C rather than commas for delimiters and weren't split properly resulting in arrays of unexpected lengths (and values). Need to decode the percent-2-C to commas by calling decode_commas() in cgiutils.cpp before splitting. 2. decode_commas in cgiutils.cpp was performing an illegal iterator operation by attempting to peek PAST the end of the iterator which doesn't seem to be allowed by the STL code. When the iteration really got past the end, the iteration operation causes a problem resulting in a server.exe crash of its own.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "fileutil.h"
28#include "gsdlunicode.h"
29#include "fileutil.h"
30#include "unitool.h" // in mg, for output_utf8_char
31#include <cstdlib>
32#include <time.h>
33
34#if defined(GSDL_USE_OBJECTSPACE)
35#  include <ospace\std\iostream>
36#  include <ospace\std\fstream>
37#elif defined(GSDL_USE_IOS_H)
38#  include <iostream.h>
39#  include <fstream.h>
40#else
41#  include <iostream>
42#  include <fstream>
43#endif
44
45
// Convert an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15. Any other input is returned unchanged, which lets
// callers detect most non-hex characters by checking for a result >= 16.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'a' && c <= 'f') {
    value = c - 'a' + 10;
  } else if (c >= 'A' && c <= 'F') {
    value = c - 'A' + 10;
  }

  return value;
}
52
53
54static void c2hex (unsigned short c, text_t &t) {
55  t.clear();
56 
57  if (c >= 256) {
58    t = "20"; // ' '
59    return;
60  }
61 
62  unsigned short o1, o2;
63 
64  o1 = (c/16) % 16;
65  o2 = c % 16;
66  if (o1 >= 10) o1 += 'a' - 10;
67  else o1 += '0';
68  if (o2 >= 10) o2 += 'a' - 10;
69  else o2 += '0';
70 
71  t.push_back(o1);
72  t.push_back(o2);
73}
74
75static text_t::iterator getline (text_t::iterator first,
76                 text_t::iterator last,
77                 bool include_crlf) {
78  while (first != last) {
79    if (((first+1) != last) && (*first == 13) && (*(first+1) == 10)) {
80      // found <CRLF>
81      if (include_crlf) first += 2;
82      break;
83    }
84
85    first++;
86  }
87
88  return first;
89}
90
91static void process_post_section (text_t &argname, text_t &argdata, text_t &filename, text_t &filedata,
92                  text_t &filetype, bool &isfile, text_t &argstr,
93                  fileupload_tmap &fileuploads, const text_t &gsdlhome) {
94
95  if (!argname.empty()) {
96
97    if (!isfile) {
98      // argdata includes a trailing <CRLF> that we must remove
99      if ((argdata.size() > 1) && (*(argdata.end()-2) == 13) && (*(argdata.end()-1) == 10)) {
100    argdata.erase(argdata.end()-2, argdata.end());
101      }
102      if (!argstr.empty()) argstr += "&";
103      argstr += argname + "=" + argdata;
104
105    } else if (!filename.empty()) {
106      // filedata includes a trailing <CRLF> that we must remove
107      if ((filedata.size() > 1) && (*(filedata.end()-2) == 13) && (*(filedata.end()-1) == 10)) {
108    filedata.erase(filedata.end()-2, filedata.end());
109      }
110
111      // create tmp_name for storing the file on disk, using the current timestamp
112      text_t tmp_name(time(NULL));
113      tmp_name = filename_cat(gsdlhome, "tmp", tmp_name);
114
115      char *tmp_name_c = tmp_name.getcstr();
116
117      // write the file data to disk
118      outconvertclass out;
119      ofstream filestream(tmp_name_c, ios::out | ios::binary);
120      filestream << out << filedata;
121      filestream.close();
122      delete tmp_name_c;
123
124      // populate the fields of a fileupload_t and put it in the
125      // fileuploads map
126      fileupload_t fu;
127      // note that filename currently may or may not include the path since
128      // some browsers (e.g. IE) include the path while others
129      // (e.g. mozilla) do not. we should probably remove the path from
130      // this field here to get a consistent value across all browsers.
131      text_t::iterator slash = findlastchar(filename.begin(), filename.end(), '\\');
132      if (slash != filename.end()) {
133    filename = substr(slash+1, filename.end());
134      }
135      fu.name = filename;
136      fu.type = filetype;
137      // size has yet to be implemented
138      fu.size = filedata.size();
139
140      fu.tmp_name = tmp_name;
141      fileuploads[argname] = fu;
142    }
143  }
144  isfile = false;
145  argname.clear();
146  argdata.clear();
147  filename.clear();
148  filedata.clear();
149  filetype.clear();
150}
151
152// parse data obtained through a CGI POST request
153text_t parse_post_data (text_t &content_type, text_t &raw_post_data,
154            fileupload_tmap &fileuploads, const text_t &gsdlhome) {
155
156  text_t argstr;
157
158  text_t::iterator content_type_begin = content_type.begin();
159  text_t::iterator content_type_end = content_type.end();
160  if (findword(content_type_begin, content_type_end, "multipart/form-data") == content_type_end) {
161    // a simple post request
162
163    return raw_post_data;
164
165  } else {
166    // multipart/form data - may contain one or more uploaded files
167
168    /*
169      content_type should look something like the following
170        multipart/form-data; boundary=---------------------------7d411e1a50330
171   
172      while raw_post_data will be as follows
173        -----------------------------7d43e73450330CRLF
174    Content-Disposition: form-data; name="e"<CRLF>
175    <CRLF>
176    d-0testss--1-0-00---4----0--0-110--1en-Zz-1---10-about-0--00031-001utfZz-8-0<CRLF>
177    -----------------------------7d43e73450330<CRLF>
178    Content-Disposition: form-data; name="afile"; filename="C:\somedoc.doc"<CRLF>
179    Content-Type: application/msword<CRLF>
180    <CRLF>
181    <Content of file><CRLF>
182   
183    */
184
185    // first get the boundary from content-type
186    text_t::iterator boundary_begin = findword(content_type_begin, content_type_end, "boundary=");
187    if (boundary_begin+9 < content_type_end)
188      {
189    // skip over "boundary=" part of string
190    boundary_begin += 9;
191      }
192    else {
193      // error
194      cerr << "Error: malformed boundary? '" <<  content_type << "'" << endl;
195      return "";
196    }
197    text_t boundary = substr(boundary_begin, getline(boundary_begin, content_type_end, false));
198    int boundary_len = boundary.size();
199
200
201    text_t argname, argdata, filename, filedata, filetype;
202    bool isfile = false;
203    text_t::iterator data_here = raw_post_data.begin();
204    text_t::iterator data_end = raw_post_data.end();
205    while (data_here != data_end) {
206     
207      // get the next available line (including the trailing <CRLF>
208      text_t line = substr(data_here, getline(data_here, data_end, true));
209
210      data_here += line.size();
211      text_t::iterator line_begin = line.begin();
212      text_t::iterator line_end = line.end();
213      if (findword(line_begin, line_end, boundary) != line_end) {
214    // we've found a boundary
215    process_post_section(argname, argdata, filename, filedata, filetype,
216                 isfile, argstr, fileuploads, gsdlhome);
217
218      } else if (findword(line_begin, line_end, "Content-Disposition: form-data") != line_end) {
219    // we've found the the beginning of a new section
220    argname.clear();
221    argdata.clear();
222
223    // get the name of this piece of form data
224    text_t::iterator it = findword(line_begin, line_end, "name=\"");
225    if (it == line_end) break; // error - this shouldn't happen
226    it = findchar(it, line_end, '"');
227    if ((it != line_end) && (it+1 != line_end)) {
228      argname = substr(it+1, findchar(it+1, line_end, '"'));
229    }
230
231    // if this piece of form data contains filename="" it's a file
232    // upload and needs to be treated special
233    it = (findword(line_begin, line_end, "filename=\""));
234    if (it != line_end) {
235      // we've found a file upload
236      isfile = true;
237      it = findchar(it, line_end, '"');
238      if ((it != line_end) && (it+1 != line_end)) {
239        filename = substr(it+1, findchar(it+1, line_end, '"'));
240      }
241
242      // the next line is the content-type of this section
243      line = substr(data_here, getline(data_here, data_end, true));
244      data_here += line.size();
245      line_begin = line.begin();
246      line_end = line.end();
247      it = (findword(line_begin, line_end, "Content-Type: "));
248      if (it != line_end) {
249        filetype = substr(it+14, getline(it, line_end, false));
250      }
251    }
252
253    // eat up the next line as it's just a <CRLF> on it's own
254    data_here += 2;
255
256      } else {
257    if (isfile) filedata += line;
258    else argdata += line;
259      }
260
261    }
262
263    // process last section
264    process_post_section(argname, argdata, filename, filedata, filetype,
265             isfile, argstr, fileuploads, gsdlhome);
266
267    return argstr;
268  }
269}
270
271// convert %xx and + to their appropriate equivalents
272// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
273// NOTE: this method is crap. It assumes the input encoding is utf-8. If it
274// actually was, then this returns utf-8, and needs to_uni on the
275// result to get it back to unicode. If the encoding wasn't utf-8, then the
276// output may be crap. Seems to work for 8 bit encodings.
277// Really, this should be given the encoding, and should always return unicode.
278void decode_cgi_arg (text_t &argstr) {
279  text_t::iterator in = argstr.begin();
280  text_t::iterator out = in;
281  text_t::iterator end = argstr.end();
282 
283  while (in != end) {
284    if (*in == '+') *out = ' ';
285   
286    else if (*in == '%') {
287      unsigned short c = '%';
288      ++in;
289      if (in != end) { // this is an encoding...
290    if (*in == 'u') { // convert %uHHHH to unicode then current encoding
291      // this assumes a short int is at least 16 bits...
292      ++in; 
293      if (in != end)
294        c=hexdigit(*in++) << 12;
295      if (in != end)
296        c+=hexdigit(*in++) << 8;
297      if (in != end)
298        c+=hexdigit(*in++) << 4;
299      if (in != end)
300        c+=hexdigit(*in);
301      /* BAD!! The following assumes the interface is using utf-8. But
302         at this point we don't know what encoding we are using, unless
303         we can parse it out of the string we are currently decoding... */
304      text_t uni=" ";
305      uni[0]=c;
306      text_t utf8=to_utf8(uni);
307      int last_byte=utf8.size()-1;
308      for (int i=0;i<last_byte;++i)
309        *out++ = utf8[i];
310      c=utf8[last_byte];
311    } else {  // convert %HH to hex value
312      c = hexdigit (*in);
313      ++in;
314      if (in != end && c < 16) { // sanity check on the previous character
315        c = c*16 + hexdigit (*in);
316      }
317    }
318      }
319      *out = c;
320    } else *out = *in;
321   
322    if (in != end) ++in;
323    ++out;
324  }
325 
326  // remove the excess characters
327  argstr.erase (out, end);
328 
329}
330
331
332// split up the cgi arguments
333void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
334             cgiargsclass &args) {
335  args.clear();
336 
337  text_t::const_iterator here = argstr.begin();
338  text_t::const_iterator end = argstr.end();
339  text_t key, value;
340 
341  // extract out the key=value pairs
342  while (here != end) {
343    // get the next key and value pair
344    here = getdelimitstr (here, end, '=', key);
345    here = getdelimitstr (here, end, '&', value);
346   
347    // convert %xx and + to their appropriate equivalents
348    decode_cgi_arg (value);
349    value.setencoding(1); // other encoding
350    // store this key=value pair
351    if (!key.empty()) {
352
353      // if arg occurs multiple times (as is the case with multiple
354      // checkboxes using the same name) we'll create a comma separated
355      // list of all the values (this uses a hack that encodes naturally
356      // occurring commas as %2C - values will therefore need to be decoded
357      // again before use) - it should use an array instead
358      const cgiarginfo *info = argsinfo.getarginfo (key);
359      if (info==NULL) {
360    // If info is NULL, we can't tell if the arg is multiple value or not
361    // Because we need to have dynamically named arguments multivalued, we
362    // will always assume multiplevalue = true
363    // If the arg is not multi valued, then you need to decode the commas.
364    if (args.getarg(key)==NULL) {
365      args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
366    }
367    else {
368      text_t newvalue = args[key];
369
370      newvalue += "," + encode_commas(value);
371      newvalue.setencoding(1); // other encoding
372      args.setarg (key, newvalue, cgiarg_t::cgi_arg);
373    }
374      }
375      else {
376    if (info->multiplevalue) {
377
378      text_t newvalue = args[key];
379      if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
380      newvalue += encode_commas(value);
381      newvalue.setencoding(1); // other encoding
382      args.setarg (key, newvalue, cgiarg_t::cgi_arg);
383
384    } else {
385      args.setarg (key, value, cgiarg_t::cgi_arg);
386    }
387      }
388    }
389  }
390}
391
392text_t encode_commas (const text_t &intext) {
393
394  text_t outtext;
395
396  text_t::const_iterator here = intext.begin ();
397  text_t::const_iterator end = intext.end ();
398
399  while (here != end) {
400    if (*here == ',') outtext += "%2C";
401    else outtext.push_back (*here);
402    ++here;
403  }
404  return outtext;
405}
406
407text_t decode_commas (const text_t &intext) {
408
409  text_t outtext;
410 
411  text_t::const_iterator here = intext.begin ();
412  text_t::const_iterator end = intext.end ();
413 
414  // for loop
415  int intext_len = intext.size();
416  for(int i = 0; i < intext_len; i++) {
417      if ((i+2)<intext_len) {
418          if(intext[i] == '%' && intext[i+1] == '2'
419            && (intext[i+2] == 'C' || intext[i+2] == 'c')) {
420                i += 2;
421                outtext.push_back(',');
422                continue;
423          }
424      }
425      outtext.push_back (intext[i]);
426   }
427  return outtext;
428}
429
430// set utf8 to true if input is in utf-8, otherwise expects input in unicode
431text_t minus_safe (const text_t &intext, bool utf8) {
432
433  text_t outtext;
434
435  text_t::const_iterator here = intext.begin ();
436  text_t::const_iterator end = intext.end ();
437
438  while (here != end) {
439    if (*here == '-') outtext += "Zz-";
440    else outtext.push_back (*here);
441    ++here;
442  }
443  if (utf8) {
444    outtext = cgi_safe_utf8 (outtext);
445  } else {
446    outtext = cgi_safe_unicode (outtext);
447  }
448  return outtext;
449}
450
451// takes utf-8 input
452text_t cgi_safe_utf8 (const text_t &intext) {
453  text_t outtext;
454 
455  text_t::const_iterator here = intext.begin ();
456  text_t::const_iterator end = intext.end ();
457  unsigned short c;
458  text_t ttmp;
459 
460  while (here != end) {
461    c = *here;
462    if (((c >= 'a') && (c <= 'z')) ||
463    ((c >= 'A') && (c <= 'Z')) ||
464    ((c >= '0') && (c <= '9')) ||
465    (c == '%') || (c == '-')) {
466      // alphanumeric character
467      outtext.push_back(c);
468    } else if (c == ' ') {
469      // space
470      outtext.push_back('+');
471    } else if (c > 255) { // not utf-8 character
472      cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
473    } else {
474      // everything else
475      outtext.push_back('%');
476      c2hex(c, ttmp);
477      outtext += ttmp;
478    }
479   
480    ++here;
481  }
482 
483  return outtext;
484}
485// takes unicode input
486text_t cgi_safe_unicode (const text_t &intext) {
487  text_t outtext;
488 
489  text_t::const_iterator here = intext.begin ();
490  text_t::const_iterator end = intext.end ();
491  unsigned short c;
492  text_t ttmp;
493 
494  while (here != end) {
495    c = *here;
496    if (((c >= 'a') && (c <= 'z')) ||
497    ((c >= 'A') && (c <= 'Z')) ||
498    ((c >= '0') && (c <= '9')) ||
499    (c == '%') || (c == '-')) {
500      // alphanumeric character
501      outtext.push_back(c);
502    } else if (c == ' ') {
503      // space
504      outtext.push_back('+');
505    } else if (c > 127) { // unicode character
506      unsigned char buf[3]; // up to 3 bytes
507      buf[0]='\0';buf[1]='\0';buf[2]='\0';
508      output_utf8_char(c,buf, buf+2);
509      outtext.push_back('%');
510      c2hex(buf[0], ttmp);
511      outtext += ttmp;
512      outtext.push_back('%');
513      c2hex(buf[1], ttmp);
514      outtext += ttmp;
515      if (buf[2]) {
516      outtext.push_back('%');
517      c2hex(buf[2], ttmp);
518      outtext += ttmp;
519      }
520    } else {
521      // everything else
522      outtext.push_back('%');
523      c2hex(c, ttmp);
524      outtext += ttmp;
525    }
526   
527    ++here;
528  }
529 
530  return outtext;
531}
532
533
534
535
536static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
537                       text_t::const_iterator last,
538                       text_t &argname) {
539  first = getdelimitstr (first, last, '-', argname);
540  return first;
541}
542
543
544// check_save_conf_str checks the configuration string for
545// the saved args and makes sure it does not conflict with
546// the information about the arguments. If an error is encountered
547// it will return false and the program should not produce any
548// output.
549bool check_save_conf_str (const text_t &saveconf,
550              const cgiargsinfoclass &argsinfo,
551              ostream &logout) {
552  outconvertclass text_t2ascii;
553
554  text_tset argsset;
555  text_t::const_iterator saveconfhere = saveconf.begin ();
556  text_t::const_iterator saveconfend = saveconf.end ();
557  text_t argname;
558  const cgiarginfo *info;
559
560  // first check to make sure all saved arguments can be saved
561 
562  while (saveconfhere != saveconfend) {
563    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
564
565    if (!argname.empty()) {
566      // save the argument name for later
567      argsset.insert (argname);
568
569      // check the argument
570      info =  argsinfo.getarginfo (argname);
571      if (info == NULL) {
572    logout << text_t2ascii << "Error: the cgi argument \"" << argname
573           << "\" is used in the configuration string for the\n"
574           << "saved arguments but does not exist as a valid argument.\n\n";
575    return false;
576      }
577      if (info->savedarginfo == cgiarginfo::mustnot) {
578    logout << text_t2ascii << "Error: the cgi argument \"" << argname
579           << "\" is used in the configuration string for the\n"
580           << "saved arguments but has been specified as an argument whose\n"
581           << "state must not be saved.\n\n";
582    return false;
583      }
584    }
585  }
586
587
588  // next check that all saved arguments that should be saved
589  // are saved
590  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
591  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
592
593  while (argsinfohere != argsinfoend) {
594    if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
595    (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
596      logout << text_t2ascii << "Error: the cgi argument \""
597         << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
598         << "be save but was not listed in the saved arguments.\n\n";
599      return false;
600    }
601
602    ++argsinfohere;
603  }
604 
605  return true; // made it, no clashes
606}
607
608
609// create_save_conf_str will create a configuration string
610// based on the information in argsinfo. This method of configuration
611// is not recomended as small changes can produce large changes in
612// the resulting configuration string (for instance a totally different
613// ordering). Only arguments which "must" be saved are included in
614// the resulting string.
615text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
616                 ostream &/*logout*/) {
617  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
618  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
619  text_t saveconf;
620  bool first = true;
621
622  while (argsinfohere != argsinfoend) {
623    // save this argument if it must be saved
624    if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
625      if (!first) saveconf.push_back ('-');
626      else first = false;
627      saveconf += (*argsinfohere).second.shortname;
628    }
629
630    ++argsinfohere;
631  }
632 
633  return saveconf;
634}
635
636
637// expand_save_args will expand the saved arguments based
638// on saveconf placing the results in args if they are not
639// already defined. If it encounters an error it will return false
640// and output more information to logout.
641bool expand_save_args (const cgiargsinfoclass &argsinfo,
642               const text_t &saveconf,
643               cgiargsclass &args,
644               ostream &logout) {
645  outconvertclass text_t2ascii;
646
647  text_t *arg_e = args.getarg("e");
648  if (arg_e == NULL) return true; // no compressed arguments
649  if (arg_e->empty()) return true; // no compressed arguments
650
651  text_t argname, argvalue;
652  const cgiarginfo *argnameinfo;
653 
654  text_t::const_iterator saveconfhere = saveconf.begin();
655  text_t::const_iterator saveconfend = saveconf.end();
656 
657  text_t::iterator arg_ebegin = arg_e->begin();
658  text_t::iterator arg_eend = arg_e->end();
659  text_t::iterator arg_ehere = arg_ebegin;
660  while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
661    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
662
663    if (!argname.empty()) {
664      // found another entry
665      argnameinfo = argsinfo.getarginfo (argname);
666
667      if (argnameinfo == NULL) {
668    // no information about the argument could be found
669    // we can't keep going because we don't know whether
670    // this argument is a single or multiple character value
671    logout << text_t2ascii << "Error: the cgi argument \"" << argname
672           << "\" was specified as being a compressed argument\n"
673           << "but no information about it could be found within the "
674           << "cgiargsinfoclass.\n";
675    return false;
676
677      } else {
678
679    // found the argument information
680    if (argnameinfo->multiplechar) {
681      text_t::const_iterator sav = arg_ehere;
682      arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
683      if (distance(arg_ebegin, arg_ehere) > 2) {
684        // replace any '-' chars escaped with 'Zz'
685        bool first = true;
686        while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
687          if (first) argvalue.clear();
688
689          // Hey, here's a wild idea. Why don't we check that there is
690          // another hyphen in the cgiarge before we get a pointer to it and
691          // add one. That way we are far less likely to wander off into
692          // random memory merrily parsing arguments that are then lovingly
693          // spewed all over the HTML page returned at the usage logs.
694          text_t::iterator minus_itr = findchar (arg_ehere, arg_eend, '-');
695          if (minus_itr == arg_eend)
696          {
697        logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument but we have run out of cgiarge to decompress!\n";
698        return false;             
699          }
700          arg_ehere = minus_itr + 1;
701
702          while (sav != (arg_ehere-1)) {
703        if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
704            !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
705        ++sav;
706          }
707          first = false;
708        }
709      }
710      argvalue.setencoding(1); // other encoding
711      if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
712    } else {
713      args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
714      ++arg_ehere;
715    }
716      }
717    }
718  }
719
720  return true;
721}
722
723
724// adds the default values for those arguments which have not
725// been specified
726void add_default_args (const cgiargsinfoclass &argsinfo,
727               cgiargsclass &args,
728               ostream &/*logout*/) {
729  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
730  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
731
732  while (argsinfohere != argsinfoend) {
733    if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
734      args.setdefaultarg ((*argsinfohere).second.shortname,
735              (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
736    }
737    ++argsinfohere;
738  }
739}
740
741void add_fileupload_args (const cgiargsinfoclass &argsinfo,
742              cgiargsclass &args,
743              fileupload_tmap &fileuploads,
744              ostream &logout) {
745 
746  const cgiarginfo *info = argsinfo.getarginfo("a");
747  fileupload_tmap::const_iterator this_file = fileuploads.begin();
748  fileupload_tmap::const_iterator end_file = fileuploads.end();
749  while (this_file != end_file) {
750    const cgiarginfo *info = argsinfo.getarginfo((*this_file).first);
751    if (info != NULL) {
752
753      if ((*info).fileupload && (file_exists((*this_file).second.tmp_name))) {
754
755    args.setargfile((*this_file).first, (*this_file).second);
756      }
757    }
758    this_file++;
759  }
760}
761
762// compress_save_args will compress the arguments and return
763// them in compressed_args. If an error was encountered
764// compressed_args will be set to to "", an error will be
765// written to logout, and the function will return false.
766bool compress_save_args (const cgiargsinfoclass &argsinfo,
767             const text_t &saveconf,
768             cgiargsclass &args,
769             text_t &compressed_args,
770             outconvertclass &outconvert,
771             ostream &logout) {
772  outconvertclass text_t2ascii;
773
774  compressed_args.clear();
775
776  text_t argname, argvalue;
777  const cgiarginfo *argnameinfo;
778 
779  text_t::const_iterator saveconfhere = saveconf.begin();
780  text_t::const_iterator saveconfend = saveconf.end();
781 
782  while (saveconfhere != saveconfend) {
783    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
784
785    if (!argname.empty()) {
786      // found another entry
787      argnameinfo = argsinfo.getarginfo (argname);
788
789      if (argnameinfo == NULL) {
790    // no information about the argument could be found
791    // we can't keep going because we don't know whether
792    // this argument is a single or multiple character value
793    logout << text_t2ascii << "Error: the cgi argument \"" << argname
794           << "\" was specified as being a compressed argument\n"
795           << "but no information about it could be found within the "
796           << "cgiargsinfoclass.\n";
797    compressed_args.clear();
798    return false;
799
800      } else {
801    // found the argument information
802    if (argnameinfo->multiplechar) {
803      // multiple character argument -- sort out any '-' chars
804      if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
805        compressed_args += minus_safe (args[argname], false);
806      else
807        compressed_args += minus_safe (outconvert.convert(args[argname]), true);
808     
809      if (saveconfhere != saveconfend) compressed_args.push_back ('-');
810
811    } else {
812      // single character argument
813      if (args[argname].size() == 0) {
814        logout << text_t2ascii << "Error: the cgi argument \"" << argname
815           << "\" was specified as being a compressed argument which\n"
816           << "should have a one character value but it was empty.\n\n";
817        compressed_args.clear ();
818        return false;
819
820      } else if (args[argname].size() > 1) {
821        logout << text_t2ascii << "Error: the cgi argument \"" << argname
822           << "\" was specified as being a compressed argument which\n"
823           << "should have a one character value but it had multiple characters.\n\n";
824        compressed_args.clear ();
825        return false;
826      }
827
828      // everything is ok
829      compressed_args += args[argname];
830    }
831      }
832    }
833  }
834
835  return true;
836}
837
838
839// args_tounicode converts any arguments which are not in unicode
840// to unicode using inconvert
841void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
842  cgiargsclass::iterator here = args.begin();
843  cgiargsclass::iterator end = args.end();
844
845  while (here != end) {
846    if ((*here).second.value.getencoding() > 0) {
847      // Call reset() before converting each argument, to prevent problems when converting the last
848      //   argument left the converter in a bad state
849      inconvert.reset();
850      (*here).second.value = inconvert.convert((*here).second.value);
851    }
852   
853    ++here;
854  }
855}
856
857// fcgienv will be loaded with environment name-value pairs
858// if using fastcgi (had to do this as getenv doesn't work
859// with our implementation of fastcgi). if fcgienv is empty
860// we'll simply use getenv
861text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
862  if (fcgienv.empty()) {
863    char *n = name.getcstr();
864    char *v = getenv(n);
865    delete []n;
866    if (v != NULL) return v;
867    return g_EmptyText;
868
869  } else return fcgienv[name];
870}
Note: See TracBrowser for help on using the browser.