source: trunk/gsdl/src/recpt/cgiutils.cpp@ 9620

Last change on this file since 9620 was 9620, checked in by kjdon, 19 years ago

added some x++ -> ++x changes submitted by Emanuel Dejanu

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 16.3 KB
RevLine 
[108]1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[108]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[108]24 *********************************************************************/
25
26#include "cgiutils.h"
[3151]27#include "gsdlunicode.h"
[3217]28#include "unitool.h" // in mg, for output_utf8_char
[108]29
// Translate a hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// into its numeric value 0..15. Any other input is returned unchanged,
// so callers cannot tell a non-digit whose code is below 16 apart from
// a genuine digit value.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
36
37
38static void c2hex (unsigned short c, text_t &t) {
39 t.clear();
40
41 if (c >= 256) {
42 t = "20"; // ' '
43 return;
44 }
45
46 unsigned short o1, o2;
47
48 o1 = (c/16) % 16;
49 o2 = c % 16;
50 if (o1 >= 10) o1 += 'a' - 10;
51 else o1 += '0';
52 if (o2 >= 10) o2 += 'a' - 10;
53 else o2 += '0';
54
55 t.push_back(o1);
56 t.push_back(o2);
57}
58
59// convert %xx and + to their appropriate equivalents
[3217]60// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
[108]61void decode_cgi_arg (text_t &argstr) {
62 text_t::iterator in = argstr.begin();
63 text_t::iterator out = in;
64 text_t::iterator end = argstr.end();
65
66 while (in != end) {
67 if (*in == '+') *out = ' ';
68
69 else if (*in == '%') {
70 unsigned short c = '%';
[3151]71 ++in;
72 if (in != end) { // this is an encoding...
73 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
74 // this assumes a short int is at least 16 bits...
75 ++in;
76 if (in != end)
77 c=hexdigit(*in++) << 12;
78 if (in != end)
79 c+=hexdigit(*in++) << 8;
80 if (in != end)
81 c+=hexdigit(*in++) << 4;
82 if (in != end)
83 c+=hexdigit(*in);
84 /* BAD!! The following assumes the interface is using utf-8. But
85 at this point we don't know what encoding we are using, unless
86 we can parse it out of the string we are currently decoding... */
87 text_t uni=" ";
88 uni[0]=c;
89 text_t utf8=to_utf8(uni);
90 int last_byte=utf8.size()-1;
[9620]91 for (int i=0;i<last_byte;++i)
[3151]92 *out++ = utf8[i];
93 c=utf8[last_byte];
94 } else { // convert %HH to hex value
95 c = hexdigit (*in);
96 ++in;
97 if (in != end && c < 16) { // sanity check on the previous character
98 c = c*16 + hexdigit (*in);
99 }
100 }
[108]101 }
102 *out = c;
103 } else *out = *in;
104
[9620]105 if (in != end) ++in;
106 ++out;
[108]107 }
108
109 // remove the excess characters
110 argstr.erase (out, end);
111}
112
113
114// split up the cgi arguments
[776]115void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
116 cgiargsclass &args) {
[108]117 args.clear();
118
[7432]119 text_t::const_iterator here = argstr.begin();
120 text_t::const_iterator end = argstr.end();
[108]121 text_t key, value;
122
123 // extract out the key=value pairs
124 while (here != end) {
125 // get the next key and value pair
126 here = getdelimitstr (here, end, '=', key);
127 here = getdelimitstr (here, end, '&', value);
128
129 // convert %xx and + to their appropriate equivalents
[614]130 decode_cgi_arg (value);
[607]131
[108]132 value.setencoding(1); // other encoding
133 // store this key=value pair
[764]134 if (!key.empty()) {
[2426]135
136 // if arg occurs multiple times (as is the case with multiple
137 // checkboxes using the same name) we'll create a comma separated
138 // list of all the values (this uses a hack that encodes naturally
139 // occurring commas as %2C - values will therefore need to be decoded
140 // again before use) - it should use an array instead
[2417]141 const cgiarginfo *info = argsinfo.getarginfo (key);
142 if (info != NULL && info->multiplevalue) {
143 text_t newvalue = args[key];
144 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
[2426]145 newvalue += encode_commas(value);
[2417]146 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
147
148 } else {
[764]149 args.setarg (key, value, cgiarg_t::cgi_arg);
[2417]150 }
[764]151 }
[108]152 }
153}
154
[2426]155text_t encode_commas (const text_t &intext) {
156
157 text_t outtext;
158
159 text_t::const_iterator here = intext.begin ();
160 text_t::const_iterator end = intext.end ();
161
162 while (here != end) {
163 if (*here == ',') outtext += "%2C";
164 else outtext.push_back (*here);
[9620]165 ++here;
[2426]166 }
167 return outtext;
168}
169
170text_t decode_commas (const text_t &intext) {
171
172 text_t outtext;
173
174 text_t::const_iterator here = intext.begin ();
175 text_t::const_iterator end = intext.end ();
176
177 while (here != end) {
178 if ((here+2<end) && *here == '%' && *(here+1) == '2' &&
179 (*(here+2) == 'C' || *(here+2) == 'c')) {
180 here += 2;
181 outtext.push_back(',');
182
183 }else outtext.push_back (*here);
[9620]184 ++here;
[2426]185 }
186 return outtext;
187}
188
[607]189text_t minus_safe (const text_t &intext) {
190
191 text_t outtext;
192
193 text_t::const_iterator here = intext.begin ();
194 text_t::const_iterator end = intext.end ();
195
196 while (here != end) {
[1504]197 if (*here == '-') outtext += "Zz-";
[607]198 else outtext.push_back (*here);
[9620]199 ++here;
[607]200 }
[614]201 outtext = cgi_safe (outtext);
[607]202 return outtext;
203}
204
[108]205text_t cgi_safe (const text_t &intext) {
206 text_t outtext;
207
208 text_t::const_iterator here = intext.begin ();
209 text_t::const_iterator end = intext.end ();
210 unsigned short c;
211 text_t ttmp;
212
213 while (here != end) {
214 c = *here;
215 if (((c >= 'a') && (c <= 'z')) ||
216 ((c >= 'A') && (c <= 'Z')) ||
[474]217 ((c >= '0') && (c <= '9')) ||
[614]218 (c == '+') || (c == '%') || (c == '-')) {
[108]219 // alphanumeric character
220 outtext.push_back(c);
221 } else if (c == ' ') {
222 // space
[150]223 outtext.push_back('+');
[3217]224 } else if (c > 255) { // unicode character
225 unsigned char buf[3]; // up to 3 bytes
226 buf[0]='\0';buf[1]='\0';buf[2]='\0';
227 output_utf8_char(c,buf, buf+2);
228 outtext.push_back('%');
229 c2hex(buf[0], ttmp);
230 outtext += ttmp;
231 outtext.push_back('%');
232 c2hex(buf[1], ttmp);
233 outtext += ttmp;
234 if (buf[2]) {
235 outtext.push_back('%');
236 c2hex(buf[2], ttmp);
237 outtext += ttmp;
238 }
[108]239 } else {
240 // everything else
241 outtext.push_back('%');
242 c2hex(c, ttmp);
243 outtext += ttmp;
244 }
245
[9620]246 ++here;
[108]247 }
248
249 return outtext;
250}
[155]251
252
253
254
255static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
256 text_t::const_iterator last,
257 text_t &argname) {
258 first = getdelimitstr (first, last, '-', argname);
259 return first;
260}
261
262
263// check_save_conf_str checks the configuration string for
264// the saved args and makes sure it does not conflict with
265// the information about the arguments. If an error is encountered
266// it will return false and the program should not produce any
267// output.
268bool check_save_conf_str (const text_t &saveconf,
269 const cgiargsinfoclass &argsinfo,
270 ostream &logout) {
271 outconvertclass text_t2ascii;
272
273 text_tset argsset;
274 text_t::const_iterator saveconfhere = saveconf.begin ();
275 text_t::const_iterator saveconfend = saveconf.end ();
276 text_t argname;
277 const cgiarginfo *info;
278
279 // first check to make sure all saved arguments can be saved
280
281 while (saveconfhere != saveconfend) {
282 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
283
284 if (!argname.empty()) {
285 // save the argument name for later
286 argsset.insert (argname);
287
288 // check the argument
289 info = argsinfo.getarginfo (argname);
290 if (info == NULL) {
291 logout << text_t2ascii << "Error: the cgi argument \"" << argname
292 << "\" is used in the configuration string for the\n"
293 << "saved arguments but does not exist as a valid argument.\n\n";
294 return false;
295 }
296 if (info->savedarginfo == cgiarginfo::mustnot) {
297 logout << text_t2ascii << "Error: the cgi argument \"" << argname
298 << "\" is used in the configuration string for the\n"
299 << "saved arguments but has been specified as an argument whose\n"
300 << "state must not be saved.\n\n";
301 return false;
302 }
303 }
304 }
305
306
307 // next check that all saved arguments that should be saved
308 // are saved
309 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
310 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
311
312 while (argsinfohere != argsinfoend) {
313 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
314 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
315 logout << text_t2ascii << "Error: the cgi argument \""
316 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
317 << "be save but was not listed in the saved arguments.\n\n";
318 return false;
319 }
320
[9620]321 ++argsinfohere;
[155]322 }
323
324 return true; // made it, no clashes
325}
326
327
328// create_save_conf_str will create a configuration string
329// based on the information in argsinfo. This method of configuration
330// is not recomended as small changes can produce large changes in
331// the resulting configuration string (for instance a totally different
332// ordering). Only arguments which "must" be saved are included in
333// the resulting string.
334text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
335 ostream &/*logout*/) {
336 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
337 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
338 text_t saveconf;
339 bool first = true;
340
341 while (argsinfohere != argsinfoend) {
342 // save this argument if it must be saved
343 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
344 if (!first) saveconf.push_back ('-');
345 else first = false;
346 saveconf += (*argsinfohere).second.shortname;
347 }
348
[9620]349 ++argsinfohere;
[155]350 }
351
352 return saveconf;
353}
354
355
356// expand_save_args will expand the saved arguments based
357// on saveconf placing the results in args if they are not
358// already defined. If it encounters an error it will return false
359// and output more information to logout.
360bool expand_save_args (const cgiargsinfoclass &argsinfo,
361 const text_t &saveconf,
362 cgiargsclass &args,
363 ostream &logout) {
364 outconvertclass text_t2ascii;
365
366 text_t *arg_e = args.getarg("e");
367 if (arg_e == NULL) return true; // no compressed arguments
368 if (arg_e->empty()) return true; // no compressed arguments
369
370 text_t argname, argvalue;
371 const cgiarginfo *argnameinfo;
372
373 text_t::const_iterator saveconfhere = saveconf.begin();
374 text_t::const_iterator saveconfend = saveconf.end();
375
376 text_t::iterator arg_ehere = arg_e->begin();
377 text_t::iterator arg_eend = arg_e->end();
378 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
379 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
380
381 if (!argname.empty()) {
382 // found another entry
383 argnameinfo = argsinfo.getarginfo (argname);
384
385 if (argnameinfo == NULL) {
386 // no information about the argument could be found
387 // we can't keep going because we don't know whether
388 // this argument is a single or multiple character value
389 logout << text_t2ascii << "Error: the cgi argument \"" << argname
390 << "\" was specified as being a compressed argument\n"
391 << "but no information about it could be found within the "
392 << "cgiargsinfoclass.\n";
393 return false;
394
395 } else {
[294]396
[155]397 // found the argument information
398 if (argnameinfo->multiplechar) {
[607]399 text_t::const_iterator sav = arg_ehere;
[155]400 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
[1504]401 // replace any '-' chars escaped with 'Zz'
[1422]402 bool first = true;
[1504]403 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
[1422]404 if (first) argvalue.clear();
[607]405 arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1;
406 while (sav != (arg_ehere-1)) {
[1504]407 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
408 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
[9620]409 ++sav;
[607]410 }
[1422]411 first = false;
[607]412 }
413
[294]414 argvalue.setencoding(1); // other encoding
[366]415 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
[155]416 } else {
[366]417 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
[9620]418 ++arg_ehere;
[155]419 }
420 }
421 }
422 }
423
424 return true;
425}
426
427
428// adds the default values for those arguments which have not
429// been specified
430void add_default_args (const cgiargsinfoclass &argsinfo,
431 cgiargsclass &args,
432 ostream &/*logout*/) {
433 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
434 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
435
436 while (argsinfohere != argsinfoend) {
437 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
438 args.setdefaultarg ((*argsinfohere).second.shortname,
[366]439 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
[155]440 }
[9620]441 ++argsinfohere;
[155]442 }
443}
444
445
446// compress_save_args will compress the arguments and return
447// them in compressed_args. If an error was encountered
448// compressed_args will be set to to "", an error will be
449// written to logout, and the function will return false.
450bool compress_save_args (const cgiargsinfoclass &argsinfo,
451 const text_t &saveconf,
452 cgiargsclass &args,
453 text_t &compressed_args,
[294]454 outconvertclass &outconvert,
[155]455 ostream &logout) {
456 outconvertclass text_t2ascii;
457
458 compressed_args.clear();
459
460 text_t argname, argvalue;
461 const cgiarginfo *argnameinfo;
462
463 text_t::const_iterator saveconfhere = saveconf.begin();
464 text_t::const_iterator saveconfend = saveconf.end();
465
466 while (saveconfhere != saveconfend) {
467 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
468
469 if (!argname.empty()) {
470 // found another entry
471 argnameinfo = argsinfo.getarginfo (argname);
472
473 if (argnameinfo == NULL) {
474 // no information about the argument could be found
475 // we can't keep going because we don't know whether
476 // this argument is a single or multiple character value
477 logout << text_t2ascii << "Error: the cgi argument \"" << argname
478 << "\" was specified as being a compressed argument\n"
479 << "but no information about it could be found within the "
480 << "cgiargsinfoclass.\n";
481 compressed_args.clear();
482 return false;
483
484 } else {
485 // found the argument information
486 if (argnameinfo->multiplechar) {
[607]487 // multiple character argument -- sort out any '-' chars
[3670]488 if (args["w"]=="utf-16") // browsers don't like \0 in urls...
489 compressed_args += minus_safe (args[argname]);
490 else
491 compressed_args += minus_safe (outconvert.convert(args[argname]));
492
[155]493 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
494
495 } else {
496 // single character argument
497 if (args[argname].size() == 0) {
498 logout << text_t2ascii << "Error: the cgi argument \"" << argname
499 << "\" was specified as being a compressed argument which\n"
500 << "should have a one character value but it was empty.\n\n";
501 compressed_args.clear ();
502 return false;
503
504 } else if (args[argname].size() > 1) {
505 logout << text_t2ascii << "Error: the cgi argument \"" << argname
506 << "\" was specified as being a compressed argument which\n"
507 << "should have a one character value but it had multiple characters.\n\n";
508 compressed_args.clear ();
509 return false;
510 }
511
512 // everything is ok
513 compressed_args += args[argname];
514 }
515 }
516 }
517 }
518
519 return true;
520}
521
522
523// args_tounicode converts any arguments which are not in unicode
524// to unicode using inconvert
525void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
526 cgiargsclass::iterator here = args.begin();
527 cgiargsclass::iterator end = args.end();
528
529 while (here != end) {
[366]530 if ((*here).second.value.getencoding() > 0) {
531 (*here).second.value = inconvert.convert((*here).second.value);
[155]532 }
533
[9620]534 ++here;
[155]535 }
536}
[873]537
538// fcgienv will be loaded with environment name-value pairs
539// if using fastcgi (had to do this as getenv doesn't work
540// with our implementation of fastcgi). if fcgienv is empty
541// we'll simply use getenv
542text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
543 if (fcgienv.empty()) {
544 char *n = name.getcstr();
545 char *v = getenv(n);
[7432]546 delete []n;
[873]547 if (v != NULL) return v;
[7432]548 return g_EmptyText;
[873]549
550 } else return fcgienv[name];
551}
Note: See TracBrowser for help on using the repository browser.