source: main/tags/2.70u/gsdl/src/recpt/cgiutils.cpp@ 31708

Last change on this file since 31708 was 11259, checked in by mdewsnip, 18 years ago

Various little bug fixes and improvements (many to get things working with Visual Studio 2005), by Emanuel Dejanu.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 16.4 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "gsdlunicode.h"
28#include "unitool.h" // in mg, for output_utf8_char
29
// Map one hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F')
// to its numeric value (0 to 15). Any other character is returned
// unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c; // not a hex digit: hand it back as it came

  if (c >= '0' && c <= '9') value = c - '0';
  else if (c >= 'a' && c <= 'f') value = c - 'a' + 10;
  else if (c >= 'A' && c <= 'F') value = c - 'A' + 10;

  return value;
}
36
37
38static void c2hex (unsigned short c, text_t &t) {
39 t.clear();
40
41 if (c >= 256) {
42 t = "20"; // ' '
43 return;
44 }
45
46 unsigned short o1, o2;
47
48 o1 = (c/16) % 16;
49 o2 = c % 16;
50 if (o1 >= 10) o1 += 'a' - 10;
51 else o1 += '0';
52 if (o2 >= 10) o2 += 'a' - 10;
53 else o2 += '0';
54
55 t.push_back(o1);
56 t.push_back(o2);
57}
58
59// convert %xx and + to their appropriate equivalents
60// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
61void decode_cgi_arg (text_t &argstr) {
62 text_t::iterator in = argstr.begin();
63 text_t::iterator out = in;
64 text_t::iterator end = argstr.end();
65
66 while (in != end) {
67 if (*in == '+') *out = ' ';
68
69 else if (*in == '%') {
70 unsigned short c = '%';
71 ++in;
72 if (in != end) { // this is an encoding...
73 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
74 // this assumes a short int is at least 16 bits...
75 ++in;
76 if (in != end)
77 c=hexdigit(*in++) << 12;
78 if (in != end)
79 c+=hexdigit(*in++) << 8;
80 if (in != end)
81 c+=hexdigit(*in++) << 4;
82 if (in != end)
83 c+=hexdigit(*in);
84 /* BAD!! The following assumes the interface is using utf-8. But
85 at this point we don't know what encoding we are using, unless
86 we can parse it out of the string we are currently decoding... */
87 text_t uni=" ";
88 uni[0]=c;
89 text_t utf8=to_utf8(uni);
90 int last_byte=utf8.size()-1;
91 for (int i=0;i<last_byte;++i)
92 *out++ = utf8[i];
93 c=utf8[last_byte];
94 } else { // convert %HH to hex value
95 c = hexdigit (*in);
96 ++in;
97 if (in != end && c < 16) { // sanity check on the previous character
98 c = c*16 + hexdigit (*in);
99 }
100 }
101 }
102 *out = c;
103 } else *out = *in;
104
105 if (in != end) ++in;
106 ++out;
107 }
108
109 // remove the excess characters
110 argstr.erase (out, end);
111}
112
113
114// split up the cgi arguments
115void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
116 cgiargsclass &args) {
117 args.clear();
118
119 text_t::const_iterator here = argstr.begin();
120 text_t::const_iterator end = argstr.end();
121 text_t key, value;
122
123 // extract out the key=value pairs
124 while (here != end) {
125 // get the next key and value pair
126 here = getdelimitstr (here, end, '=', key);
127 here = getdelimitstr (here, end, '&', value);
128
129 // convert %xx and + to their appropriate equivalents
130 decode_cgi_arg (value);
131
132 value.setencoding(1); // other encoding
133 // store this key=value pair
134 if (!key.empty()) {
135
136 // if arg occurs multiple times (as is the case with multiple
137 // checkboxes using the same name) we'll create a comma separated
138 // list of all the values (this uses a hack that encodes naturally
139 // occurring commas as %2C - values will therefore need to be decoded
140 // again before use) - it should use an array instead
141 const cgiarginfo *info = argsinfo.getarginfo (key);
142 if (info != NULL && info->multiplevalue) {
143 text_t newvalue = args[key];
144 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
145 newvalue += encode_commas(value);
146 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
147
148 } else {
149 args.setarg (key, value, cgiarg_t::cgi_arg);
150 }
151 }
152 }
153}
154
155text_t encode_commas (const text_t &intext) {
156
157 text_t outtext;
158
159 text_t::const_iterator here = intext.begin ();
160 text_t::const_iterator end = intext.end ();
161
162 while (here != end) {
163 if (*here == ',') outtext += "%2C";
164 else outtext.push_back (*here);
165 ++here;
166 }
167 return outtext;
168}
169
170text_t decode_commas (const text_t &intext) {
171
172 text_t outtext;
173
174 text_t::const_iterator here = intext.begin ();
175 text_t::const_iterator end = intext.end ();
176
177 while (here != end) {
178 if ((here+2<end) && *here == '%' && *(here+1) == '2' &&
179 (*(here+2) == 'C' || *(here+2) == 'c')) {
180 here += 2;
181 outtext.push_back(',');
182
183 }else outtext.push_back (*here);
184 ++here;
185 }
186 return outtext;
187}
188
189text_t minus_safe (const text_t &intext) {
190
191 text_t outtext;
192
193 text_t::const_iterator here = intext.begin ();
194 text_t::const_iterator end = intext.end ();
195
196 while (here != end) {
197 if (*here == '-') outtext += "Zz-";
198 else outtext.push_back (*here);
199 ++here;
200 }
201 outtext = cgi_safe (outtext);
202 return outtext;
203}
204
205text_t cgi_safe (const text_t &intext) {
206 text_t outtext;
207
208 text_t::const_iterator here = intext.begin ();
209 text_t::const_iterator end = intext.end ();
210 unsigned short c;
211 text_t ttmp;
212
213 while (here != end) {
214 c = *here;
215 if (((c >= 'a') && (c <= 'z')) ||
216 ((c >= 'A') && (c <= 'Z')) ||
217 ((c >= '0') && (c <= '9')) ||
218 (c == '+') || (c == '%') || (c == '-')) {
219 // alphanumeric character
220 outtext.push_back(c);
221 } else if (c == ' ') {
222 // space
223 outtext.push_back('+');
224 } else if (c > 255) { // unicode character
225 unsigned char buf[3]; // up to 3 bytes
226 buf[0]='\0';buf[1]='\0';buf[2]='\0';
227 output_utf8_char(c,buf, buf+2);
228 outtext.push_back('%');
229 c2hex(buf[0], ttmp);
230 outtext += ttmp;
231 outtext.push_back('%');
232 c2hex(buf[1], ttmp);
233 outtext += ttmp;
234 if (buf[2]) {
235 outtext.push_back('%');
236 c2hex(buf[2], ttmp);
237 outtext += ttmp;
238 }
239 } else {
240 // everything else
241 outtext.push_back('%');
242 c2hex(c, ttmp);
243 outtext += ttmp;
244 }
245
246 ++here;
247 }
248
249 return outtext;
250}
251
252
253
254
255static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
256 text_t::const_iterator last,
257 text_t &argname) {
258 first = getdelimitstr (first, last, '-', argname);
259 return first;
260}
261
262
263// check_save_conf_str checks the configuration string for
264// the saved args and makes sure it does not conflict with
265// the information about the arguments. If an error is encountered
266// it will return false and the program should not produce any
267// output.
268bool check_save_conf_str (const text_t &saveconf,
269 const cgiargsinfoclass &argsinfo,
270 ostream &logout) {
271 outconvertclass text_t2ascii;
272
273 text_tset argsset;
274 text_t::const_iterator saveconfhere = saveconf.begin ();
275 text_t::const_iterator saveconfend = saveconf.end ();
276 text_t argname;
277 const cgiarginfo *info;
278
279 // first check to make sure all saved arguments can be saved
280
281 while (saveconfhere != saveconfend) {
282 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
283
284 if (!argname.empty()) {
285 // save the argument name for later
286 argsset.insert (argname);
287
288 // check the argument
289 info = argsinfo.getarginfo (argname);
290 if (info == NULL) {
291 logout << text_t2ascii << "Error: the cgi argument \"" << argname
292 << "\" is used in the configuration string for the\n"
293 << "saved arguments but does not exist as a valid argument.\n\n";
294 return false;
295 }
296 if (info->savedarginfo == cgiarginfo::mustnot) {
297 logout << text_t2ascii << "Error: the cgi argument \"" << argname
298 << "\" is used in the configuration string for the\n"
299 << "saved arguments but has been specified as an argument whose\n"
300 << "state must not be saved.\n\n";
301 return false;
302 }
303 }
304 }
305
306
307 // next check that all saved arguments that should be saved
308 // are saved
309 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
310 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
311
312 while (argsinfohere != argsinfoend) {
313 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
314 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
315 logout << text_t2ascii << "Error: the cgi argument \""
316 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
317 << "be save but was not listed in the saved arguments.\n\n";
318 return false;
319 }
320
321 ++argsinfohere;
322 }
323
324 return true; // made it, no clashes
325}
326
327
328// create_save_conf_str will create a configuration string
329// based on the information in argsinfo. This method of configuration
330// is not recomended as small changes can produce large changes in
331// the resulting configuration string (for instance a totally different
332// ordering). Only arguments which "must" be saved are included in
333// the resulting string.
334text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
335 ostream &/*logout*/) {
336 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
337 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
338 text_t saveconf;
339 bool first = true;
340
341 while (argsinfohere != argsinfoend) {
342 // save this argument if it must be saved
343 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
344 if (!first) saveconf.push_back ('-');
345 else first = false;
346 saveconf += (*argsinfohere).second.shortname;
347 }
348
349 ++argsinfohere;
350 }
351
352 return saveconf;
353}
354
355
356// expand_save_args will expand the saved arguments based
357// on saveconf placing the results in args if they are not
358// already defined. If it encounters an error it will return false
359// and output more information to logout.
360bool expand_save_args (const cgiargsinfoclass &argsinfo,
361 const text_t &saveconf,
362 cgiargsclass &args,
363 ostream &logout) {
364 outconvertclass text_t2ascii;
365
366 text_t *arg_e = args.getarg("e");
367 if (arg_e == NULL) return true; // no compressed arguments
368 if (arg_e->empty()) return true; // no compressed arguments
369
370 text_t argname, argvalue;
371 const cgiarginfo *argnameinfo;
372
373 text_t::const_iterator saveconfhere = saveconf.begin();
374 text_t::const_iterator saveconfend = saveconf.end();
375
376 text_t::iterator arg_ebegin = arg_e->begin();
377 text_t::iterator arg_eend = arg_e->end();
378 text_t::iterator arg_ehere = arg_ebegin;
379 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
380 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
381
382 if (!argname.empty()) {
383 // found another entry
384 argnameinfo = argsinfo.getarginfo (argname);
385
386 if (argnameinfo == NULL) {
387 // no information about the argument could be found
388 // we can't keep going because we don't know whether
389 // this argument is a single or multiple character value
390 logout << text_t2ascii << "Error: the cgi argument \"" << argname
391 << "\" was specified as being a compressed argument\n"
392 << "but no information about it could be found within the "
393 << "cgiargsinfoclass.\n";
394 return false;
395
396 } else {
397
398 // found the argument information
399 if (argnameinfo->multiplechar) {
400 text_t::const_iterator sav = arg_ehere;
401 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
402 if (distance(arg_ebegin, arg_ehere) > 2) {
403 // replace any '-' chars escaped with 'Zz'
404 bool first = true;
405 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
406 if (first) argvalue.clear();
407 arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1;
408 while (sav != (arg_ehere-1)) {
409 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
410 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
411 ++sav;
412 }
413 first = false;
414 }
415 }
416 argvalue.setencoding(1); // other encoding
417 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
418 } else {
419 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
420 ++arg_ehere;
421 }
422 }
423 }
424 }
425
426 return true;
427}
428
429
430// adds the default values for those arguments which have not
431// been specified
432void add_default_args (const cgiargsinfoclass &argsinfo,
433 cgiargsclass &args,
434 ostream &/*logout*/) {
435 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
436 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
437
438 while (argsinfohere != argsinfoend) {
439 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
440 args.setdefaultarg ((*argsinfohere).second.shortname,
441 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
442 }
443 ++argsinfohere;
444 }
445}
446
447
448// compress_save_args will compress the arguments and return
449// them in compressed_args. If an error was encountered
450// compressed_args will be set to to "", an error will be
451// written to logout, and the function will return false.
452bool compress_save_args (const cgiargsinfoclass &argsinfo,
453 const text_t &saveconf,
454 cgiargsclass &args,
455 text_t &compressed_args,
456 outconvertclass &outconvert,
457 ostream &logout) {
458 outconvertclass text_t2ascii;
459
460 compressed_args.clear();
461
462 text_t argname, argvalue;
463 const cgiarginfo *argnameinfo;
464
465 text_t::const_iterator saveconfhere = saveconf.begin();
466 text_t::const_iterator saveconfend = saveconf.end();
467
468 while (saveconfhere != saveconfend) {
469 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
470
471 if (!argname.empty()) {
472 // found another entry
473 argnameinfo = argsinfo.getarginfo (argname);
474
475 if (argnameinfo == NULL) {
476 // no information about the argument could be found
477 // we can't keep going because we don't know whether
478 // this argument is a single or multiple character value
479 logout << text_t2ascii << "Error: the cgi argument \"" << argname
480 << "\" was specified as being a compressed argument\n"
481 << "but no information about it could be found within the "
482 << "cgiargsinfoclass.\n";
483 compressed_args.clear();
484 return false;
485
486 } else {
487 // found the argument information
488 if (argnameinfo->multiplechar) {
489 // multiple character argument -- sort out any '-' chars
490 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
491 compressed_args += minus_safe (args[argname]);
492 else
493 compressed_args += minus_safe (outconvert.convert(args[argname]));
494
495 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
496
497 } else {
498 // single character argument
499 if (args[argname].size() == 0) {
500 logout << text_t2ascii << "Error: the cgi argument \"" << argname
501 << "\" was specified as being a compressed argument which\n"
502 << "should have a one character value but it was empty.\n\n";
503 compressed_args.clear ();
504 return false;
505
506 } else if (args[argname].size() > 1) {
507 logout << text_t2ascii << "Error: the cgi argument \"" << argname
508 << "\" was specified as being a compressed argument which\n"
509 << "should have a one character value but it had multiple characters.\n\n";
510 compressed_args.clear ();
511 return false;
512 }
513
514 // everything is ok
515 compressed_args += args[argname];
516 }
517 }
518 }
519 }
520
521 return true;
522}
523
524
525// args_tounicode converts any arguments which are not in unicode
526// to unicode using inconvert
527void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
528 cgiargsclass::iterator here = args.begin();
529 cgiargsclass::iterator end = args.end();
530
531 while (here != end) {
532 if ((*here).second.value.getencoding() > 0) {
533 (*here).second.value = inconvert.convert((*here).second.value);
534 }
535
536 ++here;
537 }
538}
539
540// fcgienv will be loaded with environment name-value pairs
541// if using fastcgi (had to do this as getenv doesn't work
542// with our implementation of fastcgi). if fcgienv is empty
543// we'll simply use getenv
544text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
545 if (fcgienv.empty()) {
546 char *n = name.getcstr();
547 char *v = getenv(n);
548 delete []n;
549 if (v != NULL) return v;
550 return g_EmptyText;
551
552 } else return fcgienv[name];
553}
Note: See TracBrowser for help on using the repository browser.