source: trunk/gsdl/src/recpt/cgiutils.cpp@ 12485

Last change on this file since 12485 was 11998, checked in by davidb, 18 years ago

First cut at 'The Depositor' -- Greenstone support for institutional
repositories

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "gsdlunicode.h"
28#include "fileutil.h"
29#include "unitool.h" // in mg, for output_utf8_char
30
31
// Translate an ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') into
// its numeric value 0-15. Any other character is returned unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') {
    value = static_cast<unsigned short>(c - '0');
  } else if ('a' <= c && c <= 'f') {
    value = static_cast<unsigned short>(c - 'a' + 10);
  } else if ('A' <= c && c <= 'F') {
    value = static_cast<unsigned short>(c - 'A' + 10);
  }

  return value;
}
38
39
40static void c2hex (unsigned short c, text_t &t) {
41 t.clear();
42
43 if (c >= 256) {
44 t = "20"; // ' '
45 return;
46 }
47
48 unsigned short o1, o2;
49
50 o1 = (c/16) % 16;
51 o2 = c % 16;
52 if (o1 >= 10) o1 += 'a' - 10;
53 else o1 += '0';
54 if (o2 >= 10) o2 += 'a' - 10;
55 else o2 += '0';
56
57 t.push_back(o1);
58 t.push_back(o2);
59}
60
61// convert %xx and + to their appropriate equivalents
62// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
63void decode_cgi_arg (text_t &argstr) {
64 text_t::iterator in = argstr.begin();
65 text_t::iterator out = in;
66 text_t::iterator end = argstr.end();
67
68 while (in != end) {
69 if (*in == '+') *out = ' ';
70
71 else if (*in == '%') {
72 unsigned short c = '%';
73 ++in;
74 if (in != end) { // this is an encoding...
75 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
76 // this assumes a short int is at least 16 bits...
77 ++in;
78 if (in != end)
79 c=hexdigit(*in++) << 12;
80 if (in != end)
81 c+=hexdigit(*in++) << 8;
82 if (in != end)
83 c+=hexdigit(*in++) << 4;
84 if (in != end)
85 c+=hexdigit(*in);
86 /* BAD!! The following assumes the interface is using utf-8. But
87 at this point we don't know what encoding we are using, unless
88 we can parse it out of the string we are currently decoding... */
89 text_t uni=" ";
90 uni[0]=c;
91 text_t utf8=to_utf8(uni);
92 int last_byte=utf8.size()-1;
93 for (int i=0;i<last_byte;++i)
94 *out++ = utf8[i];
95 c=utf8[last_byte];
96 } else { // convert %HH to hex value
97 c = hexdigit (*in);
98 ++in;
99 if (in != end && c < 16) { // sanity check on the previous character
100 c = c*16 + hexdigit (*in);
101 }
102 }
103 }
104 *out = c;
105 } else *out = *in;
106
107 if (in != end) ++in;
108 ++out;
109 }
110
111 // remove the excess characters
112 argstr.erase (out, end);
113}
114
115
116// split up the cgi arguments
117void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
118 cgiargsclass &args) {
119 args.clear();
120
121 text_t::const_iterator here = argstr.begin();
122 text_t::const_iterator end = argstr.end();
123 text_t key, value;
124
125 text_tset multivalue;
126
127
128 // extract out the key=value pairs
129 while (here != end) {
130 // get the next key and value pair
131 here = getdelimitstr (here, end, '=', key);
132 here = getdelimitstr (here, end, '&', value);
133
134 // convert %xx and + to their appropriate equivalents
135 decode_cgi_arg (value);
136
137 value.setencoding(1); // other encoding
138 // store this key=value pair
139 if (!key.empty()) {
140
141 // if arg occurs multiple times (as is the case with multiple
142 // checkboxes using the same name) we'll create a comma separated
143 // list of all the values (this uses a hack that encodes naturally
144 // occurring commas as %2C - values will therefore need to be decoded
145 // again before use) - it should use an array instead
146 const cgiarginfo *info = argsinfo.getarginfo (key);
147
148 if (info==NULL) {
149 if (args.getarg(key)==NULL) {
150 args.setarg (key, encode_commas(value), cgiarg_t::cgi_arg);
151 }
152 else {
153 text_t newvalue = args[key];
154
155 newvalue += "," + encode_commas(value);
156 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
157 }
158 }
159 else {
160 if (info->multiplevalue) {
161
162 text_t newvalue = args[key];
163 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
164 newvalue += encode_commas(value);
165 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
166
167 } else {
168 args.setarg (key, value, cgiarg_t::cgi_arg);
169 }
170 }
171 }
172 }
173}
174
175text_t encode_commas (const text_t &intext) {
176
177 text_t outtext;
178
179 text_t::const_iterator here = intext.begin ();
180 text_t::const_iterator end = intext.end ();
181
182 while (here != end) {
183 if (*here == ',') outtext += "%2C";
184 else outtext.push_back (*here);
185 ++here;
186 }
187 return outtext;
188}
189
190text_t decode_commas (const text_t &intext) {
191
192 text_t outtext;
193
194 text_t::const_iterator here = intext.begin ();
195 text_t::const_iterator end = intext.end ();
196
197 while (here != end) {
198 if ((here+2<end) && *here == '%' && *(here+1) == '2' &&
199 (*(here+2) == 'C' || *(here+2) == 'c')) {
200 here += 2;
201 outtext.push_back(',');
202
203 }else outtext.push_back (*here);
204 ++here;
205 }
206 return outtext;
207}
208
209text_t minus_safe (const text_t &intext) {
210
211 text_t outtext;
212
213 text_t::const_iterator here = intext.begin ();
214 text_t::const_iterator end = intext.end ();
215
216 while (here != end) {
217 if (*here == '-') outtext += "Zz-";
218 else outtext.push_back (*here);
219 ++here;
220 }
221 outtext = cgi_safe (outtext);
222 return outtext;
223}
224
225text_t cgi_safe (const text_t &intext) {
226 text_t outtext;
227
228 text_t::const_iterator here = intext.begin ();
229 text_t::const_iterator end = intext.end ();
230 unsigned short c;
231 text_t ttmp;
232
233 while (here != end) {
234 c = *here;
235 if (((c >= 'a') && (c <= 'z')) ||
236 ((c >= 'A') && (c <= 'Z')) ||
237 ((c >= '0') && (c <= '9')) ||
238 (c == '+') || (c == '%') || (c == '-')) {
239 // alphanumeric character
240 outtext.push_back(c);
241 } else if (c == ' ') {
242 // space
243 outtext.push_back('+');
244 } else if (c > 255) { // unicode character
245 unsigned char buf[3]; // up to 3 bytes
246 buf[0]='\0';buf[1]='\0';buf[2]='\0';
247 output_utf8_char(c,buf, buf+2);
248 outtext.push_back('%');
249 c2hex(buf[0], ttmp);
250 outtext += ttmp;
251 outtext.push_back('%');
252 c2hex(buf[1], ttmp);
253 outtext += ttmp;
254 if (buf[2]) {
255 outtext.push_back('%');
256 c2hex(buf[2], ttmp);
257 outtext += ttmp;
258 }
259 } else {
260 // everything else
261 outtext.push_back('%');
262 c2hex(c, ttmp);
263 outtext += ttmp;
264 }
265
266 ++here;
267 }
268
269 return outtext;
270}
271
272
273
274
275static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
276 text_t::const_iterator last,
277 text_t &argname) {
278 first = getdelimitstr (first, last, '-', argname);
279 return first;
280}
281
282
283// check_save_conf_str checks the configuration string for
284// the saved args and makes sure it does not conflict with
285// the information about the arguments. If an error is encountered
286// it will return false and the program should not produce any
287// output.
288bool check_save_conf_str (const text_t &saveconf,
289 const cgiargsinfoclass &argsinfo,
290 ostream &logout) {
291 outconvertclass text_t2ascii;
292
293 text_tset argsset;
294 text_t::const_iterator saveconfhere = saveconf.begin ();
295 text_t::const_iterator saveconfend = saveconf.end ();
296 text_t argname;
297 const cgiarginfo *info;
298
299 // first check to make sure all saved arguments can be saved
300
301 while (saveconfhere != saveconfend) {
302 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
303
304 if (!argname.empty()) {
305 // save the argument name for later
306 argsset.insert (argname);
307
308 // check the argument
309 info = argsinfo.getarginfo (argname);
310 if (info == NULL) {
311 logout << text_t2ascii << "Error: the cgi argument \"" << argname
312 << "\" is used in the configuration string for the\n"
313 << "saved arguments but does not exist as a valid argument.\n\n";
314 return false;
315 }
316 if (info->savedarginfo == cgiarginfo::mustnot) {
317 logout << text_t2ascii << "Error: the cgi argument \"" << argname
318 << "\" is used in the configuration string for the\n"
319 << "saved arguments but has been specified as an argument whose\n"
320 << "state must not be saved.\n\n";
321 return false;
322 }
323 }
324 }
325
326
327 // next check that all saved arguments that should be saved
328 // are saved
329 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
330 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
331
332 while (argsinfohere != argsinfoend) {
333 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
334 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
335 logout << text_t2ascii << "Error: the cgi argument \""
336 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
337 << "be save but was not listed in the saved arguments.\n\n";
338 return false;
339 }
340
341 ++argsinfohere;
342 }
343
344 return true; // made it, no clashes
345}
346
347
348// create_save_conf_str will create a configuration string
349// based on the information in argsinfo. This method of configuration
350// is not recomended as small changes can produce large changes in
351// the resulting configuration string (for instance a totally different
352// ordering). Only arguments which "must" be saved are included in
353// the resulting string.
354text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
355 ostream &/*logout*/) {
356 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
357 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
358 text_t saveconf;
359 bool first = true;
360
361 while (argsinfohere != argsinfoend) {
362 // save this argument if it must be saved
363 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
364 if (!first) saveconf.push_back ('-');
365 else first = false;
366 saveconf += (*argsinfohere).second.shortname;
367 }
368
369 ++argsinfohere;
370 }
371
372 return saveconf;
373}
374
375
376// expand_save_args will expand the saved arguments based
377// on saveconf placing the results in args if they are not
378// already defined. If it encounters an error it will return false
379// and output more information to logout.
380bool expand_save_args (const cgiargsinfoclass &argsinfo,
381 const text_t &saveconf,
382 cgiargsclass &args,
383 ostream &logout) {
384 outconvertclass text_t2ascii;
385
386 text_t *arg_e = args.getarg("e");
387 if (arg_e == NULL) return true; // no compressed arguments
388 if (arg_e->empty()) return true; // no compressed arguments
389
390 text_t argname, argvalue;
391 const cgiarginfo *argnameinfo;
392
393 text_t::const_iterator saveconfhere = saveconf.begin();
394 text_t::const_iterator saveconfend = saveconf.end();
395
396 text_t::iterator arg_ebegin = arg_e->begin();
397 text_t::iterator arg_eend = arg_e->end();
398 text_t::iterator arg_ehere = arg_ebegin;
399 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
400 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
401
402 if (!argname.empty()) {
403 // found another entry
404 argnameinfo = argsinfo.getarginfo (argname);
405
406 if (argnameinfo == NULL) {
407 // no information about the argument could be found
408 // we can't keep going because we don't know whether
409 // this argument is a single or multiple character value
410 logout << text_t2ascii << "Error: the cgi argument \"" << argname
411 << "\" was specified as being a compressed argument\n"
412 << "but no information about it could be found within the "
413 << "cgiargsinfoclass.\n";
414 return false;
415
416 } else {
417
418 // found the argument information
419 if (argnameinfo->multiplechar) {
420 text_t::const_iterator sav = arg_ehere;
421 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
422 if (distance(arg_ebegin, arg_ehere) > 2) {
423 // replace any '-' chars escaped with 'Zz'
424 bool first = true;
425 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
426 if (first) argvalue.clear();
427 arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1;
428 while (sav != (arg_ehere-1)) {
429 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
430 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
431 ++sav;
432 }
433 first = false;
434 }
435 }
436 argvalue.setencoding(1); // other encoding
437 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
438 } else {
439 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
440 ++arg_ehere;
441 }
442 }
443 }
444 }
445
446 return true;
447}
448
449
450// adds the default values for those arguments which have not
451// been specified
452void add_default_args (const cgiargsinfoclass &argsinfo,
453 cgiargsclass &args,
454 ostream &/*logout*/) {
455 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
456 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
457
458 while (argsinfohere != argsinfoend) {
459 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
460 args.setdefaultarg ((*argsinfohere).second.shortname,
461 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
462 }
463 ++argsinfohere;
464 }
465}
466
467
468// compress_save_args will compress the arguments and return
469// them in compressed_args. If an error was encountered
470// compressed_args will be set to to "", an error will be
471// written to logout, and the function will return false.
472bool compress_save_args (const cgiargsinfoclass &argsinfo,
473 const text_t &saveconf,
474 cgiargsclass &args,
475 text_t &compressed_args,
476 outconvertclass &outconvert,
477 ostream &logout) {
478 outconvertclass text_t2ascii;
479
480 compressed_args.clear();
481
482 text_t argname, argvalue;
483 const cgiarginfo *argnameinfo;
484
485 text_t::const_iterator saveconfhere = saveconf.begin();
486 text_t::const_iterator saveconfend = saveconf.end();
487
488 while (saveconfhere != saveconfend) {
489 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
490
491 if (!argname.empty()) {
492 // found another entry
493 argnameinfo = argsinfo.getarginfo (argname);
494
495 if (argnameinfo == NULL) {
496 // no information about the argument could be found
497 // we can't keep going because we don't know whether
498 // this argument is a single or multiple character value
499 logout << text_t2ascii << "Error: the cgi argument \"" << argname
500 << "\" was specified as being a compressed argument\n"
501 << "but no information about it could be found within the "
502 << "cgiargsinfoclass.\n";
503 compressed_args.clear();
504 return false;
505
506 } else {
507 // found the argument information
508 if (argnameinfo->multiplechar) {
509 // multiple character argument -- sort out any '-' chars
510 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
511 compressed_args += minus_safe (args[argname]);
512 else
513 compressed_args += minus_safe (outconvert.convert(args[argname]));
514
515 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
516
517 } else {
518 // single character argument
519 if (args[argname].size() == 0) {
520 logout << text_t2ascii << "Error: the cgi argument \"" << argname
521 << "\" was specified as being a compressed argument which\n"
522 << "should have a one character value but it was empty.\n\n";
523 compressed_args.clear ();
524 return false;
525
526 } else if (args[argname].size() > 1) {
527 logout << text_t2ascii << "Error: the cgi argument \"" << argname
528 << "\" was specified as being a compressed argument which\n"
529 << "should have a one character value but it had multiple characters.\n\n";
530 compressed_args.clear ();
531 return false;
532 }
533
534 // everything is ok
535 compressed_args += args[argname];
536 }
537 }
538 }
539 }
540
541 return true;
542}
543
544
545// args_tounicode converts any arguments which are not in unicode
546// to unicode using inconvert
547void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
548 cgiargsclass::iterator here = args.begin();
549 cgiargsclass::iterator end = args.end();
550
551 while (here != end) {
552 if ((*here).second.value.getencoding() > 0) {
553 (*here).second.value = inconvert.convert((*here).second.value);
554 }
555
556 ++here;
557 }
558}
559
560// fcgienv will be loaded with environment name-value pairs
561// if using fastcgi (had to do this as getenv doesn't work
562// with our implementation of fastcgi). if fcgienv is empty
563// we'll simply use getenv
564text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
565 if (fcgienv.empty()) {
566 char *n = name.getcstr();
567 char *v = getenv(n);
568 delete []n;
569 if (v != NULL) return v;
570 return g_EmptyText;
571
572 } else return fcgienv[name];
573}
Note: See TracBrowser for help on using the repository browser.