source: trunk/gsdl/src/recpt/cgiutils.cpp@ 9674

Last change on this file since 9674 was 9674, checked in by jrm21, 17 years ago

rename utf-16 to utf-16be, otherwise browsers might get confused

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 16.3 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "gsdlunicode.h"
28#include "unitool.h" // in mg, for output_utf8_char
29
// Returns the numeric value (0-15) of the hexadecimal digit c, accepting
// both lower and upper case letters. Any character that is not a hex
// digit is handed back unchanged; callers use "result < 16" as a sanity
// check (note that characters whose own code is below 16 cannot be told
// apart from valid digits that way).
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') value = c - '0';
  else if ('a' <= c && c <= 'f') value = c - 'a' + 10;
  else if ('A' <= c && c <= 'F') value = c - 'A' + 10;

  return value;
}
36
37
38static void c2hex (unsigned short c, text_t &t) {
39 t.clear();
40
41 if (c >= 256) {
42 t = "20"; // ' '
43 return;
44 }
45
46 unsigned short o1, o2;
47
48 o1 = (c/16) % 16;
49 o2 = c % 16;
50 if (o1 >= 10) o1 += 'a' - 10;
51 else o1 += '0';
52 if (o2 >= 10) o2 += 'a' - 10;
53 else o2 += '0';
54
55 t.push_back(o1);
56 t.push_back(o2);
57}
58
59// convert %xx and + to their appropriate equivalents
60// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
61void decode_cgi_arg (text_t &argstr) {
62 text_t::iterator in = argstr.begin();
63 text_t::iterator out = in;
64 text_t::iterator end = argstr.end();
65
66 while (in != end) {
67 if (*in == '+') *out = ' ';
68
69 else if (*in == '%') {
70 unsigned short c = '%';
71 ++in;
72 if (in != end) { // this is an encoding...
73 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
74 // this assumes a short int is at least 16 bits...
75 ++in;
76 if (in != end)
77 c=hexdigit(*in++) << 12;
78 if (in != end)
79 c+=hexdigit(*in++) << 8;
80 if (in != end)
81 c+=hexdigit(*in++) << 4;
82 if (in != end)
83 c+=hexdigit(*in);
84 /* BAD!! The following assumes the interface is using utf-8. But
85 at this point we don't know what encoding we are using, unless
86 we can parse it out of the string we are currently decoding... */
87 text_t uni=" ";
88 uni[0]=c;
89 text_t utf8=to_utf8(uni);
90 int last_byte=utf8.size()-1;
91 for (int i=0;i<last_byte;++i)
92 *out++ = utf8[i];
93 c=utf8[last_byte];
94 } else { // convert %HH to hex value
95 c = hexdigit (*in);
96 ++in;
97 if (in != end && c < 16) { // sanity check on the previous character
98 c = c*16 + hexdigit (*in);
99 }
100 }
101 }
102 *out = c;
103 } else *out = *in;
104
105 if (in != end) ++in;
106 ++out;
107 }
108
109 // remove the excess characters
110 argstr.erase (out, end);
111}
112
113
114// split up the cgi arguments
115void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
116 cgiargsclass &args) {
117 args.clear();
118
119 text_t::const_iterator here = argstr.begin();
120 text_t::const_iterator end = argstr.end();
121 text_t key, value;
122
123 // extract out the key=value pairs
124 while (here != end) {
125 // get the next key and value pair
126 here = getdelimitstr (here, end, '=', key);
127 here = getdelimitstr (here, end, '&', value);
128
129 // convert %xx and + to their appropriate equivalents
130 decode_cgi_arg (value);
131
132 value.setencoding(1); // other encoding
133 // store this key=value pair
134 if (!key.empty()) {
135
136 // if arg occurs multiple times (as is the case with multiple
137 // checkboxes using the same name) we'll create a comma separated
138 // list of all the values (this uses a hack that encodes naturally
139 // occurring commas as %2C - values will therefore need to be decoded
140 // again before use) - it should use an array instead
141 const cgiarginfo *info = argsinfo.getarginfo (key);
142 if (info != NULL && info->multiplevalue) {
143 text_t newvalue = args[key];
144 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
145 newvalue += encode_commas(value);
146 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
147
148 } else {
149 args.setarg (key, value, cgiarg_t::cgi_arg);
150 }
151 }
152 }
153}
154
155text_t encode_commas (const text_t &intext) {
156
157 text_t outtext;
158
159 text_t::const_iterator here = intext.begin ();
160 text_t::const_iterator end = intext.end ();
161
162 while (here != end) {
163 if (*here == ',') outtext += "%2C";
164 else outtext.push_back (*here);
165 ++here;
166 }
167 return outtext;
168}
169
170text_t decode_commas (const text_t &intext) {
171
172 text_t outtext;
173
174 text_t::const_iterator here = intext.begin ();
175 text_t::const_iterator end = intext.end ();
176
177 while (here != end) {
178 if ((here+2<end) && *here == '%' && *(here+1) == '2' &&
179 (*(here+2) == 'C' || *(here+2) == 'c')) {
180 here += 2;
181 outtext.push_back(',');
182
183 }else outtext.push_back (*here);
184 ++here;
185 }
186 return outtext;
187}
188
189text_t minus_safe (const text_t &intext) {
190
191 text_t outtext;
192
193 text_t::const_iterator here = intext.begin ();
194 text_t::const_iterator end = intext.end ();
195
196 while (here != end) {
197 if (*here == '-') outtext += "Zz-";
198 else outtext.push_back (*here);
199 ++here;
200 }
201 outtext = cgi_safe (outtext);
202 return outtext;
203}
204
205text_t cgi_safe (const text_t &intext) {
206 text_t outtext;
207
208 text_t::const_iterator here = intext.begin ();
209 text_t::const_iterator end = intext.end ();
210 unsigned short c;
211 text_t ttmp;
212
213 while (here != end) {
214 c = *here;
215 if (((c >= 'a') && (c <= 'z')) ||
216 ((c >= 'A') && (c <= 'Z')) ||
217 ((c >= '0') && (c <= '9')) ||
218 (c == '+') || (c == '%') || (c == '-')) {
219 // alphanumeric character
220 outtext.push_back(c);
221 } else if (c == ' ') {
222 // space
223 outtext.push_back('+');
224 } else if (c > 255) { // unicode character
225 unsigned char buf[3]; // up to 3 bytes
226 buf[0]='\0';buf[1]='\0';buf[2]='\0';
227 output_utf8_char(c,buf, buf+2);
228 outtext.push_back('%');
229 c2hex(buf[0], ttmp);
230 outtext += ttmp;
231 outtext.push_back('%');
232 c2hex(buf[1], ttmp);
233 outtext += ttmp;
234 if (buf[2]) {
235 outtext.push_back('%');
236 c2hex(buf[2], ttmp);
237 outtext += ttmp;
238 }
239 } else {
240 // everything else
241 outtext.push_back('%');
242 c2hex(c, ttmp);
243 outtext += ttmp;
244 }
245
246 ++here;
247 }
248
249 return outtext;
250}
251
252
253
254
255static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
256 text_t::const_iterator last,
257 text_t &argname) {
258 first = getdelimitstr (first, last, '-', argname);
259 return first;
260}
261
262
263// check_save_conf_str checks the configuration string for
264// the saved args and makes sure it does not conflict with
265// the information about the arguments. If an error is encountered
266// it will return false and the program should not produce any
267// output.
268bool check_save_conf_str (const text_t &saveconf,
269 const cgiargsinfoclass &argsinfo,
270 ostream &logout) {
271 outconvertclass text_t2ascii;
272
273 text_tset argsset;
274 text_t::const_iterator saveconfhere = saveconf.begin ();
275 text_t::const_iterator saveconfend = saveconf.end ();
276 text_t argname;
277 const cgiarginfo *info;
278
279 // first check to make sure all saved arguments can be saved
280
281 while (saveconfhere != saveconfend) {
282 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
283
284 if (!argname.empty()) {
285 // save the argument name for later
286 argsset.insert (argname);
287
288 // check the argument
289 info = argsinfo.getarginfo (argname);
290 if (info == NULL) {
291 logout << text_t2ascii << "Error: the cgi argument \"" << argname
292 << "\" is used in the configuration string for the\n"
293 << "saved arguments but does not exist as a valid argument.\n\n";
294 return false;
295 }
296 if (info->savedarginfo == cgiarginfo::mustnot) {
297 logout << text_t2ascii << "Error: the cgi argument \"" << argname
298 << "\" is used in the configuration string for the\n"
299 << "saved arguments but has been specified as an argument whose\n"
300 << "state must not be saved.\n\n";
301 return false;
302 }
303 }
304 }
305
306
307 // next check that all saved arguments that should be saved
308 // are saved
309 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
310 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
311
312 while (argsinfohere != argsinfoend) {
313 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
314 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
315 logout << text_t2ascii << "Error: the cgi argument \""
316 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
317 << "be save but was not listed in the saved arguments.\n\n";
318 return false;
319 }
320
321 ++argsinfohere;
322 }
323
324 return true; // made it, no clashes
325}
326
327
328// create_save_conf_str will create a configuration string
329// based on the information in argsinfo. This method of configuration
330// is not recomended as small changes can produce large changes in
331// the resulting configuration string (for instance a totally different
332// ordering). Only arguments which "must" be saved are included in
333// the resulting string.
334text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
335 ostream &/*logout*/) {
336 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
337 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
338 text_t saveconf;
339 bool first = true;
340
341 while (argsinfohere != argsinfoend) {
342 // save this argument if it must be saved
343 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
344 if (!first) saveconf.push_back ('-');
345 else first = false;
346 saveconf += (*argsinfohere).second.shortname;
347 }
348
349 ++argsinfohere;
350 }
351
352 return saveconf;
353}
354
355
356// expand_save_args will expand the saved arguments based
357// on saveconf placing the results in args if they are not
358// already defined. If it encounters an error it will return false
359// and output more information to logout.
360bool expand_save_args (const cgiargsinfoclass &argsinfo,
361 const text_t &saveconf,
362 cgiargsclass &args,
363 ostream &logout) {
364 outconvertclass text_t2ascii;
365
366 text_t *arg_e = args.getarg("e");
367 if (arg_e == NULL) return true; // no compressed arguments
368 if (arg_e->empty()) return true; // no compressed arguments
369
370 text_t argname, argvalue;
371 const cgiarginfo *argnameinfo;
372
373 text_t::const_iterator saveconfhere = saveconf.begin();
374 text_t::const_iterator saveconfend = saveconf.end();
375
376 text_t::iterator arg_ehere = arg_e->begin();
377 text_t::iterator arg_eend = arg_e->end();
378 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
379 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
380
381 if (!argname.empty()) {
382 // found another entry
383 argnameinfo = argsinfo.getarginfo (argname);
384
385 if (argnameinfo == NULL) {
386 // no information about the argument could be found
387 // we can't keep going because we don't know whether
388 // this argument is a single or multiple character value
389 logout << text_t2ascii << "Error: the cgi argument \"" << argname
390 << "\" was specified as being a compressed argument\n"
391 << "but no information about it could be found within the "
392 << "cgiargsinfoclass.\n";
393 return false;
394
395 } else {
396
397 // found the argument information
398 if (argnameinfo->multiplechar) {
399 text_t::const_iterator sav = arg_ehere;
400 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
401 // replace any '-' chars escaped with 'Zz'
402 bool first = true;
403 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
404 if (first) argvalue.clear();
405 arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1;
406 while (sav != (arg_ehere-1)) {
407 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
408 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
409 ++sav;
410 }
411 first = false;
412 }
413
414 argvalue.setencoding(1); // other encoding
415 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
416 } else {
417 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
418 ++arg_ehere;
419 }
420 }
421 }
422 }
423
424 return true;
425}
426
427
428// adds the default values for those arguments which have not
429// been specified
430void add_default_args (const cgiargsinfoclass &argsinfo,
431 cgiargsclass &args,
432 ostream &/*logout*/) {
433 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
434 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
435
436 while (argsinfohere != argsinfoend) {
437 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
438 args.setdefaultarg ((*argsinfohere).second.shortname,
439 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
440 }
441 ++argsinfohere;
442 }
443}
444
445
446// compress_save_args will compress the arguments and return
447// them in compressed_args. If an error was encountered
448// compressed_args will be set to to "", an error will be
449// written to logout, and the function will return false.
450bool compress_save_args (const cgiargsinfoclass &argsinfo,
451 const text_t &saveconf,
452 cgiargsclass &args,
453 text_t &compressed_args,
454 outconvertclass &outconvert,
455 ostream &logout) {
456 outconvertclass text_t2ascii;
457
458 compressed_args.clear();
459
460 text_t argname, argvalue;
461 const cgiarginfo *argnameinfo;
462
463 text_t::const_iterator saveconfhere = saveconf.begin();
464 text_t::const_iterator saveconfend = saveconf.end();
465
466 while (saveconfhere != saveconfend) {
467 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
468
469 if (!argname.empty()) {
470 // found another entry
471 argnameinfo = argsinfo.getarginfo (argname);
472
473 if (argnameinfo == NULL) {
474 // no information about the argument could be found
475 // we can't keep going because we don't know whether
476 // this argument is a single or multiple character value
477 logout << text_t2ascii << "Error: the cgi argument \"" << argname
478 << "\" was specified as being a compressed argument\n"
479 << "but no information about it could be found within the "
480 << "cgiargsinfoclass.\n";
481 compressed_args.clear();
482 return false;
483
484 } else {
485 // found the argument information
486 if (argnameinfo->multiplechar) {
487 // multiple character argument -- sort out any '-' chars
488 if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
489 compressed_args += minus_safe (args[argname]);
490 else
491 compressed_args += minus_safe (outconvert.convert(args[argname]));
492
493 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
494
495 } else {
496 // single character argument
497 if (args[argname].size() == 0) {
498 logout << text_t2ascii << "Error: the cgi argument \"" << argname
499 << "\" was specified as being a compressed argument which\n"
500 << "should have a one character value but it was empty.\n\n";
501 compressed_args.clear ();
502 return false;
503
504 } else if (args[argname].size() > 1) {
505 logout << text_t2ascii << "Error: the cgi argument \"" << argname
506 << "\" was specified as being a compressed argument which\n"
507 << "should have a one character value but it had multiple characters.\n\n";
508 compressed_args.clear ();
509 return false;
510 }
511
512 // everything is ok
513 compressed_args += args[argname];
514 }
515 }
516 }
517 }
518
519 return true;
520}
521
522
523// args_tounicode converts any arguments which are not in unicode
524// to unicode using inconvert
525void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
526 cgiargsclass::iterator here = args.begin();
527 cgiargsclass::iterator end = args.end();
528
529 while (here != end) {
530 if ((*here).second.value.getencoding() > 0) {
531 (*here).second.value = inconvert.convert((*here).second.value);
532 }
533
534 ++here;
535 }
536}
537
538// fcgienv will be loaded with environment name-value pairs
539// if using fastcgi (had to do this as getenv doesn't work
540// with our implementation of fastcgi). if fcgienv is empty
541// we'll simply use getenv
542text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
543 if (fcgienv.empty()) {
544 char *n = name.getcstr();
545 char *v = getenv(n);
546 delete []n;
547 if (v != NULL) return v;
548 return g_EmptyText;
549
550 } else return fcgienv[name];
551}
Note: See TracBrowser for help on using the repository browser.