source: trunk/gsdl/src/recpt/cgiutils.cpp@ 3151

Last change on this file since 3151 was 3151, checked in by jrm21, 22 years ago

MSIE >= 6.0 encodes high bytes in URL as %uHHHH (unicode) rather than %HH%HH (utf8), apparently regardless of what character encoding is in use. We now convert that as well, although we assume we always convert it to utf8. This should really use whatever encoding the user interface is using...

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1/**********************************************************************
2 *
3 * cgiutils.cpp -- general cgi utilities
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "cgiutils.h"
27#include "gsdlunicode.h"
28
// hexdigit maps a single hexadecimal digit character (0-9, a-f or A-F)
// to its numeric value 0..15. Any other character is handed back
// unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c;

  if ('0' <= c && c <= '9') {
    value = static_cast<unsigned short>(c - '0');
  } else if ('a' <= c && c <= 'f') {
    value = static_cast<unsigned short>(c - 'a' + 10);
  } else if ('A' <= c && c <= 'F') {
    value = static_cast<unsigned short>(c - 'A' + 10);
  }

  return value;
}
35
36
37static void c2hex (unsigned short c, text_t &t) {
38 t.clear();
39
40 if (c >= 256) {
41 t = "20"; // ' '
42 return;
43 }
44
45 unsigned short o1, o2;
46
47 o1 = (c/16) % 16;
48 o2 = c % 16;
49 if (o1 >= 10) o1 += 'a' - 10;
50 else o1 += '0';
51 if (o2 >= 10) o2 += 'a' - 10;
52 else o2 += '0';
53
54 t.push_back(o1);
55 t.push_back(o2);
56}
57
58// convert %xx and + to their appropriate equivalents
59// IE 6.0 and later use "%u" followed by 4 hex digits...
60void decode_cgi_arg (text_t &argstr) {
61 text_t::iterator in = argstr.begin();
62 text_t::iterator out = in;
63 text_t::iterator end = argstr.end();
64
65 while (in != end) {
66 if (*in == '+') *out = ' ';
67
68 else if (*in == '%') {
69 unsigned short c = '%';
70 ++in;
71 if (in != end) { // this is an encoding...
72 if (*in == 'u') { // convert %uHHHH to unicode then current encoding
73 // this assumes a short int is at least 16 bits...
74 ++in;
75 if (in != end)
76 c=hexdigit(*in++) << 12;
77 if (in != end)
78 c+=hexdigit(*in++) << 8;
79 if (in != end)
80 c+=hexdigit(*in++) << 4;
81 if (in != end)
82 c+=hexdigit(*in);
83 /* BAD!! The following assumes the interface is using utf-8. But
84 at this point we don't know what encoding we are using, unless
85 we can parse it out of the string we are currently decoding... */
86 text_t uni=" ";
87 uni[0]=c;
88 text_t utf8=to_utf8(uni);
89 int last_byte=utf8.size()-1;
90 for (int i=0;i<last_byte;i++)
91 *out++ = utf8[i];
92 c=utf8[last_byte];
93 } else { // convert %HH to hex value
94 c = hexdigit (*in);
95 ++in;
96 if (in != end && c < 16) { // sanity check on the previous character
97 c = c*16 + hexdigit (*in);
98 }
99 }
100 }
101 *out = c;
102 } else *out = *in;
103
104 if (in != end) in++;
105 out++;
106 }
107
108 // remove the excess characters
109 argstr.erase (out, end);
110}
111
112
113// split up the cgi arguments
114void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
115 cgiargsclass &args) {
116 args.clear();
117
118 text_t::iterator here = argstr.begin();
119 text_t::iterator end = argstr.end();
120 text_t key, value;
121
122 // extract out the key=value pairs
123 while (here != end) {
124 // get the next key and value pair
125 here = getdelimitstr (here, end, '=', key);
126 here = getdelimitstr (here, end, '&', value);
127
128 // convert %xx and + to their appropriate equivalents
129 decode_cgi_arg (value);
130
131 value.setencoding(1); // other encoding
132 // store this key=value pair
133 if (!key.empty()) {
134
135 // if arg occurs multiple times (as is the case with multiple
136 // checkboxes using the same name) we'll create a comma separated
137 // list of all the values (this uses a hack that encodes naturally
138 // occurring commas as %2C - values will therefore need to be decoded
139 // again before use) - it should use an array instead
140 const cgiarginfo *info = argsinfo.getarginfo (key);
141 if (info != NULL && info->multiplevalue) {
142 text_t newvalue = args[key];
143 if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
144 newvalue += encode_commas(value);
145 args.setarg (key, newvalue, cgiarg_t::cgi_arg);
146
147 } else {
148 args.setarg (key, value, cgiarg_t::cgi_arg);
149 }
150 }
151 }
152}
153
154text_t encode_commas (const text_t &intext) {
155
156 text_t outtext;
157
158 text_t::const_iterator here = intext.begin ();
159 text_t::const_iterator end = intext.end ();
160
161 while (here != end) {
162 if (*here == ',') outtext += "%2C";
163 else outtext.push_back (*here);
164 here ++;
165 }
166 return outtext;
167}
168
169text_t decode_commas (const text_t &intext) {
170
171 text_t outtext;
172
173 text_t::const_iterator here = intext.begin ();
174 text_t::const_iterator end = intext.end ();
175
176 while (here != end) {
177 if ((here+2<end) && *here == '%' && *(here+1) == '2' &&
178 (*(here+2) == 'C' || *(here+2) == 'c')) {
179 here += 2;
180 outtext.push_back(',');
181
182 }else outtext.push_back (*here);
183 here ++;
184 }
185 return outtext;
186}
187
188text_t minus_safe (const text_t &intext) {
189
190 text_t outtext;
191
192 text_t::const_iterator here = intext.begin ();
193 text_t::const_iterator end = intext.end ();
194
195 while (here != end) {
196 if (*here == '-') outtext += "Zz-";
197 else outtext.push_back (*here);
198 here ++;
199 }
200 outtext = cgi_safe (outtext);
201 return outtext;
202}
203
204text_t cgi_safe (const text_t &intext) {
205 text_t outtext;
206
207 text_t::const_iterator here = intext.begin ();
208 text_t::const_iterator end = intext.end ();
209 unsigned short c;
210 text_t ttmp;
211
212 while (here != end) {
213 c = *here;
214 if (((c >= 'a') && (c <= 'z')) ||
215 ((c >= 'A') && (c <= 'Z')) ||
216 ((c >= '0') && (c <= '9')) ||
217 (c == '+') || (c == '%') || (c == '-')) {
218 // alphanumeric character
219 outtext.push_back(c);
220 } else if (c == ' ') {
221 // space
222 outtext.push_back('+');
223 } else {
224 // everything else
225 outtext.push_back('%');
226 c2hex(c, ttmp);
227 outtext += ttmp;
228 }
229
230 here++;
231 }
232
233 return outtext;
234}
235
236
237
238
239static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
240 text_t::const_iterator last,
241 text_t &argname) {
242 first = getdelimitstr (first, last, '-', argname);
243 return first;
244}
245
246
247// check_save_conf_str checks the configuration string for
248// the saved args and makes sure it does not conflict with
249// the information about the arguments. If an error is encountered
250// it will return false and the program should not produce any
251// output.
252bool check_save_conf_str (const text_t &saveconf,
253 const cgiargsinfoclass &argsinfo,
254 ostream &logout) {
255 outconvertclass text_t2ascii;
256
257 text_tset argsset;
258 text_t::const_iterator saveconfhere = saveconf.begin ();
259 text_t::const_iterator saveconfend = saveconf.end ();
260 text_t argname;
261 const cgiarginfo *info;
262
263 // first check to make sure all saved arguments can be saved
264
265 while (saveconfhere != saveconfend) {
266 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
267
268 if (!argname.empty()) {
269 // save the argument name for later
270 argsset.insert (argname);
271
272 // check the argument
273 info = argsinfo.getarginfo (argname);
274 if (info == NULL) {
275 logout << text_t2ascii << "Error: the cgi argument \"" << argname
276 << "\" is used in the configuration string for the\n"
277 << "saved arguments but does not exist as a valid argument.\n\n";
278 return false;
279 }
280 if (info->savedarginfo == cgiarginfo::mustnot) {
281 logout << text_t2ascii << "Error: the cgi argument \"" << argname
282 << "\" is used in the configuration string for the\n"
283 << "saved arguments but has been specified as an argument whose\n"
284 << "state must not be saved.\n\n";
285 return false;
286 }
287 }
288 }
289
290
291 // next check that all saved arguments that should be saved
292 // are saved
293 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
294 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
295
296 while (argsinfohere != argsinfoend) {
297 if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
298 (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
299 logout << text_t2ascii << "Error: the cgi argument \""
300 << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
301 << "be save but was not listed in the saved arguments.\n\n";
302 return false;
303 }
304
305 argsinfohere++;
306 }
307
308 return true; // made it, no clashes
309}
310
311
312// create_save_conf_str will create a configuration string
313// based on the information in argsinfo. This method of configuration
314// is not recomended as small changes can produce large changes in
315// the resulting configuration string (for instance a totally different
316// ordering). Only arguments which "must" be saved are included in
317// the resulting string.
318text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
319 ostream &/*logout*/) {
320 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
321 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
322 text_t saveconf;
323 bool first = true;
324
325 while (argsinfohere != argsinfoend) {
326 // save this argument if it must be saved
327 if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
328 if (!first) saveconf.push_back ('-');
329 else first = false;
330 saveconf += (*argsinfohere).second.shortname;
331 }
332
333 argsinfohere++;
334 }
335
336 return saveconf;
337}
338
339
340// expand_save_args will expand the saved arguments based
341// on saveconf placing the results in args if they are not
342// already defined. If it encounters an error it will return false
343// and output more information to logout.
344bool expand_save_args (const cgiargsinfoclass &argsinfo,
345 const text_t &saveconf,
346 cgiargsclass &args,
347 ostream &logout) {
348 outconvertclass text_t2ascii;
349
350 text_t *arg_e = args.getarg("e");
351 if (arg_e == NULL) return true; // no compressed arguments
352 if (arg_e->empty()) return true; // no compressed arguments
353
354 text_t argname, argvalue;
355 const cgiarginfo *argnameinfo;
356
357 text_t::const_iterator saveconfhere = saveconf.begin();
358 text_t::const_iterator saveconfend = saveconf.end();
359
360 text_t::iterator arg_ehere = arg_e->begin();
361 text_t::iterator arg_eend = arg_e->end();
362 while (saveconfhere != saveconfend && arg_ehere != arg_eend) {
363 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
364
365 if (!argname.empty()) {
366 // found another entry
367 argnameinfo = argsinfo.getarginfo (argname);
368
369 if (argnameinfo == NULL) {
370 // no information about the argument could be found
371 // we can't keep going because we don't know whether
372 // this argument is a single or multiple character value
373 logout << text_t2ascii << "Error: the cgi argument \"" << argname
374 << "\" was specified as being a compressed argument\n"
375 << "but no information about it could be found within the "
376 << "cgiargsinfoclass.\n";
377 return false;
378
379 } else {
380
381 // found the argument information
382 if (argnameinfo->multiplechar) {
383 text_t::const_iterator sav = arg_ehere;
384 arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue);
385 // replace any '-' chars escaped with 'Zz'
386 bool first = true;
387 while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) {
388 if (first) argvalue.clear();
389 arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1;
390 while (sav != (arg_ehere-1)) {
391 if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) &&
392 !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav);
393 sav ++;
394 }
395 first = false;
396 }
397
398 argvalue.setencoding(1); // other encoding
399 if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg);
400 } else {
401 args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg);
402 arg_ehere++;
403 }
404 }
405 }
406 }
407
408 return true;
409}
410
411
412// adds the default values for those arguments which have not
413// been specified
414void add_default_args (const cgiargsinfoclass &argsinfo,
415 cgiargsclass &args,
416 ostream &/*logout*/) {
417 cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
418 cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
419
420 while (argsinfohere != argsinfoend) {
421 if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) {
422 args.setdefaultarg ((*argsinfohere).second.shortname,
423 (*argsinfohere).second.argdefault, cgiarg_t::default_arg);
424 }
425 argsinfohere++;
426 }
427}
428
429
430// compress_save_args will compress the arguments and return
431// them in compressed_args. If an error was encountered
432// compressed_args will be set to to "", an error will be
433// written to logout, and the function will return false.
434bool compress_save_args (const cgiargsinfoclass &argsinfo,
435 const text_t &saveconf,
436 cgiargsclass &args,
437 text_t &compressed_args,
438 outconvertclass &outconvert,
439 ostream &logout) {
440 outconvertclass text_t2ascii;
441
442 compressed_args.clear();
443
444 text_t argname, argvalue;
445 const cgiarginfo *argnameinfo;
446
447 text_t::const_iterator saveconfhere = saveconf.begin();
448 text_t::const_iterator saveconfend = saveconf.end();
449
450 while (saveconfhere != saveconfend) {
451 saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);
452
453 if (!argname.empty()) {
454 // found another entry
455 argnameinfo = argsinfo.getarginfo (argname);
456
457 if (argnameinfo == NULL) {
458 // no information about the argument could be found
459 // we can't keep going because we don't know whether
460 // this argument is a single or multiple character value
461 logout << text_t2ascii << "Error: the cgi argument \"" << argname
462 << "\" was specified as being a compressed argument\n"
463 << "but no information about it could be found within the "
464 << "cgiargsinfoclass.\n";
465 compressed_args.clear();
466 return false;
467
468 } else {
469 // found the argument information
470 if (argnameinfo->multiplechar) {
471 // multiple character argument -- sort out any '-' chars
472 compressed_args += minus_safe (outconvert.convert(args[argname]));
473 if (saveconfhere != saveconfend) compressed_args.push_back ('-');
474
475 } else {
476 // single character argument
477 if (args[argname].size() == 0) {
478 logout << text_t2ascii << "Error: the cgi argument \"" << argname
479 << "\" was specified as being a compressed argument which\n"
480 << "should have a one character value but it was empty.\n\n";
481 compressed_args.clear ();
482 return false;
483
484 } else if (args[argname].size() > 1) {
485 logout << text_t2ascii << "Error: the cgi argument \"" << argname
486 << "\" was specified as being a compressed argument which\n"
487 << "should have a one character value but it had multiple characters.\n\n";
488 compressed_args.clear ();
489 return false;
490 }
491
492 // everything is ok
493 compressed_args += args[argname];
494 }
495 }
496 }
497 }
498
499 return true;
500}
501
502
503// args_tounicode converts any arguments which are not in unicode
504// to unicode using inconvert
505void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) {
506 cgiargsclass::iterator here = args.begin();
507 cgiargsclass::iterator end = args.end();
508
509 while (here != end) {
510 if ((*here).second.value.getencoding() > 0) {
511 (*here).second.value = inconvert.convert((*here).second.value);
512 }
513
514 here++;
515 }
516}
517
518// fcgienv will be loaded with environment name-value pairs
519// if using fastcgi (had to do this as getenv doesn't work
520// with our implementation of fastcgi). if fcgienv is empty
521// we'll simply use getenv
522text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) {
523 if (fcgienv.empty()) {
524 char *n = name.getcstr();
525 char *v = getenv(n);
526 delete n;
527 if (v != NULL) return v;
528 return "";
529
530 } else return fcgienv[name];
531}
Note: See TracBrowser for help on using the repository browser.