/**********************************************************************
 *
 * cgiutils.cpp -- general cgi utilities
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: cgiutils.cpp 873 2000-01-25 22:46:54Z sjboddie $
 *
 *********************************************************************/

/*
   $Log$
   Revision 1.13  2000/01/25 22:46:54  sjboddie
   changes to get fastcgi working properly

   Revision 1.12  1999/11/08 20:26:38  sjboddie
   added multiplevalue option to cgiarginfo

   Revision 1.11  1999/11/01 22:03:35  sjboddie
   Added ability to handle multiple arguments with the same name (as is the
   case with multiple checkboxes using the same name). I'm not sure this is
   the best way to do it but it does the trick for what I currently need.

   Revision 1.10  1999/09/21 21:41:17  sjboddie
   fixed a couple of problems in what I committed last

   Revision 1.9  1999/09/21 11:30:39  sjboddie
   added ability to escape out '-' in saved args (currently with a 'Z' ???)

   Revision 1.8  1999/09/07 04:56:53  sjboddie
   added GPL notice

   Revision 1.7  1999/08/25 22:27:13  sjboddie
   prevented cgi_safe from converting '+' and '-'. It was causing problems
   with query strings containing spaces.
   The space was being converted to '+', then %2b, then %xx2b over time
   when saved in compressed args. I hope this won't cause problems
   elsewhere...

   Revision 1.6  1999/07/11 01:05:19  rjmcnab
   Stored origin of cgiarg with argument.

   Revision 1.5  1999/06/26 01:08:36  rjmcnab
   Added encoding and decoding of multibyte compresesd arguments.

   Revision 1.4  1999/06/08 22:03:43  sjboddie
   query string is now made cgi safe before being added to compressed args

   Revision 1.3  1999/02/08 01:28:00  rjmcnab
   Got the receptionist producing something using the statusaction.

   Revision 1.2  1999/02/05 10:42:43  rjmcnab
   Continued working on receptionist

   Revision 1.1  1999/01/08 08:40:56  rjmcnab
   Moved from lib directory.

   Revision 1.1  1999/01/08 03:57:45  rjmcnab
   Initial revision

 */


#include "cgiutils.h"


// is_hexdigit returns true if c is one of 0-9, a-f or A-F
static bool is_hexdigit (unsigned short c) {
  return ((c >= '0' && c <= '9') ||
          (c >= 'a' && c <= 'f') ||
          (c >= 'A' && c <= 'F'));
}

// hexdigit returns the numeric value (0-15) of the hexadecimal
// digit c. If c is not a hexadecimal digit it is returned
// unchanged, so callers that need to tell the two cases apart
// should test with is_hexdigit first.
static unsigned short hexdigit (unsigned short c) {
  if (c >= '0' && c <= '9') return (c-'0');
  if (c >= 'a' && c <= 'f') return (c-'a'+10);
  if (c >= 'A' && c <= 'F') return (c-'A'+10);
  return c;
}


// c2hex sets t to the two digit lower case hexadecimal
// representation of c. Characters of 256 or more cannot be
// represented in two digits and are replaced by "20" (a space).
static void c2hex (unsigned short c, text_t &t) {
  t.clear();

  if (c >= 256) {
    t = "20"; // ' '
    return;
  }

  unsigned short o1, o2;

  o1 = (c/16) % 16;
  o2 = c % 16;
  if (o1 >= 10) o1 += 'a' - 10;
  else o1 += '0';
  if (o2 >= 10) o2 += 'a' - 10;
  else o2 += '0';

  t.push_back(o1);
  t.push_back(o2);
}


// convert %xx and + to their appropriate equivalents
//
// The conversion is done in place: the decoded text is never
// longer than the encoded text so the write position can never
// overtake the read position.
//
// A '%' is only treated as an escape when it is followed by two
// hexadecimal digits. A '%' which is not (for example the one in
// "100%+sure", or a '%' at the very end of the string) is copied
// through unchanged and decoding carries on with the character
// after it, so malformed input can not corrupt or swallow the
// characters that follow it.
void decode_cgi_arg (text_t &argstr) {
  text_t::iterator in = argstr.begin();
  text_t::iterator out = in;
  text_t::iterator end = argstr.end();

  while (in != end) {
    if (*in == '+') {
      *out = ' ';
      ++in;

    } else if (*in == '%') {
      // look ahead (without running off the end) for two hex digits
      text_t::iterator digit1 = in + 1;
      bool isescape = false;
      if (digit1 != end && is_hexdigit (*digit1)) {
        text_t::iterator digit2 = digit1 + 1;
        if (digit2 != end && is_hexdigit (*digit2)) {
          unsigned short c = hexdigit (*digit1) * 16 + hexdigit (*digit2);
          *out = c;
          in = digit2 + 1;
          isescape = true;
        }
      }

      if (!isescape) {
        // not a valid %xx sequence, keep the '%' as it is
        *out = *in;
        ++in;
      }

    } else {
      *out = *in;
      ++in;
    }

    ++out;
  }

  // remove the excess characters
  argstr.erase (out, end);
}


// split up the cgi arguments
//
// argstr is a string of key=value pairs separated by '&'. Each
// value is decoded with decode_cgi_arg and stored in args (which
// is cleared first) with an origin of cgiarg_t::cgi_arg. Pairs
// with an empty key are ignored. argsinfo is only consulted when
// a key turns up for a second time with a non-empty value already
// stored (see below).
void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr,
                     cgiargsclass &args) {
  args.clear();

  text_t::iterator here = argstr.begin();
  text_t::iterator end = argstr.end();
  text_t key, value;

  // extract out the key=value pairs
  while (here != end) {
    // get the next key and value pair
    here = getdelimitstr (here, end, '=', key);
    here = getdelimitstr (here, end, '&', value);

    // convert %xx and + to their appropriate equivalents
    decode_cgi_arg (value);

    value.setencoding(1); // other encoding
    // store this key=value pair
    if (!key.empty()) {

      // if arg occurs multiple times (as is the case with
      // multiple checkboxes using the same name) we'll
      // create a comma separated list of all the values
      // (if multiplevalue)
      if (!args[key].empty()) {
        const cgiarginfo *info = argsinfo.getarginfo (key);
        if (info != NULL && info->multiplevalue)
          args[key] += "," + value;
        else
          // not a multiple value argument, the last value wins
          args.setarg (key, value, cgiarg_t::cgi_arg);

      } else
        args.setarg (key, value, cgiarg_t::cgi_arg);
    }
  }
}


// minus_safe returns a copy of intext in which every '-' has been
// replaced by "Z-" and the result has then been passed through
// cgi_safe. It is used for values stored in the compressed
// arguments, where an unescaped '-' separates one value from
// the next.
text_t minus_safe (const text_t &intext) {
  text_t outtext;

  text_t::const_iterator here = intext.begin ();
  text_t::const_iterator end = intext.end ();

  while (here != end) {
    if (*here == '-') outtext += "Z-";
    else outtext.push_back (*here);
    here ++;
  }
  outtext = cgi_safe (outtext);
  return outtext;
}


// cgi_safe returns a copy of intext which is safe to use in a
// cgi argument: letters, digits, '+', '%' and '-' are left as
// they are, a space becomes '+' and every other character becomes
// '%' followed by its two digit hexadecimal code (see c2hex).
text_t cgi_safe (const text_t &intext) {
  text_t outtext;

  text_t::const_iterator here = intext.begin ();
  text_t::const_iterator end = intext.end ();
  unsigned short c;
  text_t ttmp;

  while (here != end) {
    c = *here;
    if (((c >= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')) ||
        (c == '+') || (c == '%') || (c == '-')) {
      // alphanumeric character (or one of '+', '%' and '-', which
      // are deliberately passed through untouched)
      outtext.push_back(c);
    } else if (c == ' ') {
      // space
      outtext.push_back('+');
    } else {
      // everything else
      outtext.push_back('%');
      c2hex(c, ttmp);
      outtext += ttmp;
    }

    here++;
  }

  return outtext;
}


// get_next_save_arg reads the next '-' separated argument name
// from the saved argument configuration string [first, last)
// into argname and returns the position to carry on from.
static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
                                                 text_t::const_iterator last,
                                                 text_t &argname) {
  first = getdelimitstr (first, last, '-', argname);
  return first;
}


// check_save_conf_str checks the configuration string for
// the saved args and makes sure it does not conflict with
// the information about the arguments. If an error is encountered
// it will return false and the program should not produce any
// output.
//
// An error is reported to logout when saveconf names an argument
// which is unknown to argsinfo, when it names an argument whose
// state must not be saved, or when an argument whose state must be
// saved is missing from saveconf.
bool check_save_conf_str (const text_t &saveconf,
                          const cgiargsinfoclass &argsinfo,
                          ostream &logout) {
  outconvertclass text_t2ascii;

  text_tset argsset;
  text_t::const_iterator saveconfhere = saveconf.begin ();
  text_t::const_iterator saveconfend = saveconf.end ();
  text_t argname;
  const cgiarginfo *info;

  // first check to make sure all saved arguments can be saved

  while (saveconfhere != saveconfend) {
    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);

    if (!argname.empty()) {
      // save the argument name for later
      argsset.insert (argname);

      // check the argument
      info = argsinfo.getarginfo (argname);
      if (info == NULL) {
        logout << text_t2ascii << "Error: the cgi argument \"" << argname
               << "\" is used in the configuration string for the\n"
               << "saved arguments but does not exist as a valid argument.\n\n";
        return false;
      }
      if (info->savedarginfo == cgiarginfo::mustnot) {
        logout << text_t2ascii << "Error: the cgi argument \"" << argname
               << "\" is used in the configuration string for the\n"
               << "saved arguments but has been specified as an argument whose\n"
               << "state must not be saved.\n\n";
        return false;
      }
    }
  }


  // next check that all saved arguments that should be saved
  // are saved
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();

  while (argsinfohere != argsinfoend) {
    if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
        (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
      logout << text_t2ascii << "Error: the cgi argument \""
             << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
             << "be save but was not listed in the saved arguments.\n\n";
      return false;
    }

    argsinfohere++;
  }

  return true; // made it, no clashes
}


// create_save_conf_str will create a configuration string
// based on the information in argsinfo. This method of configuration
// is not recommended as small changes can produce large changes in
// the resulting configuration string (for instance a totally different
// ordering). Only arguments which "must" be saved are included in
// the resulting string.
//
// The short names are joined with '-' in the order in which
// argsinfo is iterated.
text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
                             ostream &/*logout*/) {
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
  text_t saveconf;
  bool first = true;

  while (argsinfohere != argsinfoend) {
    // save this argument if it must be saved
    if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
      if (!first) saveconf.push_back ('-');
      else first = false;
      saveconf += (*argsinfohere).second.shortname;
    }

    argsinfohere++;
  }

  return saveconf;
}


// expand_save_args will expand the saved arguments based
// on saveconf placing the results in args if they are not
// already defined. If it encounters an error it will return false
// and output more information to logout.
bool expand_save_args (const cgiargsinfoclass &argsinfo, const text_t &saveconf, cgiargsclass &args, ostream &logout) { outconvertclass text_t2ascii; text_t *arg_e = args.getarg("e"); if (arg_e == NULL) return true; // no compressed arguments if (arg_e->empty()) return true; // no compressed arguments text_t argname, argvalue; const cgiarginfo *argnameinfo; text_t::const_iterator saveconfhere = saveconf.begin(); text_t::const_iterator saveconfend = saveconf.end(); text_t::iterator arg_ehere = arg_e->begin(); text_t::iterator arg_eend = arg_e->end(); while (saveconfhere != saveconfend && arg_ehere != arg_eend) { saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname); if (!argname.empty()) { // found another entry argnameinfo = argsinfo.getarginfo (argname); if (argnameinfo == NULL) { // no information about the argument could be found // we can't keep going because we don't know whether // this argument is a single or multiple character value logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument\n" << "but no information about it could be found within the " << "cgiargsinfoclass.\n"; return false; } else { // found the argument information if (argnameinfo->multiplechar) { text_t::const_iterator sav = arg_ehere; arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue); // replace any '-' chars escaped with 'Z' if (*(arg_ehere-2) == 'Z') { argvalue.clear(); arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1; while (sav != (arg_ehere-1)) { if (!((*sav == 'Z') && (*(sav+1) == '-'))) argvalue.push_back (*sav); sav ++; } } argvalue.setencoding(1); // other encoding if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg); } else { args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg); arg_ehere++; } } } } return true; } // adds the default values for those arguments which have not // been specified void add_default_args (const cgiargsinfoclass 
&argsinfo, cgiargsclass &args, ostream &/*logout*/) { cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin (); cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end (); while (argsinfohere != argsinfoend) { if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) { args.setdefaultarg ((*argsinfohere).second.shortname, (*argsinfohere).second.argdefault, cgiarg_t::default_arg); } argsinfohere++; } } // compress_save_args will compress the arguments and return // them in compressed_args. If an error was encountered // compressed_args will be set to to "", an error will be // written to logout, and the function will return false. bool compress_save_args (const cgiargsinfoclass &argsinfo, const text_t &saveconf, cgiargsclass &args, text_t &compressed_args, outconvertclass &outconvert, ostream &logout) { outconvertclass text_t2ascii; compressed_args.clear(); text_t argname, argvalue; const cgiarginfo *argnameinfo; text_t::const_iterator saveconfhere = saveconf.begin(); text_t::const_iterator saveconfend = saveconf.end(); while (saveconfhere != saveconfend) { saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname); if (!argname.empty()) { // found another entry argnameinfo = argsinfo.getarginfo (argname); if (argnameinfo == NULL) { // no information about the argument could be found // we can't keep going because we don't know whether // this argument is a single or multiple character value logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument\n" << "but no information about it could be found within the " << "cgiargsinfoclass.\n"; compressed_args.clear(); return false; } else { // found the argument information if (argnameinfo->multiplechar) { // multiple character argument -- sort out any '-' chars compressed_args += minus_safe (outconvert.convert(args[argname])); if (saveconfhere != saveconfend) compressed_args.push_back ('-'); } else { // single character argument if 
(args[argname].size() == 0) { logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument which\n" << "should have a one character value but it was empty.\n\n"; compressed_args.clear (); return false; } else if (args[argname].size() > 1) { logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument which\n" << "should have a one character value but it had multiple characters.\n\n"; compressed_args.clear (); return false; } // everything is ok compressed_args += args[argname]; } } } } return true; } // args_tounicode converts any arguments which are not in unicode // to unicode using inconvert void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) { cgiargsclass::iterator here = args.begin(); cgiargsclass::iterator end = args.end(); while (here != end) { if ((*here).second.value.getencoding() > 0) { (*here).second.value = inconvert.convert((*here).second.value); } here++; } } // fcgienv will be loaded with environment name-value pairs // if using fastcgi (had to do this as getenv doesn't work // with our implementation of fastcgi). if fcgienv is empty // we'll simply use getenv text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) { if (fcgienv.empty()) { char *n = name.getcstr(); char *v = getenv(n); delete n; if (v != NULL) return v; return ""; } else return fcgienv[name]; }