/**********************************************************************
 *
 * cgiutils.cpp -- general cgi utilities
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "cgiutils.h"
#include "gsdlunicode.h"
#include "unitool.h" // in mg, for output_utf8_char


// hexdigit converts a single hexadecimal digit character
// ('0'-'9', 'a'-'f' or 'A'-'F') to its numeric value (0..15).
// Any other character is handed back unchanged.
static unsigned short hexdigit (unsigned short c) {
  unsigned short value = c; // not a hex digit: pass it through

  if (c >= '0' && c <= '9') value = c - '0';
  else if (c >= 'a' && c <= 'f') value = c - 'a' + 10;
  else if (c >= 'A' && c <= 'F') value = c - 'A' + 10;

  return value;
}


// c2hex writes the two digit, lower case hexadecimal form of c
// into t (replacing whatever t held before). Values of 256 and
// above do not fit in two digits; for those t is set to "20",
// the hex code of a space.
static void c2hex (unsigned short c, text_t &t) {
  static const char hexchars[] = "0123456789abcdef";

  t.clear();

  if (c >= 256) {
    t = "20"; // ' '
    return;
  }

  const unsigned short high = hexchars[(c / 16) % 16];
  const unsigned short low = hexchars[c % 16];
  t.push_back (high);
  t.push_back (low);
}

// convert %xx and + to their appropriate equivalents
// IE 6.0 and later use "%u" followed by 4 hex digits... MS IIS extension!
void decode_cgi_arg (text_t &argstr) {
  // "in" is the read position; decoded characters are written
  // back into argstr through "out", which starts at the same place
  text_t::iterator in = argstr.begin();
  text_t::iterator out = in;
  text_t::iterator end = argstr.end();

  while (in != end) {
    if (*in == '+') *out = ' ';

    else if (*in == '%') {
      // a lone '%' at the very end of the string is kept as '%'
      unsigned short c = '%';
      ++in;
      if (in != end) { // this is an encoding...
        if (*in == 'u') { // convert %uHHHH to unicode then current encoding
          // this assumes a short int is at least 16 bits...
          // each hex digit is only read while input remains, so a
          // truncated %u sequence yields a partially built value
          ++in;
          if (in != end) c=hexdigit(*in++) << 12;
          if (in != end) c+=hexdigit(*in++) << 8;
          if (in != end) c+=hexdigit(*in++) << 4;
          if (in != end) c+=hexdigit(*in);
          /* BAD!! The following assumes the interface is using utf-8. But
             at this point we don't know what encoding we are using, unless
             we can parse it out of the string we are currently decoding... */
          text_t uni=" ";
          uni[0]=c;
          text_t utf8=to_utf8(uni);
          int last_byte=utf8.size()-1;
          // NOTE(review): text appears to be missing from this copy of
          // the file starting inside the for-condition below: the rest
          // of decode_cgi_arg and the header/start of the following
          // function are not present. What follows the damaged line is
          // the tail of a routine that stores a key/value pair in
          // "args". Restore from a pristine copy before building.
          for (int i=0;imultiplevalue) {
        // append this value to whatever is already stored under key;
        // a ',' separator is only added when the stored value itself
        // came from the cgi arguments. Commas inside the new value
        // are encoded first so they cannot be mistaken for separators
        text_t newvalue = args[key];
        if (args.lookupcgiarg(key).source == cgiarg_t::cgi_arg) newvalue += ",";
        newvalue += encode_commas(value);
        args.setarg (key, newvalue, cgiarg_t::cgi_arg);

      } else {
        args.setarg (key, value, cgiarg_t::cgi_arg);
      }
    }
  }
}

// encode_commas returns a copy of intext in which every ','
// has been replaced by "%2C"; all other characters are copied
// unchanged.
text_t encode_commas (const text_t &intext) {
  text_t outtext;

  text_t::const_iterator here = intext.begin ();
  text_t::const_iterator end = intext.end ();

  while (here != end) {
    if (*here == ',') outtext += "%2C";
    else outtext.push_back (*here);

    ++here;
  }

  return outtext;
}

text_t decode_commas (const text_t &intext) {
  text_t outtext;

  text_t::const_iterator here = intext.begin ();
  text_t::const_iterator end = intext.end ();

  while (here != end) {
    // NOTE(review): text appears to be missing from this copy of the
    // file inside the condition below: the rest of decode_commas and
    // the header/start of the following function (which declares
    // "c" and "ttmp") are not present. What follows is the tail of a
    // routine that builds a url-safe copy of a string in outtext.
    // Restore from a pristine copy before building.
    if ((here+2= 'a') && (c <= 'z')) ||
        ((c >= 'A') && (c <= 'Z')) ||
        ((c >= '0') && (c <= '9')) ||
        (c == '+') || (c == '%') || (c == '-')) {
      // alphanumeric character
      // ('+', '%' and '-' are also copied through unchanged)
      outtext.push_back(c);
    } else if (c == ' ') {
      // space
      outtext.push_back('+');
    } else if (c > 255) { // unicode character
      // the bytes written into buf by output_utf8_char are emitted
      // as %xx escapes; the third one only if it is non-zero
      unsigned char buf[3]; // up to 3 bytes
      buf[0]='\0';buf[1]='\0';buf[2]='\0';
      output_utf8_char(c,buf, buf+2);
      outtext.push_back('%');
      c2hex(buf[0], ttmp);
      outtext += ttmp;
      outtext.push_back('%');
      c2hex(buf[1], ttmp);
      outtext += ttmp;
      if (buf[2]) {
        outtext.push_back('%');
        c2hex(buf[2], ttmp);
        outtext += ttmp;
      }
    } else {
      // everything else
      // is written as a single %xx escape
      outtext.push_back('%');
      c2hex(c, ttmp);
      outtext += ttmp;
    }

    ++here;
  }

  return outtext;
}


// get_next_save_arg reads the next '-' delimited argument name
// from the range [first, last) into argname, using getdelimitstr,
// and returns the iterator getdelimitstr hands back (the position
// to continue reading from).
static text_t::const_iterator get_next_save_arg (text_t::const_iterator first,
                                                 text_t::const_iterator last,
                                                 text_t &argname) {
  first = getdelimitstr (first, last, '-', argname);
  return first;
}


// check_save_conf_str checks the configuration string for
// the saved args and makes sure it does not conflict with
// the information about the arguments. If an error is encountered
// it will return false and the program should not produce any
// output.
//
// saveconf is a '-' separated list of argument names, argsinfo
// holds the description of every known argument, and a message
// describing the first problem found is written to logout.
bool check_save_conf_str (const text_t &saveconf,
                          const cgiargsinfoclass &argsinfo,
                          ostream &logout) {
  outconvertclass text_t2ascii;

  // names that appear in saveconf, collected for the second pass
  text_tset argsset;
  text_t::const_iterator saveconfhere = saveconf.begin ();
  text_t::const_iterator saveconfend = saveconf.end ();
  text_t argname;
  const cgiarginfo *info;

  // first check to make sure all saved arguments can be saved

  while (saveconfhere != saveconfend) {
    saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname);

    // empty names (e.g. from two adjacent '-') are ignored
    if (!argname.empty()) {
      // save the argument name for later
      argsset.insert (argname);

      // check the argument
      info = argsinfo.getarginfo (argname);
      if (info == NULL) {
        logout << text_t2ascii << "Error: the cgi argument \"" << argname
               << "\" is used in the configuration string for the\n"
               << "saved arguments but does not exist as a valid argument.\n\n";
        return false;
      }
      if (info->savedarginfo == cgiarginfo::mustnot) {
        logout << text_t2ascii << "Error: the cgi argument \"" << argname
               << "\" is used in the configuration string for the\n"
               << "saved arguments but has been specified as an argument whose\n"
               << "state must not be saved.\n\n";
        return false;
      }
    }
  }


  // next check that all saved arguments that should be saved
  // are saved
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();

  while (argsinfohere != argsinfoend) {
    if (((*argsinfohere).second.savedarginfo == cgiarginfo::must) &&
        (argsset.find((*argsinfohere).second.shortname) == argsset.end())) {
      logout << text_t2ascii << "Error: the cgi argument \""
             << (*argsinfohere).second.shortname << "\" was specified as needing to\n"
             << "be save but was not listed in the saved arguments.\n\n";
      return false;
    }

    ++argsinfohere;
  }

  return true; // made it, no clashes
}


// create_save_conf_str will create a configuration string
// based on the information in argsinfo. This method of configuration
// is not recommended as small changes can produce large changes in
// the resulting configuration string (for instance a totally different
// ordering). Only arguments which "must" be saved are included in
// the resulting string.
//
// The result is the short names of those arguments, in the order
// argsinfo iterates them, joined with '-'. The ostream parameter
// is not used.
text_t create_save_conf_str (const cgiargsinfoclass &argsinfo,
                             ostream &/*logout*/) {
  cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin ();
  cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end ();
  text_t saveconf;
  // true until the first name has been written, so that '-' only
  // ever appears between names
  bool first = true;

  while (argsinfohere != argsinfoend) {
    // save this argument if it must be saved
    if ((*argsinfohere).second.savedarginfo == cgiarginfo::must) {
      if (!first) saveconf.push_back ('-');
      else first = false;
      saveconf += (*argsinfohere).second.shortname;
    }

    ++argsinfohere;
  }

  return saveconf;
}


// expand_save_args will expand the saved arguments based
// on saveconf placing the results in args if they are not
// already defined. If it encounters an error it will return false
// and output more information to logout.
bool expand_save_args (const cgiargsinfoclass &argsinfo, const text_t &saveconf, cgiargsclass &args, ostream &logout) { outconvertclass text_t2ascii; text_t *arg_e = args.getarg("e"); if (arg_e == NULL) return true; // no compressed arguments if (arg_e->empty()) return true; // no compressed arguments text_t argname, argvalue; const cgiarginfo *argnameinfo; text_t::const_iterator saveconfhere = saveconf.begin(); text_t::const_iterator saveconfend = saveconf.end(); text_t::iterator arg_ehere = arg_e->begin(); text_t::iterator arg_eend = arg_e->end(); while (saveconfhere != saveconfend && arg_ehere != arg_eend) { saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname); if (!argname.empty()) { // found another entry argnameinfo = argsinfo.getarginfo (argname); if (argnameinfo == NULL) { // no information about the argument could be found // we can't keep going because we don't know whether // this argument is a single or multiple character value logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument\n" << "but no information about it could be found within the " << "cgiargsinfoclass.\n"; return false; } else { // found the argument information if (argnameinfo->multiplechar) { text_t::const_iterator sav = arg_ehere; arg_ehere = getdelimitstr (arg_ehere, arg_eend, '-', argvalue); // replace any '-' chars escaped with 'Zz' bool first = true; while ((*(arg_ehere-3) == 'Z') && (*(arg_ehere-2) == 'z')) { if (first) argvalue.clear(); arg_ehere = (findchar (arg_ehere, arg_eend, '-')) + 1; while (sav != (arg_ehere-1)) { if (!((*sav == 'Z') && (*(sav+1) == 'z') && (*(sav+2) == '-')) && !((*(sav-1) == 'Z') && (*sav == 'z') && (*(sav+1) == '-'))) argvalue.push_back (*sav); ++sav; } first = false; } argvalue.setencoding(1); // other encoding if (!argvalue.empty()) args.setdefaultarg (argname, argvalue, cgiarg_t::compressed_arg); } else { args.setdefaultcarg (argname,*arg_ehere, cgiarg_t::compressed_arg); 
++arg_ehere; } } } } return true; } // adds the default values for those arguments which have not // been specified void add_default_args (const cgiargsinfoclass &argsinfo, cgiargsclass &args, ostream &/*logout*/) { cgiargsinfoclass::const_iterator argsinfohere = argsinfo.begin (); cgiargsinfoclass::const_iterator argsinfoend = argsinfo.end (); while (argsinfohere != argsinfoend) { if ((*argsinfohere).second.defaultstatus != cgiarginfo::none) { args.setdefaultarg ((*argsinfohere).second.shortname, (*argsinfohere).second.argdefault, cgiarg_t::default_arg); } ++argsinfohere; } } // compress_save_args will compress the arguments and return // them in compressed_args. If an error was encountered // compressed_args will be set to to "", an error will be // written to logout, and the function will return false. bool compress_save_args (const cgiargsinfoclass &argsinfo, const text_t &saveconf, cgiargsclass &args, text_t &compressed_args, outconvertclass &outconvert, ostream &logout) { outconvertclass text_t2ascii; compressed_args.clear(); text_t argname, argvalue; const cgiarginfo *argnameinfo; text_t::const_iterator saveconfhere = saveconf.begin(); text_t::const_iterator saveconfend = saveconf.end(); while (saveconfhere != saveconfend) { saveconfhere = get_next_save_arg (saveconfhere, saveconfend, argname); if (!argname.empty()) { // found another entry argnameinfo = argsinfo.getarginfo (argname); if (argnameinfo == NULL) { // no information about the argument could be found // we can't keep going because we don't know whether // this argument is a single or multiple character value logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument\n" << "but no information about it could be found within the " << "cgiargsinfoclass.\n"; compressed_args.clear(); return false; } else { // found the argument information if (argnameinfo->multiplechar) { // multiple character argument -- sort out any '-' chars if 
(args["w"]=="utf-16be") // browsers don't like \0 in urls... compressed_args += minus_safe (args[argname]); else compressed_args += minus_safe (outconvert.convert(args[argname])); if (saveconfhere != saveconfend) compressed_args.push_back ('-'); } else { // single character argument if (args[argname].size() == 0) { logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument which\n" << "should have a one character value but it was empty.\n\n"; compressed_args.clear (); return false; } else if (args[argname].size() > 1) { logout << text_t2ascii << "Error: the cgi argument \"" << argname << "\" was specified as being a compressed argument which\n" << "should have a one character value but it had multiple characters.\n\n"; compressed_args.clear (); return false; } // everything is ok compressed_args += args[argname]; } } } } return true; } // args_tounicode converts any arguments which are not in unicode // to unicode using inconvert void args_tounicode (cgiargsclass &args, inconvertclass &inconvert) { cgiargsclass::iterator here = args.begin(); cgiargsclass::iterator end = args.end(); while (here != end) { if ((*here).second.value.getencoding() > 0) { (*here).second.value = inconvert.convert((*here).second.value); } ++here; } } // fcgienv will be loaded with environment name-value pairs // if using fastcgi (had to do this as getenv doesn't work // with our implementation of fastcgi). if fcgienv is empty // we'll simply use getenv text_t gsdl_getenv (const text_t &name, text_tmap &fcgienv) { if (fcgienv.empty()) { char *n = name.getcstr(); char *v = getenv(n); delete []n; if (v != NULL) return v; return g_EmptyText; } else return fcgienv[name]; }