/**********************************************************************
 *
 * receptionist.cpp -- a web interface for the gsdl
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * PUT COPYRIGHT NOTICE HERE
 *
 * $Id: receptionist.cpp 388 1999-07-15 06:02:05Z rjmcnab $
 *
 *********************************************************************/

/*
   $Log$
   Revision 1.19  1999/07/15 06:02:05  rjmcnab
   Moved the setting of argsinfo into the constructor. Added the configuration
   command argdefault (as used by the actions). Added code to output the
   correct charset based on the page encoding so that the user does not need
   to specify the encoding used for a particular page.

   Revision 1.18  1999/07/11 01:05:20  rjmcnab
   Stored origin of cgiarg with argument.

   Revision 1.17  1999/07/10 22:18:26  rjmcnab
   Added calls to define_external_cgiargs.

   Revision 1.16  1999/06/27 21:49:03  sjboddie
   fixed a couple of version conflicts - tidied up some small things

   Revision 1.15  1999/06/26 01:14:32  rjmcnab
   Made a couple of changes to handle different encodings.

   Revision 1.14  1999/06/09 00:08:36  sjboddie
   query string macro (_cgiargq_) is now made html safe before being set

   Revision 1.13  1999/06/08 04:29:31  sjboddie
   added argsinfo to the call to check_cgiargs to make it easy to set
   args to their default if they're found to be screwed up

   Revision 1.12  1999/04/30 01:59:42  sjboddie
   lots of stuff - getting documentaction working (documentaction replaces
   old browseaction)

   Revision 1.11  1999/03/25 03:06:43  sjboddie
   altered receptionist slightly so it now passes *collectproto to
   define_internal_macros and define_external_macros - need it
   for browseaction

   Revision 1.10  1999/03/05 03:53:54  sjboddie
   fixed some bugs

   Revision 1.9  1999/02/28 20:00:16  rjmcnab
   Fixed a few things.

   Revision 1.8  1999/02/25 21:58:59  rjmcnab
   Merged sources.

   Revision 1.7  1999/02/21 22:33:55  rjmcnab
   Lots of stuff :-)

   Revision 1.6  1999/02/11 01:24:05  rjmcnab
   Fixed a few compiler warnings.

   Revision 1.5  1999/02/08 01:28:02  rjmcnab
   Got the receptionist producing something using the statusaction.

   Revision 1.4  1999/02/05 10:42:46  rjmcnab
   Continued working on receptionist

   Revision 1.3  1999/02/04 10:00:56  rjmcnab
   Developed the idea of an "action" and having them define the cgi arguments
   which they need and how those cgi arguments function.

   Revision 1.2  1999/02/04 01:17:27  rjmcnab
   Got it outputing something.

 */


#include "receptionist.h"
#include "fileutil.h"
#include "cgiutils.h"
#include "htmlutils.h"
#include "OIDtools.h"

// NOTE(review): the system header names were missing from this copy of
// the file. They have been restored from the facilities this file uses
// (assert(), srand(), time()) -- confirm against the repository.
#include <assert.h>
#include <stdlib.h>
#include <time.h>


// The constructor registers the receptionist's own cgi arguments in
// argsinfo. Every field of ainfo that is used here is reassigned before
// each call to addarginfo, so the entries are independent of each other.
receptionist::receptionist () {
  // create a list of cgi arguments
  // this must be done before the configuration

  cgiarginfo ainfo;

  ainfo.shortname = "e";
  ainfo.longname = "compressed arguments";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::good;
  ainfo.argdefault = "";
  ainfo.savedarginfo = cgiarginfo::mustnot;
  argsinfo.addarginfo (NULL, ainfo);

  ainfo.shortname = "a";
  ainfo.longname = "action";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::none;
  ainfo.argdefault = "";
  ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, ainfo);

  // w=western
  ainfo.shortname = "w";
  ainfo.longname = "encoding";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::weak;
  ainfo.argdefault = "w";
  ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, ainfo);

  ainfo.shortname = "nw";
  ainfo.longname = "new encoding";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::none;
  ainfo.argdefault = "";
  ainfo.savedarginfo = cgiarginfo::mustnot;
  argsinfo.addarginfo (NULL, ainfo);

  ainfo.shortname = "c";
  ainfo.longname = "collection";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::none;
  ainfo.argdefault = "";
  ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, ainfo);

  // 0=text+graphics, 1=text
  ainfo.shortname = "v";
  ainfo.longname = "version";
  ainfo.multiplechar = false;
  ainfo.defaultstatus = cgiarginfo::weak;
  ainfo.argdefault = "0";
  ainfo.savedarginfo = cgiarginfo::can;
  argsinfo.addarginfo (NULL, ainfo);

  // 0=normal, 1=big
  ainfo.shortname = "f";
  ainfo.longname = "query box size";
  ainfo.multiplechar = false;
  ainfo.defaultstatus = cgiarginfo::weak;
  ainfo.argdefault = "0";
  ainfo.savedarginfo = cgiarginfo::can;
  argsinfo.addarginfo (NULL, ainfo);

  // the interface language name should use the ISO 639
  // standard
  ainfo.shortname = "l";
  ainfo.longname = "interface language";
  ainfo.multiplechar = true;
  ainfo.defaultstatus = cgiarginfo::weak;
  ainfo.argdefault = "en";
  ainfo.savedarginfo = cgiarginfo::must;
  argsinfo.addarginfo (NULL, ainfo);
}


// configure should be called for each line in the
// configuration files to configure the receptionist and everything
// it contains. The configuration should take place after everything
// has been added but before the initialisation.
//
// key is the configuration command and cfgline holds its values.
// Keys handled here: gsdlhome, collection, collectdir, httpprefix,
// httpimg, gwcgi, macrofiles, saveconf and argdefault; any other key is
// ignored by the receptionist itself. Whatever the key, the line is then
// passed on to every registered action and protocol.
void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
  // configure the receptionist
  if (cfgline.size() >= 1) {
    cgiarginfo *info = NULL;

    if (key == "gsdlhome") configinfo.gsdlhome = cfgline[0];
    else if (key == "collection") {
      configinfo.collection = cfgline[0];

      // also need to set the default arg to this collection
      if ((info = argsinfo.getarginfo("c")) != NULL) {
        info->defaultstatus = cgiarginfo::good;
        info->argdefault = cfgline[0];
      }
    }
    else if (key == "collectdir") configinfo.collectdir = cfgline[0];
    else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
    else if (key == "httpimg") configinfo.httpimg = cfgline[0];
    else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
    else if (key == "macrofiles") configinfo.macrofiles = cfgline;
    else if (key == "saveconf") configinfo.saveconf = cfgline[0];
    else if ((key == "argdefault") && (cfgline.size() == 2) &&
             ((info = argsinfo.getarginfo(cfgline[0])) != NULL)) {
      // "argdefault <shortname> <value>": only replace a default whose
      // current status is no higher than cgiarginfo::config
      if (info->defaultstatus <= cgiarginfo::config) {
        info->defaultstatus = cgiarginfo::config;
        info->argdefault = cfgline[1];
      }
    }
  }

  // configure the actions
  actionptrmap::iterator actionhere = actions.begin ();
  actionptrmap::iterator actionend = actions.end ();
  while (actionhere != actionend) {
    assert ((*actionhere).second.a != NULL);
    // the check is repeated so that builds without assertions skip the entry
    if ((*actionhere).second.a != NULL)
      (*actionhere).second.a->configure(key, cfgline);
    actionhere++;
  }

  // configure the protocols
  recptprotolistclass::iterator protohere = protocols.begin ();
  recptprotolistclass::iterator protoend = protocols.end ();
  while (protohere != protoend) {
    assert ((*protohere).p != NULL);
    if ((*protohere).p != NULL)
      (*protohere).p->configure(key, cfgline);
    protohere++;
  }
}

// Convenience overload for a configuration command with a single value:
// wraps value in a one-element array and calls the array version.
void receptionist::configure (const text_t &key, const text_t &value) {
  text_tarray cfgline;
  cfgline.push_back (value);
  configure(key, cfgline);
}


// init should be called after all the actions, protocols, and
// converters have been added to the receptionist and after everything
// has been configured but before any pages are created.
// It returns true on success and false on failure. If false is
// returned getpage should not be called (without producing
// meaningless output), instead an error page should be
// produced by the calling code.
bool receptionist::init (ostream &logout) {
  // Step 1: settle on the collection directory. Outside collection
  // specific mode it is simply gsdlhome.
  text_t collectroot = configinfo.gsdlhome;
  if (!configinfo.collection.empty()) {
    if (configinfo.collectdir.empty()) {
      // not configured explicitly: use
      // $GSDLHOME/collect/collection-name if it contains etc/collect.cfg,
      // otherwise stay with $GSDLHOME
      text_t candidate = filename_cat (configinfo.gsdlhome, "collect");
      candidate = filename_cat (candidate, configinfo.collection);

      text_t cfgfile = filename_cat (candidate, "etc");
      cfgfile = filename_cat (cfgfile, "collect.cfg");

      if (file_exists (cfgfile)) collectroot = candidate;
    } else {
      // an explicitly configured directory wins
      collectroot = configinfo.collectdir;
    }
  }
  configure ("collectdir", collectroot);

  // Step 2: the macro files must all load
  if (!read_macrofiles (logout)) return false;

  // Step 3: at least one action is required
  if (actions.empty()) {
    logout << "Error: no actions have been added to the receptionist\n";
    return false;
  }

  // Step 4: merge every action's cgi arguments into argsinfo
  for (actionptrmap::iterator arg_it = actions.begin (), arg_stop = actions.end ();
       arg_it != arg_stop; ++arg_it) {
    assert ((*arg_it).second.a != NULL);
    if ((*arg_it).second.a == NULL) continue;

    if (!argsinfo.addarginfo (&logout, (*arg_it).second.a->getargsinfo()))
      return false;
  }

  // Step 5: build the saveconf string when none was configured, then
  // validate whichever one is in use
  if (configinfo.saveconf.empty()) {
    configinfo.saveconf = create_save_conf_str (argsinfo, logout);
  }
  if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
    return false;

  // Step 6: seed the random number generator
  srand (time(NULL));

  // Step 7: have every output converter remove zero-width spaces and
  // remember the name of the first usable converter
  text_t fallbackencoding;
  for (convertinfoclass::iterator conv = converters.begin (), conv_stop = converters.end ();
       conv != conv_stop; ++conv) {
    assert ((*conv).second.outconverter != NULL);
    if ((*conv).second.outconverter == NULL) continue;

    (*conv).second.outconverter->set_rzws(1);
    if (fallbackencoding.empty()) fallbackencoding = (*conv).second.name;
  }

  // Step 8: use that converter as the default for "w" unless a default
  // of status config or better is already in place
  if (!fallbackencoding.empty()) {
    cgiarginfo *winfo = argsinfo.getarginfo ("w");
    const bool overridable =
      (winfo != NULL) && (winfo->defaultstatus < cgiarginfo::config);
    if (overridable) {
      winfo->defaultstatus = cgiarginfo::good;
      winfo->argdefault = fallbackencoding;
    }
  }

  // Step 9: initialise the actions; a missing or failing one aborts
  for (actionptrmap::iterator act = actions.begin (), act_stop = actions.end ();
       act != act_stop; ++act) {
    if ((*act).second.a == NULL) return false;
    if (!(*act).second.a->init(logout)) return false;
  }

  // Step 10: initialise the protocols in the same way
  for (recptprotolistclass::iterator proto = protocols.begin (), proto_stop = protocols.end ();
       proto != proto_stop; ++proto) {
    if ((*proto).p == NULL) return false;
    if (!(*proto).p->init(logout)) return false;
  }

  return true;
}


// parse_cgi_args parses cgi arguments into an argument class.
// This function should be called for each page request. It returns false
// if there was a major problem with the cgi arguments.
bool receptionist::parse_cgi_args (const text_t &argstr, cgiargsclass &args, ostream &logout) { outconvertclass text_t2ascii; // get an initial list of cgi arguments args.clear(); split_cgi_args (argstr, args); // expand the compressed argument (if there was one) if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false; // add the defaults add_default_args (argsinfo, args, logout); // get the input encoding text_t &arg_w = args["w"]; inconvertclass defaultinconvert; inconvertclass *inconvert = converters.get_inconverter (arg_w); if (inconvert == NULL) inconvert = &defaultinconvert; // see if the next page will have a different encoding if (args.getarg("nw") != NULL) arg_w = args["nw"]; // convert arguments which aren't in unicode to unicode args_tounicode (args, *inconvert); // decide on the output conversion class (needed for checking the external // cgi arguments) rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); // check the main cgi arguments if (!check_mainargs (args, logout)) return false; // check the arguments for the action action *a = actions.getaction (args["a"]); if (a != NULL) { if (!a->check_cgiargs (argsinfo, args, logout)) return false; } else { // the action was not found!! 
logout << text_t2ascii << "Error: the action \"" << args["a"] << "\" could not be found.\n"; return false; } // check external cgi arguments for each action actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { assert ((*actionhere).second.a != NULL); if ((*actionhere).second.a != NULL) { if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter, configinfo.saveconf, logout)) return false; } actionhere++; } // the action might have changed but we will assume that // the cgiargs were checked properly when the change was made return true; } // produce_cgi_page will call get_cgihead_info and // produce_content in the appropriate way to output a cgi header and // the page content (if needed). If a page could not be created it // will return false bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout, ostream &logout) { outconvertclass text_t2ascii; response_t response; text_t response_data; // produce cgi header get_cgihead_info (args, response, response_data, logout); if (response == location) { // I've forgotten how to do this :-/ return true; } else if (response == content) { // content response contentout << text_t2ascii << "Content-type: " << response_data << "\n\n"; } else { // unknown response logout << "Error: get_cgihead_info returned an unknown response type.\n"; return false; } // produce cgi page if (!produce_content (args, contentout, logout)) return false; // flush contentout contentout << flush; return true; } // get_cgihead_info determines the cgi header information for // a set of cgi arguments. If response contains location then // response_data contains the redirect address. If reponse // contains content then reponse_data contains the content-type. // Note that images can now be produced by the receptionist. 
void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response, text_t &response_data, ostream &logout) { outconvertclass text_t2ascii; // get the action action *a = actions.getaction (args["a"]); if (a != NULL) { a->get_cgihead_info (args, response, response_data, logout); } else { // the action was not found!! logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \"" << args["a"] << "\" could not be found.\n"; response = content; response_data = "text/html"; } // add the encoding information if (response == content) { if (args["w"] == "u") { response_data += "; charset=UTF-8"; } else { response_data += "; charset=ISO-8859-1"; } } } // produce the page content bool receptionist::produce_content (cgiargsclass &args, ostream &contentout, ostream &logout) { // decide on the output conversion class text_t &arg_w = args["w"]; rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); // decide on the protocol used for communicating with // the collection server recptproto *collectproto = NULL; if (!args["c"].empty()) { collectproto = protocols.getrecptproto (args["c"], logout); } // produce the page using the desired action action *a = actions.getaction (args["a"]); if (a != NULL) { if (a->uses_display(args)) prepare_page (a, args, collectproto, (*outconverter), logout); if (!a->do_action (args, collectproto, disp, (*outconverter), contentout, logout)) return false; } else { // the action was not found!! outconvertclass text_t2ascii; logout << text_t2ascii << "Error receptionist::produce_content: the action \"" << args["a"] << "\" could not be found.\n"; contentout << (*outconverter) << "\n" << "\n" << "Error\n" << "\n" << "\n" << "

Oops!

\n" << "Undefined Page. The action \"" << args["a"] << "\" could not be found.\n" << "\n" << "\n"; } return true; } // returns the compressed argument ("e") corresponding to the argument // list. This can be used to save preferences between sessions. text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) { // decide on the output conversion class text_t &arg_w = args["w"]; rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); text_t compressed_args; if (compress_save_args (argsinfo, configinfo.saveconf, args, compressed_args, *outconverter, logout)) return compressed_args; return ""; } // will read in all the macro files. If one is not found an // error message will be written to logout and the method will // return false. bool receptionist::read_macrofiles (ostream &logout) { outconvertclass text_t2ascii; // redirect the error output to logout disp.setlogout (&logout); // load up the default macro files, the collection directory // is searched first for the file (if this is being used in // collection specific mode) and then the main directory text_t colmacrodir = filename_cat (configinfo.collectdir, "macros"); text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros"); text_tarray::iterator arrhere = configinfo.macrofiles.begin(); text_tarray::iterator arrend = configinfo.macrofiles.end(); text_t filename; while (arrhere != arrend) { // filename is used as a flag to indicate whether // the macro file has been found filename.clear(); // try in the collection directory if this is being // run in collection specific mode if (!configinfo.collection.empty()) { filename = filename_cat (colmacrodir, *arrhere); if (!file_exists (filename)) filename.clear (); } // if we haven't found the macro file yet try in // the main macro directory if (filename.empty()) { filename = filename_cat (gsdlmacrodir, 
*arrhere); if (!file_exists (filename)) filename.clear (); } // see if we found the file or not if (filename.empty()) { logout << text_t2ascii << "Error: the macro file \"" << *arrhere << "\" could not be found.\n"; if (configinfo.collection.empty()) { logout << text_t2ascii << "It should be in " << gsdlmacrodir << ".\n\n"; } else { logout << text_t2ascii << "It should be in either " << colmacrodir << " or in " << gsdlmacrodir << ".\n\n"; } return false; } else { // found the file disp.loaddefaultmacros(filename); } arrhere++; } // success return true; } // check_mainargs will check all the main arguments. If a major // error is found it will return false and no cgi page should // be created using the arguments. bool receptionist::check_mainargs (cgiargsclass &args, ostream &/*logout*/) { // if this receptionist is running in collection dependant mode // then it should always set the collection argument to the // collection if (!configinfo.collection.empty()) args["c"] = configinfo.collection; // argument "v" can only be 0 or 1. Use the default value // if it is out of range int arg_v = args.getintarg ("v"); if (arg_v != 0 && arg_v != 1) { cgiarginfo *vinfo = argsinfo.getarginfo ("v"); if (vinfo != NULL) args["v"] = vinfo->argdefault; } // argument "f" can only be 0 or 1. 
Use the default value // if it is out of range int arg_f = args.getintarg ("f"); if (arg_f != 0 && arg_f != 1) { cgiarginfo *finfo = argsinfo.getarginfo ("f"); if (finfo != NULL) args["f"] = finfo->argdefault; } return true; } // prepare_page sets up page parameters, sets display macros // and opens the page ready for output void receptionist::prepare_page (action *a, cgiargsclass &args, recptproto *collectproto, outconvertclass &outconvert, ostream &logout) { // set up page parameters text_t pageparams; bool first = true; if (!args["c"].empty()) { pageparams += "collection=" + args["c"]; first = false;} if (args.getintarg("u") == 1) if (first) {pageparams += "style=htmlonly"; first = false;} else pageparams += ",style=htmlonly"; if (args.getintarg("v") == 1) if (first) {pageparams += "version=text"; first = false;} else pageparams += ",version=text"; if (args.getintarg("f") == 1) if (first) {pageparams += ",queryversion=big"; first = false;} else pageparams += ",queryversion=big"; if (args["l"] != "en") if (first) pageparams += ",language=" + args["l"]; else pageparams += ",language=" + args["l"]; // open the page disp.openpage(pageparams, MACROPRECEDENCE); // define general macros define_general_macros (args, outconvert, logout); // define external macros for each action actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { assert ((*actionhere).second.a != NULL); if ((*actionhere).second.a != NULL) (*actionhere).second.a->define_external_macros (disp, args, collectproto, logout); actionhere++; } // define internal macros for the current action a->define_internal_macros (disp, args, collectproto, logout); } void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/, ostream &logout) { disp.setmacro ("gwcgi", "Global", configinfo.gwcgi); disp.setmacro ("httpimg", "Global", configinfo.httpimg); disp.setmacro ("httpprefix", "Global", 
configinfo.httpprefix); disp.setmacro("compressedoptions", "Global", get_compressed_arg(args, logout)); // set _cgiargX_ macros for each cgi argument cgiargsclass::const_iterator argshere = args.begin(); cgiargsclass::const_iterator argsend = args.end(); while (argshere != argsend) { if ((*argshere).first == "q") // need to escape special characters from query string disp.setmacro ("cgiargq", "Global", html_safe((*argshere).second.value)); else disp.setmacro ("cgiarg" + (*argshere).first, "Global", (*argshere).second.value); argshere ++; } }