/********************************************************************** * * receptionist.cpp -- a web interface for the gsdl * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ // following line required to get fstream.filedesc() on darwin (Mac OS X) // gcc 2.91 automatically defines this in stream.h #define _STREAM_COMPAT 1 #include "receptionist.h" #include "recptprototools.h" #include "fileutil.h" #include "cgiutils.h" #include "htmlutils.h" #include "gsdltools.h" #include "gsdltimes.h" #include "OIDtools.h" #include "securitytools.h" #include #include #include // for open() #include // for open() flags // following 2 are for printing Last-Modified http header. #include #include #if defined (GSDL_USE_IOS_H) #include #else #include #endif void recptconf::clear () { gsdlhome.clear(); collecthome.clear(); dbhome.clear(); collectinfo.erase(collectinfo.begin(), collectinfo.end()); collection.clear(); collectdir.clear(); httpprefix.clear(); httpweb.clear(); gwcgi.clear(); macrofiles.erase(macrofiles.begin(), macrofiles.end()); saveconf.clear(); usecookies = false; logcgiargs = false; LogDateFormat = LocalTime; maintainer.clear(); MailServer.clear(); LogEvents = Disabled; EmailEvents = Disabled; EmailUserEvents = false; languages.erase(languages.begin(), languages.end()); encodings.erase(encodings.begin(), encodings.end()); site_auth = false; HomePageType = "images"; HomePageCols = 3; // these default page parameters can always be overriden // in the configuration file pageparams.erase(pageparams.begin(), pageparams.end()); pageparams["c"] = ""; pageparams["l"] = "en"; #ifdef MACROPRECEDENCE macroprecedence = MACROPRECEDENCE; #else macroprecedence.clear(); #endif } void collectioninfo_t::clear () { gsdl_gsdlhome.clear(); gsdl_dbhome.clear(); info_loaded = false; info.clear(); } void languageinfo_t::clear () { longname.clear(); defaultencoding.clear(); } receptionist::receptionist () { // create a list of cgi arguments // this must be done before the configuration cgiarginfo ainfo; ainfo.shortname = "e"; ainfo.longname = "compressed arguments"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::good; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, ainfo); ainfo.shortname = "a"; ainfo.longname = "action"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, ainfo); // w=western ainfo.shortname = "w"; ainfo.longname = "encoding"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, ainfo); ainfo.shortname = "nw"; ainfo.longname = "new encoding"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, ainfo); ainfo.shortname = "c"; ainfo.longname = "collection"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, ainfo); // the interface language name should use the ISO 639 // standard ainfo.shortname = "l"; ainfo.longname = "interface language"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::weak; ainfo.argdefault = "en"; ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, ainfo); ainfo.shortname = "nl"; ainfo.longname = "new language"; ainfo.multiplechar = false; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = "0"; ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, ainfo); // the GSDL_UID (cookie) ainfo.shortname = "z"; ainfo.longname = "gsdl uid"; ainfo.multiplechar = true; ainfo.defaultstatus = cgiarginfo::none; ainfo.argdefault = g_EmptyText; ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, ainfo); } void receptionist::add_action (action *theaction) { // make sure we have an action to add if (theaction == NULL) return; // add this action to the list of actions actions.addaction(theaction); // add the cgi arguments from this action argsinfo.addarginfo (NULL, *(theaction->getargsinfo())); } void receptionist::add_browser (browserclass *thebrowser) { // make sure we have a browser to add if (thebrowser == NULL) return; // add this browser to the list of browsers browsers.addbrowser(thebrowser); } void receptionist::setdefaultbrowser (const text_t &browsername) { browsers.setdefaultbrowser (browsername); } // configure should be called for each line in the // configuration files to configure the receptionist and everything // it contains. The configuration should take place after everything // has been added but before the initialisation. void receptionist::configure (const text_t &key, const text_tarray &cfgline) { // configure the receptionist if (cfgline.size() >= 1) { cgiarginfo *info = NULL; if (key == "gsdlhome") { configinfo.gsdlhome = cfgline[0]; if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0]; } else if (key == "collecthome") configinfo.collecthome = cfgline[0]; else if (key == "gdbmhome") configinfo.dbhome = cfgline[0]; else if (key == "collection") { configinfo.collection = cfgline[0]; // also need to set the default arg to this collection if ((info = argsinfo.getarginfo("c")) != NULL) { info->defaultstatus = cgiarginfo::good; info->argdefault = cfgline[0]; } } else if (key == "collectdir") configinfo.collectdir = cfgline[0]; else if (key == "httpprefix") configinfo.httpprefix = cfgline[0]; else if (key == "httpweb") configinfo.httpweb = cfgline[0]; else if (key == "gwcgi") configinfo.gwcgi = cfgline[0]; else if (key == "macrofiles") { // want to append to macrofiles (i.e. may be several config files // contributing, maybe from several collections). text_tarray::const_iterator here = cfgline.begin(); text_tarray::const_iterator end = cfgline.end(); while (here != end) { configinfo.macrofiles.insert (*here); ++here; } } else if (key == "saveconf") configinfo.saveconf = cfgline[0]; else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true"); else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true"); else if (key == "maintainer") configinfo.maintainer = cfgline[0]; else if (key == "MailServer") configinfo.MailServer = cfgline[0]; else if (key == "LogDateFormat") { if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime; else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute; } else if (key == "LogEvents") { if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents; else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents; } else if (key == "EmailEvents") { if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents; else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents; } else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true"); else if (key == "pageparam") { if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1]; else configinfo.pageparams[cfgline[0]] = ""; } else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0]; else if (key == "collectinfo") { if (cfgline.size() == 3) { // for backwards compatability with older collections that only use // gsdlhome and dbhome collectioninfo_t cinfo; cinfo.gsdl_gsdlhome = cfgline[1]; cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect"); cinfo.gsdl_dbhome = cfgline[2]; configinfo.collectinfo[cfgline[0]] = cinfo; } else if (cfgline.size() >= 4) { collectioninfo_t cinfo; cinfo.gsdl_gsdlhome = cfgline[1]; cinfo.gsdl_collecthome = cfgline[2]; cinfo.gsdl_dbhome = cfgline[3]; configinfo.collectinfo[cfgline[0]] = cinfo; } } // Read in the value for the site_auth directive either true or false else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true"); else if (key == "site_group") joinchar(cfgline,',',configinfo.site_group); else if (key == "SiteFormat") { if (cfgline[0] == "HomePageType") { configinfo.HomePageType = cfgline[1]; } else if (cfgline[0] == "HomePageCols") { configinfo.HomePageCols = cfgline[1].getint(); } } else if (key == "cgiarg") { // get shortname bool seen_defaultstatus = false; text_t subkey, subvalue; text_t shortname; text_t::const_iterator cfglinesub_here; text_tarray::const_iterator cfgline_here = cfgline.begin(); text_tarray::const_iterator cfgline_end = cfgline.end(); while (cfgline_here != cfgline_end) { cfglinesub_here = getdelimitstr((*cfgline_here).begin(), (*cfgline_here).end(), '=', subkey); if (subkey == "shortname") { shortname = substr (cfglinesub_here, (*cfgline_here).end()); } ++cfgline_here; } // if we found the shortname process the line again filling in values if (!shortname.empty()) { cgiarginfo &chinfo = argsinfo[shortname]; chinfo.shortname = shortname; // in case this is a new argument cfgline_here = cfgline.begin(); while (cfgline_here != cfgline_end) { cfglinesub_here = getdelimitstr((*cfgline_here).begin(), (*cfgline_here).end(), '=', subkey); subvalue = substr (cfglinesub_here, (*cfgline_here).end()); if (subkey == "longname") chinfo.longname = subvalue; else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true"); else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true"); else if (subkey == "defaultstatus") { seen_defaultstatus = true; if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none; else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak; else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good; else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config; else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative; } else if (subkey == "argdefault") { chinfo.argdefault = subvalue; if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config; } else if (subkey == "savedarginfo") { if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot; else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can; else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must; } ++cfgline_here; } } } else if (key == "Encoding") { configure_encoding (cfgline); } else if (key == "Language") { text_t subkey, subvalue, shortname; languageinfo_t lang; text_t::const_iterator cfglinesub_here; text_tarray::const_iterator cfgline_here = cfgline.begin(); text_tarray::const_iterator cfgline_end = cfgline.end(); while (cfgline_here != cfgline_end) { cfglinesub_here = getdelimitstr((*cfgline_here).begin(), (*cfgline_here).end(), '=', subkey); if (subkey == "shortname") { shortname = substr (cfglinesub_here, (*cfgline_here).end()); } else if (subkey == "longname") { lang.longname = substr (cfglinesub_here, (*cfgline_here).end()); } else if (subkey == "default_encoding") { lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end()); } ++cfgline_here; } if (!shortname.empty()) { if (lang.longname.empty()) lang.longname = shortname; configinfo.languages[shortname] = lang; } } } // configure the actions actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { assert ((*actionhere).second.a != NULL); if ((*actionhere).second.a != NULL) (*actionhere).second.a->configure(key, cfgline); ++actionhere; } // configure the protocols recptprotolistclass::iterator protohere = protocols.begin (); recptprotolistclass::iterator protoend = protocols.end (); while (protohere != protoend) { assert ((*protohere).p != NULL); comerror_t err; if ((*protohere).p != NULL) (*protohere).p->configure(key, cfgline, err); ++protohere; } // configure the browsers browserptrmap::iterator browserhere = browsers.begin (); browserptrmap::iterator browserend = browsers.end (); while (browserhere != browserend) { assert ((*browserhere).second.b != NULL); if ((*browserhere).second.b != NULL) (*browserhere).second.b->configure(key, cfgline); ++browserhere; } } void receptionist::configure (const text_t &key, const text_t &value) { text_tarray cfgline; cfgline.push_back (value); configure(key, cfgline); } // init should be called after all the actions and protocols have been // added to the receptionist and after everything has been configured but // before any pages are created. It returns true on success and false on // failure. If false is returned getpage should not be called (without // producing meaningless output), instead an error page should be produced // by the calling code. bool receptionist::init (ostream &logout) { // first configure collectdir if (!configinfo.collection.empty()) { // collection specific mode text_t collectdir = configinfo.gsdlhome; if (!configinfo.collectdir.empty()) { // has already been configured collectdir = configinfo.collectdir; } else { // decide where collectdir is by searching for collect.cfg // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and // then $GSDLHOME/etc/collect.cfg collectdir = filename_cat (configinfo.gsdlhome, "collect"); collectdir = filename_cat (collectdir, configinfo.collection); text_t filename = filename_cat (collectdir, "etc"); filename = filename_cat (filename, "collect.cfg"); if (!file_exists(filename)) collectdir = configinfo.gsdlhome; } configure("collectdir", collectdir); } else { text_t collecthome; if (configinfo.collecthome.empty()) { collecthome = filename_cat(configinfo.gsdlhome,"collect"); } else { collecthome = configinfo.collecthome; } configure("collecthome", collecthome); // for backwards compatability collectdir set to gsdlhome // (possible it could now be removed) configure("collectdir", configinfo.gsdlhome); } // read in the macro files if (!read_macrofiles (logout)) return false; // there must be at least one action defined if (actions.empty()) { logout << "Error: no actions have been added to the receptionist\n"; return false; } // there must be at least one browser defined if (browsers.empty()) { logout << "Error: no browsers have been added to the receptionist\n"; return false; } // create a saveconf string if there isn't one already if (configinfo.saveconf.empty()) configinfo.saveconf = create_save_conf_str (argsinfo, logout); // check the saveconf string if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout)) return false; // set a random seed srand (time(NULL)); // if maintainer email address is something dodgy (for now I'll define // dodgy as being anything that doesn't contain '@') disable EmailEvents // and EmailUserEvents (we don't strictly need to disable EmailUserEvents // in this case but we will as it seems likely that MailServer will also // be screwed up if maintainer is). text_t::const_iterator maintainer_end = configinfo.maintainer.end (); text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(), maintainer_end, '@'); if (maintainer_here == maintainer_end) { configinfo.EmailEvents = Disabled; configinfo.EmailUserEvents = Disabled; } else { // if MailServer isn't set it should default to mail.maintainer-domain if (configinfo.MailServer.empty()) { configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end); } } // init the actions actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { if (((*actionhere).second.a == NULL) || !(*actionhere).second.a->init(logout)) return false; ++actionhere; } // init the protocols recptprotolistclass::iterator protohere = protocols.begin (); recptprotolistclass::iterator protoend = protocols.end (); while (protohere != protoend) { comerror_t err; if (((*protohere).p == NULL) || !(*protohere).p->init(err, logout)) return false; ++protohere; } // init the browsers browserptrmap::iterator browserhere = browsers.begin (); browserptrmap::iterator browserend = browsers.end (); while (browserhere != browserend) { if (((*browserhere).second.b == NULL) || !(*browserhere).second.b->init(logout)) return false; ++browserhere; } return true; } // get the default encoding for the given language - if it fails for any // reason return "" text_t receptionist::get_default_encoding (const text_t &language) { // make sure language is valid if (configinfo.languages.find(language) == configinfo.languages.end()) return ""; text_t default_encoding = configinfo.languages[language].defaultencoding; // make sure the encoding is valid if (converters.find(default_encoding) == converters.end()) { // we don't support the encoding specified as default for this language if (configinfo.encodings.size()==1) { // only 1 encoding specified in main.cfg, so use it return configinfo.encodings.begin()->second; } return ""; } return default_encoding; } // parse_cgi_args parses cgi arguments into an argument class. // This function should be called for each page request. It returns false // if there was a major problem with the cgi arguments. bool receptionist::parse_cgi_args (const text_t &argstr, fileupload_tmap &fileuploads, cgiargsclass &args, ostream &logout, text_tmap &fcgienv) { // get an initial list of cgi arguments args.clear(); split_cgi_args (argsinfo, argstr, args); // expand the compressed argument (if there was one) if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false; // add the defaults add_default_args (argsinfo, args, logout); // add any file upload arguments add_fileupload_args(argsinfo, args, fileuploads, logout); // get the cookie if (configinfo.usecookies) get_cookie(args["z"], fcgienv); // if we're changing languages, set the encoding to the default for the new language if (args["nl"] == "1") { args["nw"] = get_default_encoding(args["l"]); } // get the input encoding // if encoding isn't set, set it to the default for the current language if ((args.getarg("w") == NULL) || args["w"].empty()) { args["w"] = get_default_encoding(args["l"]); } text_t &arg_w = args["w"]; inconvertclass defaultinconvert; inconvertclass *inconvert = converters.get_inconverter (arg_w); if (inconvert == NULL) inconvert = &defaultinconvert; // see if the next page will have a different encoding if (args.getarg("nw") != NULL) arg_w = args["nw"]; // convert arguments which aren't in unicode to unicode args_tounicode (args, *inconvert); // decide on the output conversion class (needed for checking the external // cgi arguments) rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); // check the main cgi arguments if (!check_mainargs (args, logout)) return false; // check the arguments for the action action *a = actions.getaction (args["a"]); if (a != NULL) { if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false; } else { // the action was not found!! outconvertclass text_t2ascii; logout << text_t2ascii << "Error: the action \"" << args["a"] << "\" could not be found.\n"; return false; } // check external cgi arguments for each action actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { assert ((*actionhere).second.a != NULL); if ((*actionhere).second.a != NULL) { if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter, configinfo.saveconf, logout)) return false; } ++actionhere; } // the action might have changed but we will assume that // the cgiargs were checked properly when the change was made return true; } // Returns true if cookie already existed, false if it was generated bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv) { // See if we can get the GSDL_UID cookie text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv); if (!cookiestring.empty()) // This should really be handled by the findword function... { // Check if the cookie contains GSDL_UID text_t gsdl_uid = "GSDL_UID="; text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid); if (gsdl_uid_start != cookiestring.end()) { // Yes, so extract its value cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';')); return true; } } // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496 cookie.clear(); text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv); time_t ttime = time(NULL); if (!host.empty()) { cookie += host; cookie.push_back ('-'); } cookie += text_t(ttime); return false; } // Same as above but just tests if cookie exists bool receptionist::get_cookie (text_tmap &fcgienv) { text_t cookie_jar = ""; return get_cookie(cookie_jar, fcgienv); } bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) { // see if we want to log the cgi arguments if (!configinfo.logcgiargs) return true; text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv); text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv); if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv); text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv); cgiargsclass::const_iterator args_here = args.begin(); cgiargsclass::const_iterator args_end = args.end(); text_t argstr; bool first = true; while (args_here != args_end) { if (!first) argstr += ", "; argstr += (*args_here).first + "=" + (*args_here).second.value; first = false; ++args_here; } text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt"); text_t logstr = script_name; logstr += " " + host; logstr += " ["; if (configinfo.LogDateFormat == UTCTime) { logstr += get_date (false); } else if (configinfo.LogDateFormat == Absolute) { time_t ttime = time(NULL); logstr += ttime; } else { // LocalTime logstr += get_date (true); } logstr += "] (" + argstr + ") \""; logstr += browser; logstr += "\"\n"; return append_logstr (logfile, logstr, logout); } bool receptionist::append_logstr (const text_t &filename, const text_t &logstr, ostream &logout) { char *lfile = filename.getcstr(); int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777); //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777); if (fd == -1) { logout << "Error: Couldn't open file " << lfile << "\n"; delete []lfile; return false; } // lock_val is set to 0 if file is locked successfully int lock_val = 1; GSDL_LOCK_FILE (fd); if (lock_val == 0) { // Write the string out in UTF-8 text_t tmp_log_str_utf8 = to_utf8(logstr); char *buffer = tmp_log_str_utf8.getcstr(); size_t num_chars = tmp_log_str_utf8.size(); write(fd, buffer, num_chars); GSDL_UNLOCK_FILE (fd); delete []buffer; } else { logout << "Error: Couldn't lock file " << lfile << "\n"; close(fd); delete []lfile; return false; } close(fd); delete []lfile; return true; } text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args, ostream &logout) { text_t outstring; outconvertclass text_t2ascii; action *a = actions.getaction (args["a"]); if (a != NULL) { prepare_page (a, args, text_t2ascii, logout); } disp.expandstring (displayclass::defaultpackage, astring, outstring); return outstring; } // produce_cgi_page will call get_cgihead_info and // produce_content in the appropriate way to output a cgi header and // the page content (if needed). If a page could not be created it // will return false bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout, ostream &logout, text_tmap &fcgienv) { outconvertclass text_t2ascii; response_t response; text_t response_data; // produce cgi header get_cgihead_info (args, response, response_data, logout, fcgienv); if (response == location) { // location response (url may contain macros!!) response_data = expandmacros (response_data, args, logout); contentout << text_t2ascii << "Location: " << response_data << "\n\n"; contentout << flush; return true; } else if (response == content) { // content response #ifdef GSDL_NOCACHE contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past tm *tm_ptr = NULL; time_t t = time(NULL); tm_ptr = gmtime (&t); if (tm_ptr != NULL) { char *timestr = new char[128]; strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr); contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified delete []timestr; } contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1 contentout << "Pragma: no-cache\n"; // HTTP/1.0 #else // use the later of build.cfg and collect.cfg modification times // as the Last-Modified: header, for caching values struct stat file_info; time_t latest=0; text_t collectname=""; collectname=args["c"]; if (collectname != "") { text_t collecthome; if (!configinfo.collecthome.empty()) { collecthome = configinfo.collecthome; } else { collecthome=filename_cat(configinfo.gsdlhome,"collect"); } text_t collectdir=filename_cat(collecthome,collectname); text_t buildcfg=filename_cat(collectdir,"index"); buildcfg=filename_cat(buildcfg,"build.cfg"); char *buildcfg_ptr=buildcfg.getcstr(); text_t collectcfg=filename_cat(collectdir,"etc"); collectcfg=filename_cat(collectcfg,"collect.cfg"); char *collectcfg_ptr=collectcfg.getcstr(); if (stat(buildcfg_ptr, &file_info)) { // we got an error. Currently don't handle error :( // logout << } else { latest=file_info.st_mtime; } if (stat(collectcfg_ptr, &file_info)) { // error - unhandled for now } else { if (latest0) { // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format // c library takes care of mem for this string... (has \n at end!!!!) // latest is currently local time, convert to UTC. struct tm* utc_latest; utc_latest=gmtime(&latest); contentout << "Last-Modified: " << asctime(utc_latest); } } // end of collection != "" #endif contentout << text_t2ascii << "Content-type: " << response_data << "\n\n"; } else if (response == undecided_location) { // Wait until later to output the target location // Used for the "I'm feeling lucky" functionality } else { // unknown response logout << "Error: get_cgihead_info returned an unknown response type.\n"; return false; } // produce cgi page if (!produce_content (args, contentout, logout)) return false; // flush contentout contentout << flush; return true; } // get_cgihead_info determines the cgi header information for // a set of cgi arguments. If response contains location then // response_data contains the redirect address. If reponse // contains content then reponse_data contains the content-type. // Note that images can now be produced by the receptionist. // Note also, alternative for get_cgihead_info below which // stores the information in a text_tmap so it is more easily digested void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response, text_t &response_data, ostream &logout, text_tmap &fcgienv) { outconvertclass text_t2ascii; // get the action action *a = actions.getaction (args["a"]); if (a != NULL) { a->get_cgihead_info (args, &protocols, response, response_data, logout); } else { // the action was not found!! logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \"" << args["a"] << "\" could not be found.\n"; response = content; response_data = "text/html"; } // add the encoding information if (response == content) { if (converters.find(args["w"]) != converters.end()) { response_data += "; charset=" + args["w"]; } else { // default to latin 1 response_data += "; charset=ISO-8859-1"; } // add cookie if required if (configinfo.usecookies && !get_cookie(fcgienv)) response_data += "\nSet-Cookie: GSDL_UID=" + args["z"] + "; expires=Fri, 25-Dec-2037 00:00:00 GMT"; } } // Alternative version of get_cgihead_info, stores fielded infomation // in text_tmap rather than concatenated string void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers, ostream &logout, text_tmap &fcgienv) { response_t response; text_t response_data; // get the action action *a = actions.getaction (args["a"]); if (a != NULL) { a->get_cgihead_info (args, &protocols, response, response_data, logout); } else { // the action was not found!! outconvertclass text_t2ascii; logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \"" << args["a"] << "\" could not be found.\n"; response = content; response_data = "text/html"; } if (response == location) { response_data = expandmacros(response_data, args, logout); headers["Location"] = response_data; return; } // add the encoding information if (response == content) { if (converters.find(args["w"]) != converters.end()) { headers["content-encoding"] = args["w"]; response_data += "; charset=" + args["w"]; } else { // default to utf-8 headers["content-encoding"] = "utf-8"; response_data += "; charset=utf-8"; } headers["content-type"] = response_data; } } // produce the page content bool receptionist::produce_content (cgiargsclass &args, ostream &contentout, ostream &logout) { // decide on the output conversion class text_t &arg_w = args["w"]; rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781) if (arg_w=="utf-16be") { contentout << '\xfe' << '\xff' ; } recptproto *collectproto = protocols.getrecptproto (args["c"], logout); if (collectproto != NULL) { // get browsers to process OID text_t OID = args["d"]; if (OID.empty()) OID = args["cl"]; if (!OID.empty()) { text_tset metadata; text_tarray OIDs; OIDs.push_back (OID); if (!is_top(OID)) OIDs.push_back (OID + ".pr"); FilterResponse_t response; metadata.insert ("childtype"); if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) { text_t classifytype; if (!response.docInfo[0].metadata["childtype"].values[0].empty()) classifytype = response.docInfo[0].metadata["childtype"].values[0]; else if (!is_top (OID)) { // not sure why this is occasionally not set, but it will // cause a segfault... possibly if built with no_text? jrm21 if (response.docInfo[1].metadata.find("childtype") == response.docInfo[1].metadata.end()) { cerr << "receptionist: no childtype element in metadata map!" << endl; } else { if (!response.docInfo[1].metadata["childtype"].values[0].empty()) classifytype = response.docInfo[1].metadata["childtype"].values[0]; } } browserclass *b = browsers.getbrowser (classifytype); b->processOID (args, collectproto, logout); } } // translate "d" and "cl" arguments if required translate_OIDs (args, collectproto, logout); } // produce the page using the desired action action *a = actions.getaction (args["a"]); if (a != NULL) { if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout); if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout)) return false; } else { // the action was not found!! outconvertclass text_t2ascii; logout << text_t2ascii << "Error receptionist::produce_content: the action \"" << args["a"] << "\" could not be found.\n"; contentout << (*outconverter) << "\n" << "\n" << "Error\n" << "\n" << "\n" << "

Oops!

\n" << "Undefined Page. The action \"" << args["a"] << "\" could not be found.\n" << "\n" << "\n"; } return true; } // returns the compressed argument ("e") corresponding to the argument // list. This can be used to save preferences between sessions. text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) { // decide on the output conversion class text_t &arg_w = args["w"]; rzwsoutconvertclass defaultoutconverter; rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w); if (outconverter == NULL) outconverter = &defaultoutconverter; outconverter->reset(); text_t compressed_args; if (compress_save_args (argsinfo, configinfo.saveconf, args, compressed_args, *outconverter, logout)) return compressed_args; return g_EmptyText; } // will read in all the macro files. If one is not found an // error message will be written to logout and the method will // return false. bool receptionist::read_macrofiles (ostream &logout) { outconvertclass text_t2ascii; // redirect the error output to logout ostream *savedlogout = disp.setlogout (&logout); // unload any macros that were previously loaded - this allows us to call // this function a second time to reload all the macro files (useful for // reading in changed macro files in server versions of greenstone) disp.unloaddefaultmacros(); // load up the default macro files, the collection directory // is searched first for the file (if this is being used in // collection specific mode) and then the main directory(s) text_t colmacrodir = filename_cat (configinfo.collectdir, "macros"); text_tset maindirs; text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros"); maindirs.insert (gsdlmacrodir); colinfo_tmap::iterator colhere = configinfo.collectinfo.begin(); colinfo_tmap::iterator colend = configinfo.collectinfo.end(); while (colhere != colend) { if (!((*colhere).second.gsdl_gsdlhome).empty()) { gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros"); maindirs.insert (gsdlmacrodir); } ++colhere; } text_tset::iterator arrhere = configinfo.macrofiles.begin(); text_tset::iterator arrend = configinfo.macrofiles.end(); text_t filename; while (arrhere != arrend) { bool foundfile = false; // try in the collection directory if this is being // run in collection specific mode if (!configinfo.collection.empty()) { filename = filename_cat (colmacrodir, *arrhere); if (file_exists (filename)) { disp.loaddefaultmacros(filename); foundfile = true; } } // if we haven't found the macro file yet try in // the main macro directory(s) // if file is found in more than one main directory // we'll load all copies if (!foundfile) { text_tset::const_iterator dirhere = maindirs.begin(); text_tset::const_iterator dirend = maindirs.end(); while (dirhere != dirend) { filename = filename_cat (*dirhere, *arrhere); if (file_exists (filename)) { disp.loaddefaultmacros(filename); foundfile = true; } ++dirhere; } } // see if we found the file or not if (!foundfile) { logout << text_t2ascii << "Error: the macro file \"" << *arrhere << "\" could not be found.\n"; if (configinfo.collection.empty()) { text_t dirs; joinchar (maindirs, ", ", dirs); logout << text_t2ascii << "It should be in either of the following directories (" << dirs << ").\n\n"; } else { logout << text_t2ascii << "It should be in either " << colmacrodir << " or in " << gsdlmacrodir << ".\n\n"; } // don't crap out if a macro file is missing //disp.setlogout (savedlogout); //return false; } ++arrhere; } // success // reset logout to what it was disp.setlogout (savedlogout); return true; } // Go through the list of macro files looking to see // if any exist in the collectoin specific area. If they // do then read them in and add them to the set of existing // current macros void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout) { outconvertclass text_t2ascii; // disp.unloadcollectionmacros(); // redirect the error output to logout ostream *savedlogout = disp.setlogout (&logout); text_t colmacrodir = filename_cat (configinfo.collecthome,collection, "macros"); if (directory_exists (colmacrodir)) { text_tset::iterator arrhere = configinfo.macrofiles.begin(); text_tset::iterator arrend = configinfo.macrofiles.end(); text_t filename; while (arrhere != arrend) { filename = filename_cat (colmacrodir, *arrhere); if (file_exists (filename)) { disp.loadcollectionmacros(filename); } ++arrhere; } } // reset logout to what it was disp.setlogout (savedlogout); } // check_mainargs will check all the main arguments. If a major // error is found it will return false and no cgi page should // be created using the arguments. bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) { if(configinfo.site_auth) { args["uan"] = "1"; args["ug"] = configinfo.site_group; } // if this receptionist is running in collection dependant mode // then it should always set the collection argument to the // collection if (!configinfo.collection.empty()) args["c"] = configinfo.collection; // if current collection uses ccscols make sure // "ccs" argument is set and make "cc" default to // all collections in "ccs" if (args["a"] != "config" && !args["c"].empty()) { text_t &arg_c = args["c"]; recptproto *collectproto = protocols.getrecptproto (arg_c, logout); if (collectproto == NULL) { // oops, this collection isn't valid outconvertclass text_t2ascii; logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n"; // args["c"].clear(); } else { ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout); if(cinfo->authenticate == "collection") { args["uan"] = "1"; args["ug"] = cinfo->auth_group; } if (cinfo != NULL) { if (!cinfo->ccsCols.empty()) { args["ccs"] = 1; if (args["cc"].empty()) { text_tarray::const_iterator col_here = cinfo->ccsCols.begin(); text_tarray::const_iterator col_end = cinfo->ccsCols.end(); bool first = true; while (col_here != col_end) { // make sure it's a valid collection if (protocols.getrecptproto (*col_here, logout) != NULL) { if (!first) args["cc"].push_back (','); args["cc"] += *col_here; first = false; } ++col_here; } } } } else { logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n"; } } } // argument "v" can only be 0 or 1. Use the default value // if it is out of range int arg_v = args.getintarg ("v"); if (arg_v != 0 && arg_v != 1) { cgiarginfo *vinfo = argsinfo.getarginfo ("v"); if (vinfo != NULL) args["v"] = vinfo->argdefault; } // argument "f" can only be 0 or 1. Use the default value // if it is out of range int arg_f = args.getintarg ("f"); if (arg_f != 0 && arg_f != 1) { cgiarginfo *finfo = argsinfo.getarginfo ("f"); if (finfo != NULL) args["f"] = finfo->argdefault; } return true; } // translate_OIDs translates the "d" and "cl" arguments to their correct values // if they use the tricky ".fc", ".lc" type syntax. void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto, ostream &logout) { FilterResponse_t response; FilterRequest_t request; comerror_t err; text_t &arg_d = args["d"]; text_t &arg_cl = args["cl"]; text_t &collection = args["c"]; // do a call to translate OIDs if required request.filterName = "NullFilter"; request.filterResultOptions = FROID; if (!arg_d.empty() && needs_translating (arg_d)) { request.docSet.push_back (arg_d); collectproto->filter (collection, request, response, err, logout); arg_d = response.docInfo[0].OID; request.clear(); } // we'll also check here that the "cl" argument has a "classify" doctype // (in case ".fc" or ".lc" have screwed up) if (needs_translating (arg_cl)) { request.fields.insert ("doctype"); request.docSet.push_back (arg_cl); request.filterResultOptions = FRmetadata; collectproto->filter (collection, request, response, err, logout); // set to original value (without .xx stuff) if doctype isn't "classify" if (response.docInfo[0].metadata["doctype"].values[0] != "classify") strip_suffix (arg_cl); else arg_cl = response.docInfo[0].OID; } } // prepare_page sets up page parameters, sets display macros // and opens the page ready for output void receptionist::prepare_page (action *a, cgiargsclass &args, outconvertclass &outconvert, ostream &logout) { // set up page parameters text_t pageparams; bool first = true; text_tmap::iterator params_here = configinfo.pageparams.begin(); text_tmap::iterator params_end = configinfo.pageparams.end(); while (params_here != params_end) { // page params are those from main.cfg (eg pageparam v 0) plus // two defaults set in recptconf.clear() (c="" and l=en) // This used to check if the current value of the page param // == the default value, then don't add in it the list // but if l=en, and there is a macro with [l=en], then it doesn't // find it. // so now all page params will go into the list. I assume this will // mean more attempts to find each macro, but nothing worsee than // that. --kjdon //if (args[(*params_here).first] != (*params_here).second) { if (first) first = false; else pageparams += ","; pageparams += (*params_here).first; pageparams += "="; pageparams += args[(*params_here).first]; // } ++params_here; } // open the page disp.openpage(pageparams, configinfo.macroprecedence); disp.unloadcollectionmacros(); text_t collection = args["c"]; if (!collection.empty()) { read_collection_macrofiles(collection,logout); } // define external macros for each action actionptrmap::iterator actionhere = actions.begin (); actionptrmap::iterator actionend = actions.end (); while (actionhere != actionend) { assert ((*actionhere).second.a != NULL); if ((*actionhere).second.a != NULL) { (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout); } ++actionhere; } // define internal macros for the current action a->define_internal_macros (disp, args, &protocols, logout); // define general macros. the defining of general macros is done here so that // the last possible version of the cgi arguments are used define_general_macros (args, outconvert, logout); } void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/, ostream &logout) { text_t &collection = args["c"]; disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome)); disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi); disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb); disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix); // This perhaps should be done with gsdl_getenv() which takes the // 'fcgienv' parameter (for fast-cgi), however if changed to this, this // additional parameter would need to be passed into here (not sure how // that would effect any virtual inheritence going on), or else moved // higher up the calling to chain to, e.g., produce_cgi_page() char* remote_addr = getenv("REMOTE_ADDR"); if (remote_addr != NULL) { text_t remote_addr_t(remote_addr); disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t); } char* remote_host = getenv("REMOTE_HOST"); if (remote_host != NULL) { text_t remote_host_t(remote_host); disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t); } else { // setting this to "unknown" is easier to deal with in format/macro // statements, rather than testing for _remoteHost_ disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown"); } text_t compressedoptions = get_compressed_arg(args, logout); disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions)); // need a decoded version of compressedoptions for use within forms // as browsers encode values from forms before sending to server // (e.g. %25 becomes %2525) decode_cgi_arg (compressedoptions); if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode. // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings? compressedoptions = to_uni(compressedoptions); } text_t dmacrovalue = dm_safe(compressedoptions); disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dmacrovalue); disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(dmacrovalue)); #if defined (__WIN32__) disp.setmacro ("win32", displayclass::defaultpackage, "1"); #endif // set _cgiargX_ macros for each cgi argument cgiargsclass::const_iterator argshere = args.begin(); cgiargsclass::const_iterator argsend = args.end(); while (argshere != argsend) { text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp") if (((*argshere).first == "q") || ((*argshere).first == "qa") || ((*argshere).first == "qtt") || ((*argshere).first == "qty") || ((*argshere).first == "qp") || ((*argshere).first == "qpl") || ((*argshere).first == "qr") || ((*argshere).first == "q2")) { // need to escape special characters from query string macrovalue = html_safe(macrovalue); } else if ((*argshere).first == "hp") { if(!isValidURLProtocol(macrovalue)) { macrovalue = encodeForURL(macrovalue); // URL has invalid protocol like javascript:, so URL encode it } } else { macrovalue = dm_safe(macrovalue); } // set the default value for the macro disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue); // set macros for the encoded versions of the same value. Uses the functions in securitytools.h // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet text_t htmlsafe = encodeForHTML(macrovalue); text_t attrsafe = encodeForHTMLAttr(macrovalue); text_t urlsafe = encodeForURL(macrovalue); text_t jssafe = encodeForJavascript(macrovalue); // with default setting will return \\x and \\u for macro files text_t csssafe = encodeForCSS(macrovalue); // not yet used anywhere, but is available for use in macros text_t sqlsafe = encodeForSQL(macrovalue); disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe); disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe); disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe); disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe); disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe); disp.setmacro ("cgiarg" + (*argshere).first + "Sqlsafe", displayclass::defaultpackage, sqlsafe); ++argshere; } // set collection specific macros if (!collection.empty()) { recptproto *collectproto = protocols.getrecptproto (collection, logout); if (collectproto != NULL) { FilterResponse_t response; text_tset metadata; get_info ("collection", collection, args["l"], metadata, false, collectproto, response, logout); if (!response.docInfo[0].metadata.empty()) { MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin(); MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end(); while (here != end) { if (((*here).first != "haschildren") && ((*here).first != "hasnext") && ((*here).first != "hasprevious")) { // check for args in form name:lang text_t name = g_EmptyText; text_t lang = g_EmptyText; bool colonfound=false; text_t::const_iterator a = (*here).first.begin(); text_t::const_iterator b = (*here).first.end(); while (a !=b) { if (*a==':') { colonfound=true; } else { if (colonfound) lang.push_back(*a); else name.push_back(*a); } ++a; } if (!lang.empty()) { if (args["l"]==lang) { disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]); } } else { // the default one disp.setcollectionmacro(displayclass::defaultpackage, (*here).first, "", (*here).second.values[0]); } } ++here; } } text_t iconcollection; disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection); if (!iconcollection.empty()) { ColInfoResponse_t cinfo; comerror_t err; collectproto->get_collectinfo (collection, cinfo, err, logout); if (iconcollection[0]=='/' && !cinfo.httpdomain.empty()) { // local but with full path iconcollection = "http://" + cinfo.httpdomain + iconcollection; disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection); } } } } if (!collection.empty()) { ColInfoResponse_t cinfo; comerror_t err; recptproto *collectproto = protocols.getrecptproto (collection, logout); if (collectproto != NULL) { collectproto->get_collectinfo (collection, cinfo, err, logout); // This part of the code used to use "cinfo.httpprefix" regardless // of the value it contained. Since // this can come back with an empty (in the case of gsdl_mod), the // URL produced was invalid. // // Changed to test for empty first, and use configinfo.httpprefix as // a "backup" // // Point to consider: since configinfo.httpprefix has been offically // set as "httpprefix" in macros, it seems to make more sense to use // always use that version and not the cinfo version at all. text_t httpprefix = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix; text_t httpcollection; if (!cinfo.httpdomain.empty()) httpcollection = "http://"; httpcollection += cinfo.httpdomain + httpprefix + "/collect/" + collection; disp.setmacro ("httpcollection", displayclass::defaultpackage, httpcollection); // as of gsdl 2.53, collect.cfg can specify macros if (cinfo.collection_macros.size() > 0) { collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin(); collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end(); while (this_macro != done_macro) { text_t package = "Global"; text_t macroname = this_macro->first; // if this macro name is AAA:bbb then extract the package name text_t::const_iterator thischar, donechar; thischar = macroname.begin(); donechar = macroname.end(); while (thischar < donechar) { if (*thischar == ':') { package = substr(macroname.begin(),thischar); macroname = substr(thischar+1,donechar); break; } ++thischar; } text_tmap params_map = this_macro->second; text_tmap::const_iterator this_param = params_map.begin(); text_tmap::const_iterator done_param = params_map.end(); while (this_param != done_param) { disp.setcollectionmacro(package, macroname, this_param->first, this_param->second); ++this_param; } ++this_macro; } } // col macros } // collectproto != NULL } } // gets collection info from cache if found or // calls collection server (and updates cache) // returns NULL if there's an error ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto, const text_t &collection, ostream &logout) { // check the cache colinfo_tmap::iterator it = configinfo.collectinfo.find (collection); if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) { // found it return &((*it).second.info); } // not cached, get info from collection server if (collectproto == NULL) { logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n"; return NULL; } comerror_t err; if (it == configinfo.collectinfo.end()) { collectioninfo_t cinfo; collectproto->get_collectinfo (collection, cinfo.info, err, logout); if (err != noError) { outconvertclass text_t2ascii; logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \"" << get_comerror_string (err) << "\"while getting collectinfo\n"; return NULL; } cinfo.info_loaded = true; configinfo.collectinfo[collection] = cinfo; return &(configinfo.collectinfo[collection].info); } else { collectproto->get_collectinfo (collection, (*it).second.info, err, logout); if (err != noError) { outconvertclass text_t2ascii; logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \"" << get_comerror_string (err) << "\"while getting collectinfo\n"; return NULL; } (*it).second.info_loaded = true; return &((*it).second.info); } } // removes a collection from the cache so that the next // call to get_collectinfo_ptr() for that collection will // retrieve the collection info from the collection server void receptionist::uncache_collection (const text_t &collection) { colinfo_tmap::iterator it = configinfo.collectinfo.find (collection); if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) { (*it).second.info_loaded = false; } } // Handles an "Encoding" line from a configuration file - note that the // configinfo.encodings map is a bit of a hack (to be fixed when the // configuration files are tidied up). void receptionist::configure_encoding (const text_tarray &cfgline) { text_t subkey, subvalue, shortname, longname, mapfile; int multibyte = 0; text_t::const_iterator cfglinesub_here; text_tarray::const_iterator cfgline_here = cfgline.begin(); text_tarray::const_iterator cfgline_end = cfgline.end(); while (cfgline_here != cfgline_end) { if (*cfgline_here == "multibyte") { multibyte = 1; } else { cfglinesub_here = getdelimitstr((*cfgline_here).begin(), (*cfgline_here).end(), '=', subkey); if (subkey == "shortname") { shortname = substr (cfglinesub_here, (*cfgline_here).end()); } else if (subkey == "longname") { longname = substr (cfglinesub_here, (*cfgline_here).end()); } else if (subkey == "map") { mapfile = substr (cfglinesub_here, (*cfgline_here).end()); } } ++cfgline_here; } if (!shortname.empty()) { if (longname.empty()) longname = shortname; // add the converter if (shortname == "utf-8") { utf8inconvertclass *utf8inconvert = new utf8inconvertclass(); utf8outconvertclass *utf8outconvert = new utf8outconvertclass(); utf8outconvert->set_rzws(1); add_converter (shortname, utf8inconvert, utf8outconvert); configinfo.encodings[longname] = shortname; } else if (shortname == "utf-16be") { // we use the default input converter as this shouldn't ever be used // for converting from unicode... inconvertclass *inconverter = new inconvertclass(); utf16outconvertclass *outconverter = new utf16outconvertclass(); add_converter (shortname, inconverter, outconverter); configinfo.encodings[longname] = shortname; } else if (!mapfile.empty()) { if (mapfile == "8859_1.ump") { // iso-8859-1 is a special case as it'll always be supported by the // standard converter class and therefore doesn't need to use its // mapping file inconvertclass *inconvert = new inconvertclass(); rzwsoutconvertclass *outconvert = new rzwsoutconvertclass(); outconvert->set_rzws(1); add_converter (shortname, inconvert, outconvert); configinfo.encodings[longname] = shortname; } else { text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile); text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile); if (file_exists(to_uc_map) && file_exists(from_uc_map)) { mapinconvertclass *mapinconvert = new mapinconvertclass(); mapinconvert->setmapfile (to_uc_map, 0x003F); mapinconvert->set_multibyte (multibyte); mapoutconvertclass *mapoutconvert = new mapoutconvertclass(); mapoutconvert->setmapfile (from_uc_map, 0x3F); mapoutconvert->set_multibyte (multibyte); mapoutconvert->set_rzws(1); add_converter (shortname, mapinconvert, mapoutconvert); configinfo.encodings[longname] = shortname; } } } } }