/********************************************************************** * * cgiwrapper.cpp -- output pages using the cgi protocol * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include #include #ifdef __WIN32__ #include #endif #include "gsdlconf.h" #include "cgiwrapper.h" #include "gsdlsitecfg.h" #include "maincfg.h" #include "fileutil.h" #include "cgiutils.h" #include #include #if defined(GSDL_USE_OBJECTSPACE) # include # include #elif defined(GSDL_USE_IOS_H) # include # include #else # include # include #endif #ifdef USE_FASTCGI #include "fcgiapp.h" #endif #include "authenaction.h" #include "browseaction.h" #include "collectoraction.h" #include "depositoraction.h" #include "documentaction.h" #include "dynamicclassifieraction.h" #include "extlinkaction.h" #include "pageaction.h" #ifdef ENABLE_MGPP #include "phindaction.h" #endif #include "pingaction.h" #include "queryaction.h" #if defined(USE_SQLITE) #include "sqlqueryaction.h" #endif #include "tipaction.h" #include "statusaction.h" #include "usersaction.h" #include "configaction.h" #include "vlistbrowserclass.h" #include "hlistbrowserclass.h" #include "datelistbrowserclass.h" #include "invbrowserclass.h" #include "pagedbrowserclass.h" #include "htmlbrowserclass.h" #include "phindbrowserclass.h" #ifdef USE_FASTCGI // used to output the text from receptionist class fcgistreambuf : public streambuf { public: fcgistreambuf (); int sync (); int overflow (int ch); int underflow () {return EOF;} void fcgisbreset() {fcgx_stream = NULL; other_ostream = NULL;}; void set_fcgx_stream(FCGX_Stream *newone) {fcgx_stream=newone;}; void set_other_ostream(ostream *newone) {other_ostream=newone;}; private: FCGX_Stream *fcgx_stream; ostream *other_ostream; }; fcgistreambuf::fcgistreambuf() { fcgisbreset(); if (base() == ebuf()) allocate(); setp (base(), ebuf()); }; int fcgistreambuf::sync () { if ((fcgx_stream != NULL) && (FCGX_PutStr (pbase(), out_waiting(), fcgx_stream) < 0)) { fcgx_stream = NULL; } if (other_ostream != NULL) { char *thepbase=pbase(); for (int i=0;i 0), and are in the collection's\n" << " index directory (i.e. NOT the building directory)\n\n"; recptprotolistclass *protos = recpt.get_recptprotolist_ptr(); recptprotolistclass::iterator rprotolist_here = protos->begin(); recptprotolistclass::iterator rprotolist_end = protos->end(); bool is_z3950 = false; bool found_valid_col = false; while (rprotolist_here != rprotolist_end) { comerror_t err; if ((*rprotolist_here).p == NULL) continue; else if (is_z3950==false && (*rprotolist_here).p->get_protocol_name(err) == "z3950proto") { cout << "\nZ39.50 Servers: (always public)\n" << "---------------\n"; is_z3950=true; } text_tarray collist; (*rprotolist_here).p->get_collection_list (collist, err, cerr); if (err == noError) { text_tarray::iterator collist_here = collist.begin(); text_tarray::iterator collist_end = collist.end(); while (collist_here != collist_end) { cout << text_t2ascii << *collist_here; int spaces = (22 - (*collist_here).size()); if (spaces < 2) spaces = 2; text_t outspaces; for (int i = 0; i < spaces; ++i) outspaces.push_back (' '); cout << text_t2ascii << outspaces; ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr); if (cinfo != NULL) { if (cinfo->isPublic) cout << "public "; else cout << "private"; if (cinfo->buildDate > 0) { cout << " running "; found_valid_col = true; } else { cout << " not running"; } } cout << "\n"; ++collist_here; } } is_z3950=false; ++rprotolist_here; } // end of while loop if (!found_valid_col) { cout << "WARNING: No \"running\" collections were found. You need to\n"; cout << " build one of the above collections\n"; } cout << "\n------------------------------------------------------------\n"; cout << "------------------------------------------------------------\n\n"; cout << "receptionist running in command line debug mode\n"; cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n"; } void add_all_actions(receptionist& recpt, userdbclass* udb, keydbclass* kdb) { // the list of actions. #ifdef GSDL_USE_TIP_ACTION tipaction* atipaction = new tipaction(); recpt.add_action (atipaction); #endif #ifdef GSDL_USE_STATUS_ACTION statusaction *astatusaction = new statusaction(); astatusaction->set_receptionist (&recpt); recpt.add_action (astatusaction); #endif pageaction *apageaction = new pageaction(); apageaction->set_receptionist (&recpt); recpt.add_action (apageaction); #ifdef GSDL_USE_PING_ACTION recpt.add_action (new pingaction()); #endif queryaction *aqueryaction = new queryaction(); aqueryaction->set_receptionist (&recpt); recpt.add_action (aqueryaction); #if defined(USE_SQLITE) sqlqueryaction *asqlqueryaction = new sqlqueryaction(); asqlqueryaction->set_receptionist (&recpt); recpt.add_action (asqlqueryaction); #endif documentaction *adocumentaction = new documentaction(); adocumentaction->set_receptionist (&recpt); recpt.add_action (adocumentaction); #ifdef GSDL_USE_USERS_ACTION usersaction *ausersaction = new usersaction(); ausersaction->set_userdb(udb); recpt.add_action (ausersaction); #endif #ifdef GSDL_USE_EXTLINK_ACTION extlinkaction *aextlinkaction = new extlinkaction(); aextlinkaction->set_receptionist(&recpt); recpt.add_action (aextlinkaction); #endif #ifdef GSDL_USE_AUTHEN_ACTION authenaction *aauthenaction = new authenaction(); aauthenaction->set_userdb(udb); aauthenaction->set_keydb(kdb); aauthenaction->set_receptionist(&recpt); recpt.add_action (aauthenaction); #endif #ifdef GSDL_USE_COLLECTOR_ACTION collectoraction *acollectoraction = new collectoraction(); acollectoraction->set_receptionist (&recpt); recpt.add_action(acollectoraction); #endif #ifdef GSDL_USE_DEPOSITOR_ACTION depositoraction *adepositoraction = new depositoraction(); adepositoraction->set_receptionist (&recpt); recpt.add_action(adepositoraction); #endif #ifdef GSDL_USE_BROWSE_ACTION browseaction *abrowseaction = new browseaction(); abrowseaction->set_receptionist (&recpt); recpt.add_action(abrowseaction); #endif #ifdef GSDL_USE_PHIND_ACTION // Phind uses MPPP,do we also need to check if ENABLE_MGPP is set?? phindaction *aphindaction = new phindaction(); recpt.add_action(aphindaction); #endif #ifdef GSDL_USE_GTI_ACTION gtiaction *agtiaction = new gtiaction(); agtiaction->set_receptionist(&recpt); recpt.add_action(agtiaction); #endif dynamicclassifieraction *adynamicclassifieraction = new dynamicclassifieraction(); adynamicclassifieraction->set_receptionist(&recpt); recpt.add_action(adynamicclassifieraction); #if defined(USE_MYSQL) || defined(USE_ACCESS) orderaction *aorderaction = new orderaction(); aorderaction->set_receptionist(&recpt); recpt.add_action(aorderaction); #endif // action that allows collections to be added, released etc. when server // is persistent (e.g. fastcgi or when Greenstone is configured as an // Apache module). Presumably this includes Windows server.exe as well // Want to always include it in list of actions even if compiling // Greenstone to be used in a non-persistent way (e.g. library.cgi). // This is so the e-variable that is formed is consistent between the // persisent executable and the non-persistent executable // configaction *aconfigaction = new configaction(); aconfigaction->set_receptionist(&recpt); recpt.add_action(aconfigaction); } void add_all_browsers(receptionist& recpt) { // list of browsers vlistbrowserclass *avlistbrowserclass = new vlistbrowserclass(); avlistbrowserclass->set_receptionist(&recpt); recpt.add_browser (avlistbrowserclass); recpt.setdefaultbrowser ("VList"); hlistbrowserclass *ahlistbrowserclass = new hlistbrowserclass(); ahlistbrowserclass->set_receptionist(&recpt); recpt.add_browser (ahlistbrowserclass); #ifdef GSDL_USE_DATELIST_BROWSER datelistbrowserclass *adatelistbrowserclass = new datelistbrowserclass(); recpt.add_browser (adatelistbrowserclass); #endif invbrowserclass *ainvbrowserclass = new invbrowserclass(); recpt.add_browser (ainvbrowserclass); #ifdef GSDL_USE_PAGED_BROWSER pagedbrowserclass *apagedbrowserclass = new pagedbrowserclass(); recpt.add_browser (apagedbrowserclass); #endif #ifdef GSDL_USE_HTML_BROWSER htmlbrowserclass *ahtmlbrowserclass = new htmlbrowserclass(); recpt.add_browser (ahtmlbrowserclass); #endif #ifdef GSDL_USE_PHIND_BROWSER phindbrowserclass *aphindbrowserclass = new phindbrowserclass();; recpt.add_browser (aphindbrowserclass); #endif } // cgiwrapper does everything necessary to output a page // using the cgi protocol. If this is being run for a particular // collection then "collection" should be set, otherwise it // should equal "". void cgiwrapper (receptionist &recpt, text_t collection) { int numrequests = 0; bool debug = false; const recptconf &configinfo = recpt.get_configinfo (); // find out whether this is being run as a cgi-script // or a fastcgi script #ifdef USE_FASTCGI fcgistreambuf outbuf; int isfastcgi = !FCGX_IsCGI(); FCGX_Stream *fcgiin, *fcgiout, *fcgierr; FCGX_ParamArray fcgienvp; #else int isfastcgi = 0; #endif // we need gsdlhome to do fileupload stuff, so moved this configure stuff before the get argstr stuff // init stuff - we can't output error pages directly with // fastcgi so the pages are stored until we can output them text_t errorpage; outconvertclass text_t2ascii; // set defaults int maxrequests = 10000; recpt.configure ("collection", collection); char *script_name = getenv("SCRIPT_NAME"); if (script_name != NULL) recpt.configure("gwcgi", script_name); else recpt.configure("gwcgi", "/gsdl"); // read in the configuration files. text_t gsdlhome; text_t collecthome; configurator gsdlconfigurator(&recpt); if (!site_cfg_read (gsdlconfigurator, gsdlhome, collecthome, maxrequests)) { // couldn't find the site configuration file page_errorsitecfg (errorpage, debug, 0); } else if (gsdlhome.empty()) { // no gsdlhome in gsdlsite.cfg page_errorsitecfg (errorpage, debug, 1); } else if (!directory_exists(gsdlhome)) { // gsdlhome not a valid directory page_errorsitecfg (errorpage, debug, 1); } else if (!main_cfg_read (recpt, gsdlhome, collecthome, collection)) { // couldn't find the main configuration file page_errormaincfg (gsdlhome, collection, debug, errorpage); } else if (configinfo.collectinfo.empty() && false) { // commented out for corba // don't have any collections page_errorcollect (gsdlhome, errorpage, debug); } // set up the httpweb variable if it hasn't been defined yet if (configinfo.httpweb.empty()) { recpt.configure("httpweb", configinfo.httpprefix+"/web"); } // get the query string if it is not being run as a fastcgi // script text_t argstr = g_EmptyText; fileupload_tmap fileuploads; cgiargsclass args; char *aURIStr; if (!isfastcgi) { char *request_method_str = getenv("REQUEST_METHOD"); char *content_length_str = getenv("CONTENT_LENGTH"); if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 && content_length_str != NULL) { // POST form data long content_length = (content_length_str ? atoi(content_length_str) : 0); if (content_length > 0) { #ifdef __WIN32__ // On Windows it is important that standard input be read in binary // mode, otherwise end of line "" is turned into only // which breaks the MIME standard (and our parsing code!) int result = _setmode( _fileno( stdin ), _O_BINARY ); if( result == -1 ) { cerr << "Warning: Failed to set standard input to binary mode." << endl; cerr << " Parsing of multi-part MIME will most likely fail" << endl; } #endif long length = content_length; unsigned char * buffer = new unsigned char[content_length]; int chars_read = fread(buffer,1,content_length,stdin); if (chars_read != content_length) { cerr << "Warning: mismatch between CONTENT_LENGTH and data read from standard in" << endl; } argstr.setcarr((char *)buffer, content_length); text_t content_type; char *content_type_str = getenv("CONTENT_TYPE"); if (content_type_str) content_type = content_type_str; argstr = parse_post_data(content_type, argstr, fileuploads, gsdlhome); } } else { aURIStr = getenv("QUERY_STRING"); if ((request_method_str != NULL && strcmp(request_method_str, "GET") == 0) || aURIStr != NULL) { // GET form data if (aURIStr != NULL) argstr = aURIStr; } else { // debugging from command line debug = true; } } } if (debug) { cout << "Configuring Greenstone...\n"; cout << flush; } if (errorpage.empty()) { // initialise the library software if (debug) { cout << "Initializing...\n"; cout << flush; } text_t error_file = filename_cat (gsdlhome, "etc", "error.txt"); char *eout = error_file.getcstr(); ofstream errout (eout, ios::app); delete []eout; if (!recpt.init(errout)) { // an error occurred during the initialisation errout.close(); page_errorinit(gsdlhome, debug, errorpage); } errout.close(); } if (debug && errorpage.empty()) { // get query string from command line print_debug_info (recpt); char cinURIStr[1024]; cin.get(cinURIStr, 1024); argstr = cinURIStr; } // cgi scripts only deal with one request if (!isfastcgi) maxrequests = 1; // Page-request loop. If this is not being run as a fastcgi // process then only one request will be processed and then // the process will exit. while (numrequests < maxrequests) { #ifdef USE_FASTCGI if (isfastcgi) { if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break; char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp); char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp); if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 && content_length_str != NULL) { // POST form data int content_length = text_t(content_length_str).getint(); if (content_length > 0) { argstr.clear(); int c; do { c = FCGX_GetChar (fcgiin); if (c < 0) break; argstr.push_back (c); --content_length; } while (content_length > 0); } } else { // GET form data aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp); if (aURIStr != NULL) argstr = aURIStr; else argstr = g_EmptyText; } } #endif // get output streams ready #ifdef USE_FASTCGI outbuf.fcgisbreset (); if (isfastcgi) outbuf.set_fcgx_stream (fcgiout); else outbuf.set_other_ostream (&cout); ostream pageout (&outbuf); #else #define pageout cout #endif // if using fastcgi we'll load environment into a map, // otherwise simply pass empty map (can't get environment // variables using getenv() while using FCGX versions // of fastcgi - at least I can't ;-) - Stefan) text_tmap fastcgienv; #ifdef USE_FASTCGI if (isfastcgi) { for(; *fcgienvp != NULL; ++fcgienvp) { text_t fvalue = *fcgienvp; text_t::const_iterator begin = fvalue.begin(); text_t::const_iterator end = fvalue.end(); text_t::const_iterator equals_sign = findchar (begin, end, '='); if (equals_sign != end) fastcgienv[substr(begin, equals_sign)] = substr(equals_sign+1, end); } } #endif // temporarily need to configure gwcgi here when using fastcgi as I can't // get it to pass the SCRIPT_NAME environment variable to the initial // environment (if anyone can work out how to do this using the apache // server, let me know). Note that this overrides the gwcgi field in // site.cfg (which it shouldn't do) but I can't at present set gwcgi // from site.cfg as I have old receptionists laying around that wouldn't // appreciate it. The following 5 lines of code should be deleted once // I either a: get the server to pass SCRIPT_NAME at initialization // time or b: convert all the collections using old receptionists over // to this version and uncomment gwcgi in the site.cfg file -- Stefan. #ifdef USE_FASTCGI if (isfastcgi) { recpt.configure("gwcgi", fastcgienv["SCRIPT_NAME"]); } #endif // if there has been no error so far, perform the production of the // output page if (errorpage.empty()) { text_t error_file = filename_cat (gsdlhome, "etc", "error.txt"); char *eout = error_file.getcstr(); ofstream errout (eout, ios::app); delete []eout; #if defined(__WIN32__) && defined(GSDL_USE_IOS_H) // old Windows compilers (VC++4.2) cerr = errout; #else // can't do this anymore according to c++ standard... // cerr = errout; // ... but can do this instead streambuf* errbuf = cerr.rdbuf(errout.rdbuf()); #endif // parse the cgi arguments and produce the resulting page if there // has been no errors so far if (!recpt.parse_cgi_args (argstr, fileuploads, args, errout, fastcgienv)) { errout.close (); page_errorparseargs(gsdlhome, debug, errorpage); } else { // produce the output page if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) { errout.close (); page_errorcgipage(gsdlhome, debug, errorpage); } recpt.log_cgi_args (args, errout, fastcgienv); errout.close (); } #if !defined(__WIN32__) || !defined(GSDL_USE_IOS_H) // restore the cerr buffer cerr.rdbuf(errbuf); #endif } // clean up any files that were uploaded fileupload_tmap::const_iterator this_file = fileuploads.begin(); fileupload_tmap::const_iterator end_file = fileuploads.end(); while (this_file != end_file) { if (file_exists((*this_file).second.tmp_name)) { char *thefile = (*this_file).second.tmp_name.getcstr(); unlink(thefile); delete [] thefile; } ++this_file; } // there was an error, output the error page if (!errorpage.empty()) { pageout << text_t2ascii << errorpage; errorpage.clear(); numrequests = maxrequests; // make this the last page } pageout << flush; // finish with the output streams #ifdef USE_FASTCGI if (isfastcgi) FCGX_Finish(); #endif ++numrequests; } return; }