/**********************************************************************
 *
 * mod_gsdl.cpp -- front end for apache 1.3 or 2.x module
 * Copyright (C) 2003 DL Consulting Ltd
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
using namespace std;

#include "httpd.h"
#include "http_core.h"
#include "http_config.h"
#include "http_protocol.h"
#include "http_main.h"
#include "util_script.h"
#include "ap_config.h"
#include "http_log.h"

#if _APACHE_MOD >= 2
#include "ap_compat.h"
#include "apr_strings.h"

// In addition to the backward compatible features defined in
// ap_compat.h, define

typedef apr_pool_t ap_pool; // map old type to new apr_* version

// In Apache 2.x, ap_log_error takes extra parameter -- status code
// from the previous command; define macro to automatically add in
// surrogate value of 0 for this
#define ap_log_error(file,line,level,server,fmt) \
  ap_log_error(file,line,level,0,server,fmt)
#endif

#if defined(USE_MYSQL)
#include "mysqlclass.h"
#endif

#if defined(USE_ACCESS)
#include "accessclass.h"
#endif

#include "maincfg.h"
#include "string_pool.h"
#include "receptionist.h"
#include "cgiwrapper.h"
#include "cgiutils.h"
#include "fileutil.h"
#include "nullproto.h"
#include "collectserver.h"
#include "filter.h"
#include "browsefilter.h"
//#ifdef ENABLE_MG
//#include "mgqueryfilter.h"
//#include "mgsearch.h"
//#endif
#include "infodbclass.h"
#include "collectset.h"
#include "gdbmclass.h"
#include "action.h"
#include "ispersistentaction.h"
#include "browserclass.h"

#define GSDL_USE_OSTRINGSTREAM 1

// ostrstream is deprecated => switch to using ostringstream
// Doing this with the above define so it is easy to switch
// back to ostrstream if this backwards compatibility is needed
// for some reason.
#if defined(GSDL_USE_OSTRINGSTREAM) # include #else # if defined(GSDL_USE_IOS_H) # if defined(__WIN32__) # include // vc4 # else # include # endif # else # include # endif #endif receptionist recpt; nullproto nproto; extern "C" module MODULE_VAR_EXPORT gsdl_module; typedef struct { char *config_gsdlhome; char *config_collecthome; char *config_httpprefix; char *config_httpweb; } gsdl_config; static void *gsdl_create_config(ap_pool *p, server_rec *s) { #if _APACHE_MOD >= 2 gsdl_config *cfg = (gsdl_config *)apr_pcalloc(p, sizeof(gsdl_config)); #else gsdl_config *cfg = (gsdl_config *)ap_pcalloc(p, sizeof(gsdl_config)); #endif return (void *)cfg; } static void *gsdl_create_dir_config(ap_pool *p, char *path) { return (gsdl_config *) gsdl_create_config(p, NULL); } static const char *gsdl_cmd(cmd_parms *cmd, void *mconfig, char *val) { server_rec *s = cmd->server; gsdl_config *cfg = (gsdl_config *) ap_get_module_config(s->module_config, &gsdl_module); if (strcmp(cmd->cmd->name, "gsdlhome") == 0) { cfg->config_gsdlhome = val; } else if (strcmp(cmd->cmd->name, "collecthome") == 0) { cfg->config_collecthome = val; } else if (strcmp(cmd->cmd->name, "httpprefix") == 0) { cfg->config_httpprefix = val; } else if (strcmp(cmd->cmd->name, "httpweb") == 0) { cfg->config_httpweb = val; } return NULL; } #if _APACHE_MOD < 2 static void gsdl_exit(void* d) { #else static apr_status_t gsdl_exit(void* d) { #endif #if defined(USE_ACCESS) // shut down com CoUninitialize(); #endif #if _APACHE_MOD >= 2 return APR_SUCCESS; #endif } #if _APACHE_MOD < 2 static void gsdl_init(server_rec *s, ap_pool *p) { #else static void gsdl_init(ap_pool *p, server_rec *s) { #endif // Register gsdl_exit has cleanup operation for when pchild (process id // of child) exits #if _APACHE_MOD >= 2 apr_pool_cleanup_register(p,s, apr_pool_cleanup_null, gsdl_exit); #else ap_register_cleanup(p,s, ap_null_cleanup, gsdl_exit); #endif recpt.loaded = false; gsdl_config *cfg = (gsdl_config *) 
ap_get_module_config(s->module_config, &gsdl_module); if (!cfg->config_gsdlhome) { ap_log_error("", 0, APLOG_ERR, s, "gsdlhome not set\n"); return; } text_t gsdlhome = cfg->config_gsdlhome; if (!directory_exists(gsdlhome)) { ap_log_error("", 0, APLOG_ERR, s, "gsdlhome directory does not exist\n"); return; } if (!cfg->config_collecthome) { ap_log_error("", 0, APLOG_ERR, s, "collecthome not set\n"); return; } text_t collecthome = cfg->config_collecthome; if (!directory_exists(collecthome)) { ap_log_error("", 0, APLOG_ERR, s, "collecthome directory does not exist\n"); return; } if (!cfg->config_httpprefix) { ap_log_error("", 0, APLOG_ERR, s, "httpprefix not set\n"); return; } if (!cfg->config_httpweb) { ap_log_error("", 0, APLOG_ERR, s, "httpweb not set\n"); return; } text_t httpprefix = cfg->config_httpprefix; text_t httpweb = cfg->config_httpweb; text_t collection = ""; collectset *cservers = new collectset(); cservers->add_all_collections(gsdlhome,collecthome); // set up the null protocol nproto.set_collectset(cservers); // add the protocol to the receptionist recpt.add_protocol(&nproto); // add database object to receptionist #if defined(USE_MYSQL) mysqlclass *db = new mysqlclass(); #elif defined(USE_ACCESS) // start up com if(FAILED(CoInitialize(NULL))) { cout << "CoInitialize failed\n"; exit(-1); } accessclass *db = new accessclass(); #else // not using either mysql or access database - we'll use the gdbmclass // class as a filler (but most likely won't work) - only useful // if creating a binary that doesn't require access to the database dbclass *db = new gdbmclass(gsdlhome); #endif #if defined(USE_MYSQL) || defined(USE_ACCESS) recpt.add_dbclass(db); #endif userdbclass *udb = new userdbclass(gsdlhome); keydbclass *kdb = new keydbclass(gsdlhome); // do these two database classes ever get deleted in the module version // of Greenstone? 
// librarymain.cpp deletes these when its finished its function // does comparable code need to be put into "gsdl_deinit()" routine // whatever that might be called in reality? add_all_actions(recpt,udb,kdb,isPersistent); // apache is a persistent server with mod_gsdl add_all_browsers(recpt); // configure everything recpt.configure("gsdlhome", gsdlhome); recpt.configure("collecthome", collecthome); recpt.configure("collection", collection); recpt.configure("httpweb", httpweb); recpt.configure("httpprefix", httpprefix); // set httpprefixfull - if httpprefix is a relative path we need to get // the server name and port from the web server environment (note that // gwcgi and gwcgifull will be configured for each page request (in gsdl_handler) // as we need to know that directory info etc.) text_t httpprefixfull = httpprefix; if ((httpprefixfull.size() < 7) || (substr(httpprefixfull.begin(), httpprefixfull.begin()+7) != "http://")) { if (s->server_hostname && s->port) { httpprefixfull = "http://" + text_t(s->server_hostname) + ":" + text_t(s->port) + httpprefixfull; } } recpt.configure("httpprefixfull", httpprefixfull); // read in main.cfg file if (!main_cfg_read(recpt, gsdlhome, collecthome, collection)) { ap_log_error("", 0, APLOG_ERR, s, "failed to read main.cfg file\n"); return; } text_t error_file = filename_cat (gsdlhome, "etc", "error.txt"); char *eout = error_file.getcstr(); ofstream errout (eout, ios::app); delete eout; // initialise the library software if (!recpt.init(errout)) { ap_log_error("", 0, APLOG_ERR, s, "Greenstone failed to initialize\n"); errout.close(); return; } errout.close(); // set flag so gsdl_handler can check that everything initialized // successfully recpt.loaded = true; } #if _APACHE_MOD < 2 typedef const char *(*CMD_HAND_TYPE) (); static command_rec gsdl_cmds[] = { {"gsdlhome", (CMD_HAND_TYPE)gsdl_cmd, (void *)XtOffsetOf(gsdl_config, config_gsdlhome), OR_ALL, TAKE1, "GSDLHOME directory."}, {"collecthome", (CMD_HAND_TYPE)gsdl_cmd, (void 
*)XtOffsetOf(gsdl_config, config_collecthome), OR_ALL, TAKE1, "COLLECTHOME directory."}, {"httpprefix", (CMD_HAND_TYPE)gsdl_cmd, (void *)XtOffsetOf(gsdl_config, config_httpprefix), OR_ALL, TAKE1, "GSDLHOME http path."}, {"httpweb", (CMD_HAND_TYPE)gsdl_cmd, (void *)XtOffsetOf(gsdl_config, config_httpweb), OR_ALL, TAKE1, "GSDLHOME http web path."}, {NULL, NULL, NULL, 0, cmd_how(0), NULL} }; #else /* static const char *set_max_cache_size(cmd_parms *parms, void *in_struct_ptr, const char *arg) { apr_size_t val; return NULL; } AP_INIT_TAKE1("MCacheSize", set_max_cache_size, NULL, RSRC_CONF, "The maximum amount of memory used by the cache in KBytes"), */ // const char *(*cmd_func)(); static const command_rec gsdl_cmds[] = { AP_INIT_TAKE1("gsdlhome", (cmd_func)gsdl_cmd, NULL, OR_ALL,"GSDLHOME directory."), AP_INIT_TAKE1("collecthome", (cmd_func)gsdl_cmd, NULL, OR_ALL,"COLLECTHOME directory."), AP_INIT_TAKE1("httpprefix", (cmd_func)gsdl_cmd, NULL, OR_ALL,"GSDLHOME http path."), AP_INIT_TAKE1("httpweb", (cmd_func)gsdl_cmd, NULL, OR_ALL,"GSDLHOME http web path."), {NULL} }; #endif static int gsdl_handler(request_rec *r) { if (strcmp(r->handler, "gsdl")) { return DECLINED; } // check that everything has been initialized if (!recpt.loaded) { ap_log_error("", 0, APLOG_ERR, r->server, "gsdl_handler: attempt to use uninitialized receptionist - aborting\n"); return 500; } gsdl_config *cfg = (gsdl_config *) ap_get_module_config(r->server->module_config, &gsdl_module); if (!cfg->config_gsdlhome) { ap_log_error("", 0, APLOG_ERR, r->server, "gsdl_handler: gsdlhome not set\n"); return 500; } if (!cfg->config_collecthome) { ap_log_error("", 0, APLOG_ERR, r->server, "gsdl_handler: collecthome not set\n"); return 500; } // configure gwcgi and gwcgifull recpt.configure("gwcgi", r->uri); recpt.configure("gwcgifull", "http://" + text_t(r->server->server_hostname) + ":" + text_t(r->server->port) + r->uri); string_pool str_pool; text_t gsdlhome = cfg->config_gsdlhome; text_t error_file = 
filename_cat (gsdlhome, "etc", "error.txt"); char *eout = str_pool.get_cstr_from_pool(error_file); ofstream errout (eout, ios::app); text_t argstr; fileupload_tmap fileuploads; char *request_method_str = getenv("REQUEST_METHOD"); char *content_length_str = getenv("CONTENT_LENGTH"); if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 && content_length_str != NULL) { // POST form data long content_length = (content_length_str ? atoi(content_length_str) : 0); if (content_length > 0) { #ifdef __WIN32__ // On Windows it is important that standard input be read in binary // mode, otherwise end of line "" is turned into only // which breaks the MIME standard (and our parsing code!) int result = _setmode( _fileno( stdin ), _O_BINARY ); if( result == -1 ) { cerr << "Warning: Failed to set standard input to binary mode." << endl; cerr << " Parsing of multi-part MIME will most likely fail" << endl; } #endif long length = content_length; unsigned char * buffer = new unsigned char[content_length]; int chars_read = fread(buffer,1,content_length,stdin); if (chars_read != content_length) { cerr << "Warning: mismatch between CONTENT_LENGTH and data read from standard in" << endl; } argstr.setcarr((char *)buffer, content_length); text_t content_type; char *content_type_str = getenv("CONTENT_TYPE"); if (content_type_str) content_type = content_type_str; argstr = parse_post_data(content_type, argstr, fileuploads, gsdlhome); } } else { if (r->args) argstr = r->args; } // parse the cgi arguments and produce the resulting page if there // have been no errors so far cgiargsclass args; text_tmap fastcgienv; // Not being used in this case if (!recpt.parse_cgi_args (argstr, fileuploads, args, errout, fastcgienv)) { errout.close(); ap_log_error("", 0, APLOG_ERR, r->server, "parse_cgi_args failed\n"); return 500; } #ifdef GSDL_USE_OSTRINGSTREAM ostringstream pageout; #else ostrstream pageout; #endif // get http headers text_tmap headers; recpt.get_cgihead_info(args, 
headers, errout, fastcgienv); if (headers.find("Location") != headers.end()) { // redirect char *val = str_pool.get_cstr_from_pool(headers.find("Location")->second); #if _APACHE_MOD >= 2 apr_table_set(r->headers_out, "location", val); #else ap_table_set(r->headers_out, "location", val); #endif return HTTP_MOVED_TEMPORARILY; } text_tmap::iterator here = headers.begin(); text_tmap::iterator end = headers.end(); while (here != end) { text_t namet = (*here).first; lc(namet); char *name = str_pool.get_cstr_from_pool(namet); char *val = str_pool.get_cstr_from_pool((*here).second); if ((*here).first == "content-type") { #if _APACHE_MOD < 2 r->content_type = val; #else char* val_copy = strdup(val); // if note freed by r->content_type => Memory leak! ap_set_content_type(r, val_copy); #endif } else if ((*here).first == "content-encoding") { r->content_encoding = val; } else { #if _APACHE_MOD >= 2 apr_table_set(r->headers_out, name, val); #else ap_table_set(r->headers_out, name, val); #endif } here++; } ap_send_http_header(r); if (!r->header_only) { if (!recpt.produce_content(args, pageout, errout)) { ap_log_error("", 0, APLOG_ERR, r->server, "produce_content failed\n"); return 500; } #ifdef GSDL_USE_OSTRINGSTREAM string out = pageout.str(); ap_rwrite(out.c_str(), out.size(), r); #else char *out = pageout.str(); ap_rwrite(out, pageout.pcount(), r); pageout.rdbuf()->freeze(0); #endif } errout.close(); return OK; } #if _APACHE_MOD < 2 static const handler_rec gsdl_handlers[] = { { "gsdl", gsdl_handler }, { NULL, NULL } }; extern "C" { module MODULE_VAR_EXPORT gsdl_module = { STANDARD_MODULE_STUFF, NULL, /* module \initializer */ gsdl_create_dir_config, /* create per-dir config structures */ NULL, /* merge per-dir config structures */ gsdl_create_config, /* create per-server config structures */ NULL, /* merge per-server config structures */ gsdl_cmds, /* table of config file commands */ gsdl_handlers, /* [#8] MIME-typed-dispatched handlers */ NULL, /* [#1] URI to filename 
translation */ NULL, /* [#4] validate user id from request */ NULL, /* [#5] check if the user is ok _here_ */ NULL, /* [#3] check access by host address */ NULL, /* [#6] determine MIME type */ NULL, /* [#7] pre-run fixups */ NULL, /* [#9] log a transaction */ NULL, /* [#2] header parser */ gsdl_init, /* child_init */ NULL, /* child_exit */ NULL /* [#0] post read-request */ #ifdef EAPI ,NULL, /* EAPI: add_module */ NULL, /* EAPI: remove_module */ NULL, /* EAPI: rewrite_command */ NULL, /* EAPI: new_connection */ NULL /* EAPI: close_connection */ #endif }; }; #else static void gsdl_register_hooks(ap_pool *p) { ap_hook_child_init(gsdl_init,NULL,NULL,APR_HOOK_MIDDLE); ap_hook_handler(gsdl_handler, NULL, NULL, APR_HOOK_MIDDLE); } extern "C" { module AP_MODULE_DECLARE_DATA gsdl_module = { STANDARD20_MODULE_STUFF, gsdl_create_dir_config, /* create per-dir config structures */ NULL, /* merge per-dir config structures */ gsdl_create_config, /* create per-server config structures */ NULL, /* merge per-server config structures */ gsdl_cmds, /* command handlers */ gsdl_register_hooks, /* set up other request processing hooks */ }; }; #endif