/********************************************************************** * * collectoraction.cpp -- * Copyright (C) 2000 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ // note that the collectoraction relies on having direct access to a // collections configuration file. this breaks the separation between // receptionist and collection server and so is not suitable (at least // in its current form) for use when collection servers are separate // from the receptionist (e.g. when using the CORBA protocol). // following line required to get fstream.filedesc() on darwin (Mac OS X) #define _STREAM_COMPAT 1 // required for utsname on solaris??? #define _XOPEN_SOURCE 1 #define _XOPEN_SOURCE_EXTENDED 1 #include "collectoraction.h" #include "OIDtools.h" #include "fileutil.h" #include "cfgread.h" #include "gsdltools.h" #include "gsdltimes.h" #include "nullproto.h" #include "argdb.h" #include "cgiutils.h" #if !defined (__WIN32__) #include #include #endif collectoraction::collectoraction () { recpt = NULL; disabled = true; do_mkcol = false; badsources = false; failedsources.erase(failedsources.begin(), failedsources.end()); gsdlosc = NULL; gsdlhomec = NULL; pathc = NULL; cgiarginfo arg_ainfo; arg_ainfo.shortname = "a"; arg_ainfo.longname = "action"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "collector"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "p"; arg_ainfo.longname = "page"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "intro"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // temporary directory name for this collector // session arg_ainfo.shortname = "bc1tmp"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1fullname"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1dirname"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1contactemail"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1aboutdesc"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1clone"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1clonecol"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); // set when cloning option has changed arg_ainfo.shortname = "bc1clonechanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // only set when one of the fields was changed in // the "collection info" page arg_ainfo.shortname = "bc1infochanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // only set when cfg file is altered from within // "configure collection" page arg_ainfo.shortname = "bc1cfgchanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "cfgfile"; arg_ainfo.longname = "configuration file contents"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1dodelete"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // will be set if we arrived at the "configure collection" page // via the "changing an existing collection" page arg_ainfo.shortname = "bc1econf"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // will be set if we arrived at the "source data" page // via the "changing an existing collection" page arg_ainfo.shortname = "bc1esrce"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1inputnum"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "3"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1input"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1inputtype"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = ""; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); // will be set when we've just come from the "source data" page arg_ainfo.shortname = "bc1fromsrce"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); } collectoraction::~collectoraction () { if (gsdlosc != NULL) delete gsdlosc; if (gsdlhomec != NULL) delete gsdlhomec; if (pathc != NULL) delete pathc; } void collectoraction::configure (const text_t &key, const text_tarray &cfgline) { if ((key == "collector") && (cfgline.size() == 1) && (cfgline[0] == "true" || cfgline[0] == "on" || cfgline[0] == "enabled")) { disabled = false; } else { // call the parent class to deal with the things which // are not dealt with here action::configure (key, cfgline); } } bool collectoraction::init (ostream & /*logout*/) { // set up GSDLOS, GSDLHOME and PATH environment variables text_t gsdlos, path; unsigned int path_separator = ':'; #if defined (__WIN32__) gsdlos = "windows"; path_separator = ';'; path = filename_cat (gsdlhome, "bin", "windows", "perl", "bin;"); #else struct utsname *buf = new struct utsname(); int i = uname (buf); if (i == -1) gsdlos = "linux"; // uname failed else gsdlos.setcstr (buf->sysname); delete buf; lc (gsdlos); #endif pathc = getenv ("PATH"); path += filename_cat (gsdlhome, "bin", gsdlos); path.push_back (path_separator); path += filename_cat (gsdlhome, "bin", "script"); if (pathc != NULL) { path.push_back (path_separator); path += pathc; } path = "PATH=" + path; gsdlos = "GSDLOS=" + gsdlos; text_t setgsdlhome = "GSDLHOME=" + gsdlhome; // these will be cleaned up in the destructor gsdlosc = gsdlos.getcstr(); gsdlhomec = setgsdlhome.getcstr(); pathc = path.getcstr(); putenv (gsdlosc); putenv (gsdlhomec); putenv (pathc); return true; } bool collectoraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args, ostream &logout) { text_t ¤t_page = args["p"]; // note that the "bildstatus" and "bildframe1" pages don't actually do anything // functional so we don't need to worry about authenticating them (it's the // underlying "bild" page that does the building (and creates the frameset)) // This helps us overcome a bit of a problem we have with multiple pages trying // to read from the key.db database at the same time. if (current_page != "intro" && current_page != "bildstatus" && current_page != "bildframe1") { // authenticate the user if authentication is available args["uan"] = 1; args["ug"] = "colbuilder"; } if (current_page == "new" || current_page == "existing") { // assign (and create) a temporary directory assign_tmpname (args, logout); // clean up any old builds left laying about in the tmp directory // (note that it's possible this could take some time if there's a huge // partially built collection laying about so we'll make it an asynchronous // system call) gsdl_system ("perl -S cleantmp.pl", false, logout); } if (current_page != "intro" && current_page != "bildstatus" && current_page != "bildframe1" && current_page != "new") { // update arguments that were saved to the harddrive text_tmap saved_args; saved_args["bc1fullname"] = ""; saved_args["bc1contactemail"] = ""; saved_args["bc1aboutdesc"] = ""; saved_args["bc1clone"] = ""; saved_args["bc1clonecol"] = ""; saved_args["bc1inputnum"] = ""; saved_args["bc1input"] = ""; saved_args["bc1inputtype"] = ""; // update the argdb database with any arguments that were set // by previous page text_tmap::iterator here = saved_args.begin(); text_tmap::iterator end = saved_args.end(); while (here != end) { if (args.lookupcgiarg((*here).first).source != cgiarg_t::default_arg) { (*here).second = args[(*here).first]; } here++; } argdb *args_on_disk = new argdb(filename_cat(gsdlhome, "tmp", args["bc1tmp"], "argdb.db")); if (!args_on_disk->update_args(saved_args)) { // error } // update args from argdb saved_args.erase(saved_args.begin(), saved_args.end()); if (!args_on_disk->get_args(saved_args)) { // error } delete args_on_disk; here = saved_args.begin(); end = saved_args.end(); while (here != end) { if (!(*here).second.empty()) { args[(*here).first] = (*here).second; } here ++; } } if (args["bc1infochanged"] == "1") { if (args["bc1dirname"].empty()) { // we've just come from the "collection information" page for the // first time so we'll need to create the collection with mkcol.pl // and set up bc1dirname - we do this part here instead of in do_action // because the bc1dirname argument must be set to its new value before // the compressedoptions macros are set. args["bc1dirname"] = get_directory_name (args["bc1fullname"]); text_t createfile = filename_cat (gsdlhome, "tmp", args["bc1tmp"], ".create"); if (!file_exists (createfile)) { // we could do the mkcol.pl here but I guess it's nicer to do it in do_action() do_mkcol = true; } else { // .create file already exists but bc1dirname wasn't set ... this should only be // able to occur when the "reload" (and possibly the "back" and "forward" buttons) // have been used to get us here. // we'll check that the bc1dirname directory exists (in case of the unlikely // possibility that get_directory_name returned a different value this time // than it did originally). text_t coldir = filename_cat (get_collectdir(args), args["bc1dirname"]); if (!directory_exists (coldir)) { message = "reloaderror"; return true; } } } else { // "collection information" has been changed after collection already exists // so we'll need to update the cfg file. update_cfgfile_partial (args, false, logout); } } if (args["bc1cfgchanged"] == "1") { // configuration file has been changed from the "configure collection" // page. we need to update the file on disk and catch bc1 arguments up // with changes. update_cfgfile_complete (args, logout); } if (args["bc1clonechanged"] == "1") { // cloning option has been changed on "source data" page. if it was turned // on we want to create a new collect.cfg file using the bc1clonecol cfg file // as a model (we'll save the old file as collect.cfg.org). if cloning was // turned off we'll revert to using the collect.cfg.org file (which will need // updating in case the bc1 arguments have been altered since cloning was // turned on). update_cfgfile_clone (args, logout); // if cloning has just been turned on we'll also copy the rest of the files // (excluding collect.cfg which we've already done) from the cloned collections // etc directory to the new collection. if (args["bc1clone"] == "1") { text_t clone_etc = filename_cat(gsdlhome, "collect", args["bc1clonecol"], "etc"); text_t new_etc = filename_cat(get_collectdir(args), args["bc1dirname"], "etc"); text_tarray files; if (read_dir (clone_etc, files)) { text_tarray::const_iterator here = files.begin(); text_tarray::const_iterator end = files.end(); while (here != end) { if (*here != "collect.cfg" && *here != "collect.cfg.org") { file_copy (filename_cat(clone_etc, *here), filename_cat(new_etc, *here)); } here ++; } } else { outconvertclass text_t2ascii; logout < cfgarray; // read in cfg file ifstream cfg_in (cfgfilec); if (cfg_in) { text_tarray cfgline; while (read_cfg_line(cfg_in, cfgline) >= 0) { if (cfgline.size () >= 2) { if (cfgline[0] == "creator" || cfgline[0] == "maintainer") { cfgline[1] = args["bc1contactemail"]; } else if (cfgline[0] == "collectionmeta") { if (cfgline[1] == "collectionname") { cfgline[2] = args["bc1fullname"]; } else if (cfgline[1] == "collectionextra") { cfgline[2] = carriage_replace (args["bc1aboutdesc"], 0); } else if (clone && (cfgline[1] == "iconcollection" || cfgline[1] == "iconcollectionsmall")) { cfgline[2] = ""; } } } cfgarray.push_back (cfgline); } cfg_in.close(); // now write cfg file back out #ifdef __WIN32__ ofstream cfg_out (cfgfilec, ios::binary); #else ofstream cfg_out (cfgfilec); #endif if (cfg_out) { // lock the file int fd = GSDL_GET_FILEDESC(cfg_out); int lock_val = 1; GSDL_LOCK_FILE (fd); if (lock_val != 0) { logout << "Error: Couldn't lock file " << cfgfilec << "\n"; cfg_out.close(); message = "tmpfail"; } else { vector::const_iterator this_line = cfgarray.begin(); vector::const_iterator end_line = cfgarray.end(); while (this_line != end_line) { write_cfg_line (cfg_out, *this_line); this_line ++; } GSDL_UNLOCK_FILE (fd); cfg_out.close(); } } else { logout << "collectoraction::update_cfgfile_partial: unable to open " << cfgfilec << " for output\n"; message = "tmpfail"; } } else { logout << "collectoraction::update_cfgfile_partial: unable to open " << cfgfilec << " for input\n"; message = "tmpfail"; } delete cfgfilec; } // replace configuration file on disk with that in the cfgfile argument and // catch other bc1 arguments up with those the new cfgfile contains void collectoraction::update_cfgfile_complete (cgiargsclass &args, ostream &logout) { text_t cfgfile = filename_cat(get_collectdir(args), args["bc1dirname"], "etc", "collect.cfg"); char *cfgfilec = cfgfile.getcstr(); #ifdef __WIN32__ // make sure collect.cfg isn't read-only _chmod (cfgfilec, _S_IREAD | _S_IWRITE); ofstream cfg_out (cfgfilec, ios::binary); #else ofstream cfg_out (cfgfilec); #endif if (cfg_out) { // lock the file int fd = GSDL_GET_FILEDESC(cfg_out); int lock_val = 1; GSDL_LOCK_FILE (fd); if (lock_val != 0) { logout << "Error: Couldn't lock file " << cfgfilec << "\n"; cfg_out.close(); message = "tmpfail"; } else { outconvertclass text_t2ascii; cfg_out << text_t2ascii << args["cfgfile"]; GSDL_UNLOCK_FILE (fd); cfg_out.close(); // now that we've written the file we'll read it back again and // update our bc1 arguments ifstream cfg_in (cfgfilec); if (cfg_in) { text_tarray cfgline; while (read_cfg_line(cfg_in, cfgline) >= 0) { if (cfgline.size () >= 2) { if (cfgline[0] == "creator") { args["bc1contactemail"] = cfgline[1]; } else if (cfgline[0] == "collectionmeta") { if (cfgline[1] == "collectionname") { args["bc1fullname"] = cfgline[2]; } else if (cfgline[1] == "collectionextra") { args["bc1aboutdesc"] = carriage_replace (cfgline[2], 1); } } } } cfg_in.close(); } else { logout << "collectoraction::update_cfgfile_complete: unable to open " << cfgfilec << " for input\n"; message = "tmpfail"; } } } else { logout << "collectoraction::update_cfgfile_complete: unable to open " << cfgfilec << " for output\n"; message = "tmpfail"; } delete cfgfilec; } void collectoraction::get_cgihead_info (cgiargsclass &/*args*/, recptprotolistclass * /*protos*/, response_t &response,text_t &response_data, ostream &/*logout*/) { response = content; response_data = "text/html"; } // return html for buttons used in collector bar // color may be "green", "grey", or "yellow" // type may be: // "info" --> "collection information" button // "srce" --> "source data" button // "conf" --> "configure collection" button // "bild" --> "build collection" button // "view" --> "view collection" button // if enabled is true button will be flashy rollover type and // will be hyperlinked text_t collectoraction::get_button (const text_t &thispage, const text_t &color, const text_t &type, bool enabled) { if ((color != "green" && color != "grey" && color != "yellow") || (type != "info" && type != "srce" && type != "conf" && type != "bild" && type != "view")) return ""; text_t prefix = "gc"; if (color == "grey") prefix = "nc"; else if (color == "yellow") prefix = "yc"; text_t httpicon = "httpicon" + prefix + type; if (enabled) { text_t gsmacro = "_gsimage_"; if (thispage == "info" || thispage == "srce" || thispage == "conf" || thispage == "bildcancel" || thispage == "bildfail") { gsmacro = "_gsjimage_"; } else if (type == "view") { // view button is special case as it needs a target=_top gsmacro = "_gstimage_"; } return "" + gsmacro + "(_collector:http" + type + "_,_collector:" + httpicon + "of_,_collector:" + httpicon + "on_," + type + ",_collector:text" + type + "_)\n"; } else { return "_icon" + prefix + type + "of_\n"; } } // set the _fullnamemenu_ macro (and _warnindex_ and _selectedindex_ if // we're on the "srce" page) void collectoraction::set_fullnamemenu (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { if (recpt == NULL) { logout << "ERROR (collectoraction::set_fullnamemenu): This action does not contain\n" << " information about any receptionists. The method set_receptionist was\n" << " probably not called from the module which instantiated this action.\n"; return; } text_t ¤t_page = args["p"]; text_t currentname = args["bc1dirname"]; if (current_page == "srce") currentname = args["bc1clonecol"]; text_tarray dirnames; text_tarray fullnames; vector write_protected; bool is_selected = false; int selected_index = 0; int index = 0; recptprotolistclass::iterator rprotolist_here = protos->begin(); recptprotolistclass::iterator rprotolist_end = protos->end(); while (rprotolist_here != rprotolist_end) { if ((*rprotolist_here).p != NULL) { // don't include z39.50 collections comerror_t err = noError; if ((*rprotolist_here).p->get_protocol_name (err) == "z3950proto") { rprotolist_here ++; continue; } text_tarray collist; (*rprotolist_here).p->get_collection_list (collist, err, logout); if (err == noError) { text_tarray::iterator collist_here = collist.begin(); text_tarray::iterator collist_end = collist.end(); FilterResponse_t response; text_tset metadata; metadata.insert ("collectionname"); while (collist_here != collist_end) { ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); if (cinfo != NULL) { text_t collectionname = *collist_here; if (!cinfo->collectionmeta["collectionname"].empty()) { // get collection name from the collection cfg file collectionname = cinfo->collectionmeta["collectionname"]; } else if (get_info ("collection", *collist_here, metadata, false, (*rprotolist_here).p, response, logout)) { // get collection name from gdbm file collectionname = response.docInfo[0].metadata["collectionname"].values[0]; } dirnames.push_back(*collist_here); fullnames.push_back(collectionname); // check to see if the collection is writable if (collection_protected (*collist_here)) write_protected.push_back(true); else write_protected.push_back(false); if (*collist_here == currentname) { is_selected = true; selected_index = index; } index ++; } collist_here ++; } } } rprotolist_here ++; } bool first = true; text_t warnindex; text_t fullnamemenu = "\n"; fullnamemenu += "