/**********************************************************************
 *
 * wizardaction.cpp --
 * Copyright (C) 2000  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "gsdl_modules_cfg.h"

#ifdef GSDL_USE_WIZARD_ACTION

// note that the wizardaction relies on having direct access to a
// collections configuration file. this breaks the separation between
// receptionist and collection server and so is not suitable (at least
// in its current form) for use when collection servers are separate
// from the receptionist (e.g. when using the CORBA protocol).

// following line required to get fstream.filedesc() on darwin (Mac OS X)
#define _STREAM_COMPAT 1
// required for utsname on solaris???
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 1
#endif
// This was added for Solaris, but it makes things worse on Solaris for me...
// #define _XOPEN_SOURCE_EXTENDED 1

#include "wizardaction.h"
#include "OIDtools.h"
#include "fileutil.h"
#include "cfgread.h"
#include "gsdltools.h"
#include "gsdltimes.h"
#include "argdb.h"
#include "cgiutils.h"

// NOTE(review): the header names of the four bare #include directives below
// are missing from this copy of the file (the angle-bracket text appears to
// have been stripped). They are kept as found; restore the names from the
// upstream source before building.
#include
#include

#if !defined (__WIN32__)
#include
#include
#endif

// Construct a wizard action that is disabled, has no receptionist attached
// and uses an empty cgi-argument prefix.
wizardaction::wizardaction () {
  recpt = NULL;
  disabled = true;
  macro_prefix = "";
}

wizardaction::~wizardaction () {
}

// Handle one configuration line. A line whose key is this action's name and
// whose single value is "true", "on" or "enabled" enables the action; every
// other line is passed on to action::configure.
void wizardaction::configure (const text_t &key, const text_tarray &cfgline) {

  const text_t& action_name = get_action_name();

  if ((key == action_name) && (cfgline.size() == 1) &&
      (cfgline[0] == "true" || cfgline[0] == "on" || cfgline[0] == "enabled")) {
    disabled = false;
  } else {
    // call the parent class to deal with the things which
    // are not dealt with here
    action::configure (key, cfgline);
  }
}

// Initialise the action; returns the result of set_gsdl_env_vars(gsdlhome).
bool wizardaction::init (ostream & /*logout*/) {
  return set_gsdl_env_vars(gsdlhome);
}

// Report the response type for this action: always "content" with a
// "text/html" payload.
void wizardaction::get_cgihead_info (cgiargsclass &/*args*/, recptprotolistclass * /*protos*/,
                                     response_t &response, text_t &response_data,
                                     ostream &/*logout*/) {
  response = content;
  response_data = "text/html";
}

// Synchronise the cgi arguments with the copy stored through argdb.
//
// On every page except "intro", "bildframe1", "new" and "select":
//   1. every argument whose name starts with macro_prefix is written to the
//      argdb identified by args[macro_prefix+"tmp"];
//   2. the stored arguments are read back and every non-empty stored value
//      overwrites the corresponding entry in args.
// argdb failures are logged to logout but are not fatal.
// Always returns true.
bool wizardaction::check_cgiargs (cgiargsinfoclass & /*argsinfo*/, cgiargsclass &args,
                                  recptprotolistclass * /*protos*/, ostream &logout) {

  text_t current_page = args["p"];
  if (current_page != "intro" && current_page != "bildframe1" &&
      current_page != "new" && current_page != "select") {

    // arguments to be written to (and later re-read from) the harddrive
    text_tmap saved_args;

    // collect any arguments that were set by the previous page, as denoted
    // by the prefix (eg. di1 or bc1)
    const int prefix_len = macro_prefix.size();
    cgiargsclass::const_iterator args_here = args.begin();
    cgiargsclass::const_iterator args_end = args.end();

    while (args_here != args_end) {
      text_t args_name = (*args_here).first;
      const int args_name_len = args_name.size();

      // only names at least as long as the prefix can start with it
      if (args_name_len >= prefix_len) {
        text_t args_prefix;
        if (args_name_len > prefix_len) {
          args_prefix = substr (args_name.begin(), args_name.begin()+prefix_len);
        } else {
          args_prefix = args_name;
        }

        if (args_prefix == macro_prefix) {
          saved_args[args_name] = args[args_name];
        }
      }
      ++args_here;
    }

    // automatic storage: the argdb is released on every path out of this
    // scope (the original allocated it with new and deleted it by hand)
    argdb args_on_disk (gsdlhome, args[macro_prefix+"tmp"]);
    if (!args_on_disk.update_args(saved_args)) {
      // error
      logout << "wizardaction: argdb::update_args failed\n";
    }

    // update args from argdb
    saved_args.erase(saved_args.begin(), saved_args.end());
    if (!args_on_disk.get_args(saved_args)) {
      // error
      logout << "wizardaction: argdb::get_args failed\n";
    }

    // empty stored values never overwrite what is already in args
    text_tmap::iterator sa_here = saved_args.begin();
    text_tmap::iterator sa_end = saved_args.end();
    while (sa_here != sa_end) {
      if (!(*sa_here).second.empty()) {
        args[(*sa_here).first] = (*sa_here).second;
      }
      ++sa_here;
    }
  }

  return true;
}

// tests if collection is write protected (currently just checks if
// the collect.cfg file is writable)
bool wizardaction::collection_protected (const text_t &collection) {
  text_t cfgfile = filename_cat(collecthome, collection, "etc", "collect.cfg");
  return !file_writable(cfgfile);
}

// set the _statusline_ macro (in both the "collector" and "depositor"
// packages) to a stage heading followed by the result of
// file_tail(<stage file>, 1, 0) passed through dm_safe
void wizardaction::set_statusline (displayclass &disp, cgiargsclass &args, ostream & /*logout*/) {

  // the build command creates .bld.download, .bld.import, and .bld.build files (in that
  // order) and deletes them (also in that order) when each stage is complete. the .bld
  // file is the concatenation of all these files.
  text_t bld_file = filename_cat (gsdlhome, "tmp", args[macro_prefix+"tmp"],
                                  args[macro_prefix+"dirname"] + ".bld");

  // NOTE(review): in this copy of the file each of the four headings below has
  // a raw line break inside the string literal, directly before the "\n"
  // escape. A raw line break is not valid inside a C++ string literal, so it is
  // written here as an extra "\n" escape, which keeps the visible bytes. It is
  // presumably stripped markup (e.g. a line-break tag); confirm against the
  // upstream source.
  text_t statusline;
  if (file_exists (bld_file + ".download")) {
    statusline = "_collector:textdownloadingfiles_\n\n";
    statusline += dm_safe(file_tail(bld_file + ".download", 1, 0));
  } else if (file_exists (bld_file + ".import")) {
    statusline = "_collector:textimportingcollection_\n\n";
    statusline += dm_safe(file_tail(bld_file + ".import", 1, 0));
  } else if (file_exists (bld_file + ".build")) {
    statusline = "_collector:textbuildingcollection_\n\n";
    statusline += dm_safe(file_tail(bld_file + ".build", 1, 0));
  } else {
    // no per-stage file exists: report from the combined .bld file
    statusline += "_collector:textcreatingcollection_\n\n";
    statusline += dm_safe(file_tail(bld_file, 1, 0));
  }

  disp.setmacro ("statusline", "collector", statusline);
  disp.setmacro ("statusline", "depositor", statusline);
}

// Convert between real line breaks and the two-character sequence
// backslash + 'n'.
//
// if sw == 0: every '\n', every '\r', and every two-character pair of them
//             (in either order) is replaced by a backslash followed by 'n'
// otherwise:  every backslash followed by 'n' is replaced by a single '\n'
// All other characters are copied unchanged.
text_t wizardaction::carriage_replace (const text_t &intext, int sw) {

  text_t outtext;
  text_t::const_iterator here = intext.begin();
  text_t::const_iterator end = intext.end();
  while (here != end) {
    if (sw == 0) {
      if (*here == '\n') {
        // swallow the second half of a "\n\r" pair
        if ((here+1) != end && *(here+1) == '\r') ++here;
        outtext += "\\n";
      } else if (*here == '\r') {
        // swallow the second half of a "\r\n" pair
        if ((here+1) != end && *(here+1) == '\n') ++here;
        outtext += "\\n";
      } else {
        outtext.push_back (*here);
      }
    } else if (*here == '\\' && (here+1) != end && *(here+1) == 'n') {
      outtext.push_back ('\n');
      ++here;
    } else {
      outtext.push_back (*here);
    }
    ++here;
  }
  return outtext;
}

// create a short directory name from fullname
//
// Letters are lower-cased, everything except letters and spaces is dropped,
// and runs of spaces are collapsed. Up to six words are used, each
// contributing at most 6 / (number of words used) characters. If nothing
// usable is left the name "coll" is used. If a directory of that name already
// exists under collecthome, "v1", "v2", ... is appended until the name is
// unused.
text_t wizardaction::get_directory_name (const text_t &fullname) {

  text_t shortname;
  if (fullname.empty()) {
    shortname = "coll";

  } else {

    // first make all lowercase and remove any dodgy characters
    // (i.e. anything not [a-z] or a space)
    text_t::const_iterator here = fullname.begin();
    text_t::const_iterator end = fullname.end();
    while (here != end) {
      if (*here >= 'A' && *here <= 'Z') {
        shortname.push_back (*here+32);
      } else if (*here >= 'a' && *here <= 'z') {
        shortname.push_back (*here);
      } else if (*here == ' ') {
        // collapse a run of spaces into one. The bounds check matters: the
        // original read *(here+1) unconditionally and so dereferenced end()
        // whenever fullname finished with a space.
        while ((here+1) != end && *(here+1) == ' ') ++here;
        shortname.push_back (*here);
      }
      ++here;
    }

    text_tarray words;
    splitchar (shortname.begin(), shortname.end(), ' ', words);
    int num_words = words.size();

    if (num_words == 0) {
      shortname = "coll";

    } else {

      shortname.clear();
      int use_words = (num_words <= 6) ? num_words : 6;
      unsigned int substr_len = 6 / use_words;

      for (int i = 0; i < use_words; ++i) {
        if (words[i].size() < substr_len) shortname += words[i];
        else shortname += substr (words[i].begin(), words[i].begin()+substr_len);
      }

      // every word used may have been empty (e.g. a name made only of
      // spaces); fall back to the default rather than an empty name
      if (shortname.empty()) shortname = "coll";
    }
  }

  // check to see if shortname is unique
  text_t fulldirname = filename_cat (collecthome, shortname);
  if (directory_exists (fulldirname)) {
    int version = 0;
    text_t newname;
    do {
      ++version;
      newname = shortname;
      newname.push_back ('v');
      newname.appendint (version);
      fulldirname = filename_cat (collecthome, newname);
    } while (directory_exists (fulldirname));

    shortname = newname;
  }

  return shortname;
}

// assigns a temporary directory name for this collector session
// and creates the temporary directory (gsdlhome/tmp/tbuild<N>, using the
// first N >= 0 for which no directory exists yet). on success the name is
// stored in args[macro_prefix+"tmp"].
// returns false if it couldn't create the directory
bool wizardaction::assign_tmpname (cgiargsclass &args, ostream &logout) {

  int i = 0;
  text_t tmpname = "tbuild";
  while (directory_exists (filename_cat (gsdlhome, "tmp", tmpname + text_t(i)))) {
    ++i;
  }
  tmpname.appendint (i);

  text_t fulltmpdir = filename_cat (gsdlhome, "tmp", tmpname);
  if (!mk_dir (fulltmpdir)) {
    outconvertclass text_t2ascii;
    logout << text_t2ascii << "wizardaction::assign_tmpname unable to create directory ("
           << fulltmpdir << ")\n";
    return false;
  }

  args[macro_prefix + "tmp"] = tmpname;
  return true;
}

// set the _fullnamemenu_ macro (and _warnindex_ and _selectedindex_ if
// we're on the "srce" page)
//
// Walks every protocol in protos (skipping "z3950proto"), and for each
// collection that has collection info records its directory name, display
// name, write-protection state and build type.
void wizardaction::set_fullnamemenu (displayclass &disp, cgiargsclass &args,
                                     recptprotolistclass *protos, ostream &logout) {

  if (recpt == NULL) {
    logout << "ERROR (wizardaction::set_fullnamemenu): This action does not contain\n"
           << " information about any receptionists. The method set_receptionist was\n"
           << " probably not called from the module which instantiated this action.\n";
    return;
  }

  // restored from a mis-decoded "&current_page" in this copy of the file
  text_t &current_page = args["p"];
  text_t currentname = args[macro_prefix+"dirname"];
  if (current_page == "srce") currentname = args[macro_prefix + "clonecol"];

  text_tarray dirnames;
  text_tarray fullnames;
  // element type restored: only true/false are ever pushed below
  vector<bool> write_protected;
  text_tarray build_type;

  bool is_selected = false;
  int selected_index = 0;
  int index = 0;

  recptprotolistclass::iterator rprotolist_here = protos->begin();
  recptprotolistclass::iterator rprotolist_end = protos->end();
  while (rprotolist_here != rprotolist_end) {
    if ((*rprotolist_here).p != NULL) {

      // don't include z39.50 collections
      comerror_t err = noError;
      if ((*rprotolist_here).p->get_protocol_name (err) == "z3950proto") {
        ++rprotolist_here;
        continue;
      }

      text_tarray collist;
      (*rprotolist_here).p->get_collection_list (collist, err, logout);
      if (err == noError) {
        text_tarray::iterator collist_here = collist.begin();
        text_tarray::iterator collist_end = collist.end();
        while (collist_here != collist_end) {
          ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
          if (cinfo != NULL) {
            // fall back to the directory name when no display name is set
            text_t collectionname = cinfo->get_collectionmeta("collectionname", args["l"]);
            if (collectionname.empty()) {
              collectionname = *collist_here;
            }
            dirnames.push_back(*collist_here);
            fullnames.push_back(collectionname);

            // check to see if the collection is writable
            if (collection_protected (*collist_here)) write_protected.push_back(true);
            else write_protected.push_back(false);

            // remember build type for collection (mg, mgpp, lucene, ...)
            // used to determine cgi arg 'ct' later on and minus option to 'build'
            build_type.push_back(cinfo->buildType);

            if (*collist_here == currentname) {
              is_selected = true;
              selected_index = index;
            }
            ++index;
          }
          ++collist_here;
        }
      }
    }
    ++rprotolist_here;
  }

  bool first = true;

  text_t warnindex;
  // NOTE(review): this copy of the file is damaged from here down to the
  // setmacro calls. The initialiser of fullnamemenu has evidently lost text,
  // buildtype_jsarray is used without any visible declaration, and none of the
  // lists gathered above (nor first / warnindex) is used by any surviving
  // code, so the menu-building section has presumably been lost. The surviving
  // tokens are kept unchanged; restore the missing section from the upstream
  // source before building.
  text_t fullnamemenu = "\n";
  buildtype_jsarray += ");";

  text_t action_name = get_action_name();
  disp.setmacro ("fullnamemenu", action_name, fullnamemenu);
  disp.setmacro ("buildtypearray", action_name, buildtype_jsarray);
}

// checks to see if any of the plugins in pluginset (currently only
// "HBPlug") occur in the collection's configuration file
bool wizardaction::uses_weird_plugin (const text_t &collection) {

  text_tset pluginset;
  pluginset.insert ("HBPlug");

  text_t cfgfile_content;
  text_t cfgfile_name = filename_cat (collecthome, collection, "etc", "collect.cfg");
  text_t pluginstr, pluginname;

  if (read_file (cfgfile_name, cfgfile_content)) {
    text_t::const_iterator here = cfgfile_content.begin();
    text_t::const_iterator end = cfgfile_content.end();
    while (here != end) {
      here = findchar (here, end, 'p');
      if (here == end) break;
      // compare distances instead of forming here+6, which could point past
      // end() near the end of the file
      if (((end - here) > 6) && (substr (here, here+6) == "plugin")) {
        getdelimitstr (here+6, end, '\n', pluginstr);
        text_t::const_iterator hp = pluginstr.begin();
        text_t::const_iterator ep = pluginstr.end();
        bool found = false;
        // remove any leading whitespace, trailing options etc.
        while (hp != ep) {
          if (*hp == '\t' || *hp == ' ' || *hp == '\n') {
            if (found) break;
          } else {
            pluginname.push_back (*hp);
            found = true;
          }
          ++hp;
        }
        text_tset::const_iterator it = pluginset.find (pluginname);
        if (it != pluginset.end()) return true; // found matching plugin
        pluginname.clear();
      }
      ++here;
    }
  }
  return false;
}

// Start building the collection named by args[macro_prefix+"dirname"].
//
// Writes a ".build" placeholder and a "build.opt" option file into the
// session's temporary directory and then launches the "build" perl script in
// the background. Does nothing if the placeholder already exists. On failure
// sets message to "tmpfail" or "nocollection" and returns.
void wizardaction::gsdl_build (cgiargsclass &args, ostream &logout) {

  outconvertclass text_t2ascii;

  //check to see if the tbuild directory exists
  text_t tmpdir = filename_cat (gsdlhome, "tmp", args[macro_prefix+"tmp"]);
  if (!directory_exists (tmpdir)) {
    message = "tmpfail";
    return;
  }

  //check to see if collection name specified
  text_t &collection = args[macro_prefix+"dirname"];
  if (collection.empty()) {
    message = "nocollection";
    return;
  }

  // check for a .build file - if it exists then we've already built
  // the collection (or are in the process of building it)
  text_t buildfile = filename_cat (tmpdir, ".build");
  if (file_exists (buildfile)) {
    return;
  } else {
    // create the .build file (this file is just a place holder to let any future
    // pages know that we've already been here)
    char *buildfilec = buildfile.getcstr();
    ofstream bfile_out (buildfilec);
    delete []buildfilec;
    if (bfile_out) {
      bfile_out << "collection building\n";
      bfile_out.close();
    } else {
      message = "tmpfail";
      return;
    }
  }

  //FLAG!
  const recptconf &rcinfo = recpt->get_configinfo ();

  // create the event header file if LogEvents, EmailEvents or
  // EmailUserEvents options are turned on.
  bool logevents =
    (rcinfo.LogEvents == CollectorEvents || rcinfo.LogEvents == AllEvents ||
     rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents ||
     rcinfo.EmailUserEvents);
  text_t ehead_file = filename_cat (tmpdir, "ehead.txt");
  if (logevents) {
    if (!create_event_header_file (ehead_file, args, logout)) {
      logevents = false;
    }
  }

  text_t collectdir = get_collectdir (args);

  // set up build options
  //text_t options = "-make_writable -remove_import -out \"";
  text_t options = "-make_writable -out \"";
  options += filename_cat (tmpdir, collection + ".bld");
  options += "\" -collectdir \"" + collectdir + "\" -statsfile \"";
  options += filename_cat(collectdir, collection, "etc", "import.log") + "\"";

  if (args[macro_prefix+"esrce"] == 1) {
    // we're adding data to an existing collection
    options += " -save_archives -append -manifest";
  }

  text_tarray inputvalues, inputtypes;
  splitchar (args[macro_prefix+"input"].begin(),
             args[macro_prefix+"input"].end(), ',', inputvalues);
  splitchar (args[macro_prefix+"inputtype"].begin(),
             args[macro_prefix+"inputtype"].end(), ',', inputtypes);
  int numvalues = inputvalues.size();
  int numtypes = inputtypes.size();
  for (int i = 0; i < numvalues; ++i) {
    if (!inputvalues[i].empty()) {
      text_t type = "file://"; // default
      if (i < numtypes) type = inputtypes[i];
      options += " -download \"" +
        remove_trailing_slashes(type + format_url(decode_commas(inputvalues[i]))) + "\"";
    }
  }

  if (logevents) {
    if (rcinfo.LogEvents == CollectorEvents || rcinfo.LogEvents == AllEvents)
      options += " -log_events";
    if (rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents) {
      options += " -mail_server " + rcinfo.MailServer;
      options += " -email_events " + rcinfo.maintainer;
      if (rcinfo.EmailUserEvents) options += "," + args[macro_prefix+"contactemail"];
    } else if (rcinfo.EmailUserEvents) {
      options += " -mail_server " + rcinfo.MailServer;
      options += " -email_events " + args[macro_prefix+"contactemail"];
    }
    options += " -event_header " + ehead_file;
  }

  text_t indextype = args[macro_prefix+"buildtype"];
  if(indextype == "") {
#ifdef ENABLE_MG
    indextype = "mg";
#else
    cerr << "Unable to choose mg as the default indextype, since the Greenstone installation was not compiled with MG enabled." << endl;
#endif
  }
  options += " -indextype \"" + indextype + "\"";

  text_t optionfile = filename_cat (tmpdir, "build.opt");
  char *optionfilec = optionfile.getcstr();
  ofstream ofile_out (optionfilec);
  delete []optionfilec;
  if (!ofile_out) {
    message = "tmpfail";
    return;
  }
  ofile_out << text_t2ascii << options << "\n";
  ofile_out.close();

  // if we're altering an existing collection we need to kill off
  // the existing collection server - we do this for the local library
  // (and any other persistent version of the library) as the existing
  // database file can't be deleted while the collection server holds it open
  if ((args[macro_prefix+"econf"] == 1) || (args[macro_prefix+"esrce"] == 1)) {
    remove_colservr (collection, logout);
  }

  // set up the build command - build.bat has some issues with quoting
  // on win2k when gsdlhome contains spaces so we'll avoid using
  // "perl -S" here in favor of calling the "build" perl script explicitly
  //
  // NOTE(review): collection comes straight from the cgi arguments and is
  // appended to the command line unquoted and unescaped. Confirm that it is
  // validated before this point, otherwise this is a shell-injection risk.
  text_t build_cmd = "perl \"" + filename_cat (gsdlhome, "bin", "script", "build");
  build_cmd += "\" -optionfile \"" + optionfile + "\" " + collection;
  // run build command in background (i.e. asynchronously)
  gsdl_system (build_cmd, false, logout);
}

// Cancel a running build by running cancel_build.pl synchronously on the
// session's temporary directory.
void wizardaction::gsdl_cancel_build (cgiargsclass &args, ostream &logout) {
  // I really wanted to do what this perl script does from within the library
  // c++ code. I ran into some problems though (like how do you write a portable
  // "rm -r" in c++?). One day I'll spend some time sorting it out ... maybe.
  //
  // NOTE(review): both cgi argument values below are placed on the command
  // line without escaping (the directory name is not even quoted). Confirm
  // that they are validated upstream, otherwise this is a shell-injection
  // risk.
  text_t cancel_cmd = "perl -S cancel_build.pl -collectdir \"";
  cancel_cmd += filename_cat (gsdlhome, "tmp", args[macro_prefix+"tmp"]) + "\" ";
  cancel_cmd += args[macro_prefix+"dirname"];
  // To be on the safe side we'll make this a synchronous call
  // so that all tidying up is done before the user has a chance
  // to do anything else (like start rebuilding their collection).
  // This means that for a big collection where there's lots of
  // stuff to delete etc. it might take a while before the "build
  // cancelled" page appears.
  gsdl_system (cancel_cmd, true, logout);
}

// Return the directory the collection is built in: collecthome when an
// existing collection is being altered (econf or esrce set to 1), otherwise
// this session's temporary directory under gsdlhome/tmp.
text_t wizardaction::get_collectdir (cgiargsclass &args)
{
  if ((args[macro_prefix+"econf"] == 1) || (args[macro_prefix+"esrce"] == 1)) {
    // we're adding to a collection in place
    return collecthome;
  }
  else {
    return filename_cat (gsdlhome, "tmp", args[macro_prefix+"tmp"]);
  }
}

// create and initialize a new collection server and
// add it to the null protocol.
void wizardaction::create_colserver (const text_t &collection, ostream &logout) {

  recptprotolistclass *protos = recpt->get_recptprotolist_ptr();
  recptprotolistclass::iterator rprotolist_here = protos->begin();
  recptprotolistclass::iterator rprotolist_end = protos->end();
  while (rprotolist_here != rprotolist_end) {
    comerror_t err = noError;
    if ((*rprotolist_here).p != NULL) {
      if ((*rprotolist_here).p->get_protocol_name (err) == "nullproto") {
        // create collection server and add it to nullproto
        (*rprotolist_here).p->add_collection (collection, recpt, gsdlhome, collecthome, dbhome);
        // make sure gsdlhome is configured
        text_tarray tmp;
        tmp.push_back (gsdlhome);
        (*rprotolist_here).p->configure ("gsdlhome", tmp, err);
        // re-initialize the null protocol
        if (!(*rprotolist_here).p->init (err, logout)) {
          logout << "wizardaction::create_colserver: nullproto init failed\n";
        }
        return;
      }
    }
    ++rprotolist_here;
  }

  logout << "wizardaction::create_colserver: no valid nullproto found\n";
}

// delete a collection server from the null protocol (after dropping the
// collection from the receptionist's cache)
void wizardaction::remove_colservr (const text_t &collection, ostream &logout) {

  recpt->uncache_collection (collection);

  recptprotolistclass *protos = recpt->get_recptprotolist_ptr();
  recptprotolistclass::iterator rprotolist_here = protos->begin();
  recptprotolistclass::iterator rprotolist_end = protos->end();
  while (rprotolist_here != rprotolist_end) {
    comerror_t err = noError;
    if ((*rprotolist_here).p != NULL) {
      if ((*rprotolist_here).p->get_protocol_name (err) == "nullproto") {
        (*rprotolist_here).p->remove_collection (collection, logout);
        return;
      }
    }
    ++rprotolist_here;
  }

  // the original message named create_colserver here, which sent anyone
  // reading the log to the wrong function
  logout << "wizardaction::remove_colservr: no valid nullproto found\n";
}

// Write the event header (see get_event_header) to filename.
// Returns true on success; on failure logs the problem and returns false.
bool wizardaction::create_event_header_file (const text_t &filename, cgiargsclass &args,
                                             ostream &logout) {

  outconvertclass text_t2ascii;
  char *filenamec = filename.getcstr();
  ofstream eheadfile (filenamec);
  delete []filenamec;

  if (eheadfile) {
    eheadfile << text_t2ascii << get_event_header (args);
    eheadfile.close();
    return true;
  }

  logout << text_t2ascii << "wizardaction::create_event_header ERROR: Couldn't create "
         << "Event Header file " << filename << ". Event logging disabled\n";
  return false;
}

// Build the text of the event header: user name, collection, creator's
// contact email, GSDLHOME and build location, one per line.
text_t wizardaction::get_event_header (cgiargsclass &args) {
  text_t header = "Greenstone Username: " + args["un"] + "\n";
  header += "Collection: " + args[macro_prefix+"dirname"] + "\n";
  header += "Collection Creator: " + args[macro_prefix+"contactemail"] + "\n";
  header += "GSDLHOME: " + gsdlhome + "\n";
  header += "Build Location: " + get_collectdir(args) + "\n";

  return header;
}

// format_url simply strips "http://", "ftp://", or "file://" off the
// beginning of url if they're there
text_t wizardaction::format_url (const text_t &url) {
  text_t::const_iterator begin = url.begin();
  text_t::const_iterator end = url.end();

  if (url.size() >= 7) {
    text_t prefix = substr(begin, begin+7);
    if (prefix == "http://" || prefix == "file://") {
      return substr(begin+7, end);
    }
  }
  if (url.size() >= 6) {
    if (substr(begin, begin+6) == "ftp://") {
      return substr(begin+6, end);
    }
  }
  return url;
}

// Return str with every trailing backslash removed. (Despite the name, only
// '\\' is stripped, not '/'.)
text_t wizardaction::remove_trailing_slashes (text_t str) {

  // the empty() guard matters: the original dereferenced end()-1 on an empty
  // string, and did so again after emptying a string made only of backslashes
  while (!str.empty() && *(str.end()-1) == '\\') {
    str.pop_back();
  }
  return str;
}

#endif //GSDL_USE_WIZARD_ACTION