/********************************************************************** * * collectoraction.cpp -- * Copyright (C) 2000 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include "gsdl_modules_cfg.h" #ifdef GSDL_USE_COLLECTOR_ACTION // note that the collectoraction relies on having direct access to a // collections configuration file. this breaks the separation between // receptionist and collection server and so is not suitable (at least // in its current form) for use when collection servers are separate // from the receptionist (e.g. when using the CORBA protocol). // following line required to get fstream.filedesc() on darwin (Mac OS X) #define _STREAM_COMPAT 1 // required for utsname on solaris??? #define _XOPEN_SOURCE 1 // This was added for Solaris, but it makes things worse on Solaris for me... // #define _XOPEN_SOURCE_EXTENDED 1 #include "collectoraction.h" #include "OIDtools.h" #include "fileutil.h" #include "cfgread.h" #include "gsdltools.h" #include "gsdltimes.h" #include "argdb.h" #include "cgiutils.h" #include #include #include #if !defined (__WIN32__) #include #include #endif collectoraction::collectoraction () : wizardaction() { macro_prefix = "bc1"; do_mkcol = false; badsources = false; failedsources.erase(failedsources.begin(), failedsources.end()); cgiarginfo arg_ainfo; arg_ainfo.shortname = "a"; arg_ainfo.longname = "action"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "collector"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "p"; arg_ainfo.longname = "page"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "intro"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // temporary directory name for this collector // session arg_ainfo.shortname = "bc1tmp"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1fullname"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1dirname"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1contactemail"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1aboutdesc"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1clone"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1clonecol"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); // set when cloning option has changed arg_ainfo.shortname = "bc1clonechanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // only set when one of the fields was changed in // the "collection info" page arg_ainfo.shortname = "bc1infochanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // only set when cfg file is altered from within // "configure collection" page arg_ainfo.shortname = "bc1cfgchanged"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "cfgfile"; arg_ainfo.longname = "configuration file contents"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1dodelete"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); // will be set if we arrived at the "configure collection" page // via the "changing an existing collection" page arg_ainfo.shortname = "bc1econf"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // will be set if we arrived at the "source data" page // via the "changing an existing collection" page arg_ainfo.shortname = "bc1esrce"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1inputnum"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "3"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1input"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); arg_ainfo.shortname = "bc1inputtype"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk argsinfo.addarginfo (NULL, arg_ainfo); // will be set when we've just come from the "source data" page arg_ainfo.shortname = "bc1fromsrce"; arg_ainfo.longname = "collector specific"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::mustnot; argsinfo.addarginfo (NULL, arg_ainfo); } collectoraction::~collectoraction () { } bool collectoraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args, recptprotolistclass * /*protos*/, ostream &logout) { text_t ¤t_page = args["p"]; // note that the "bildstatus" and "bildframe1" pages don't actually do anything // functional so we don't need to worry about authenticating them (it's the // underlying "bild" page that does the building (and creates the frameset)) // This helps us overcome a bit of a problem we have with multiple pages trying // to read from the key.gdb database at the same time. if (current_page != "intro" && current_page != "bildstatus" && current_page != "bildframe1") { // authenticate the user if authentication is available args["uan"] = 1; args["ug"] = "all-collections-editor"; } if (current_page == "new" || current_page == "existing") { // assign (and create) a temporary directory if (assign_tmpname (args, logout)==false) { // there was an error creating the tmp dir message="tmpfail"; return true; // true because we could still parse the arguments } // clean up any old builds left laying about in the tmp directory // (note that it's possible this could take some time if there's a huge // partially built collecton laying about so we'll make it an asynchronous // system call) gsdl_system ("perl -S cleantmp.pl", false, logout); } if (current_page != "intro" && current_page != "bildstatus" && current_page != "bildframe1" && current_page != "new") { // update arguments that were saved to the harddrive text_tmap saved_args; saved_args["bc1fullname"] = g_EmptyText; saved_args["bc1contactemail"] = g_EmptyText; saved_args["bc1aboutdesc"] = g_EmptyText; saved_args["bc1clone"] = g_EmptyText; saved_args["bc1clonecol"] = g_EmptyText; saved_args["bc1inputnum"] = g_EmptyText; saved_args["bc1input"] = g_EmptyText; saved_args["bc1inputtype"] = g_EmptyText; // update the argdb database with any arguments that were set // by previous page text_tmap::iterator here = saved_args.begin(); text_tmap::iterator end = saved_args.end(); while (here != end) { if (args.lookupcgiarg((*here).first).source != cgiarg_t::default_arg) { (*here).second = args[(*here).first]; } ++here; } text_t argfile = filename_cat(gsdlhome, "tmp", args["bc1tmp"], "argdb.gdb"); argdb *args_on_disk = new argdb(argfile); if (!args_on_disk->update_args(saved_args)) { // error logout << "collectoraction: argdb::update_args failed (" << argfile << ")\n"; } // update args from argdb saved_args.erase(saved_args.begin(), saved_args.end()); if (!args_on_disk->get_args(saved_args)) { // error logout << "collectoraction: argdb::get_args failed (" << argfile << ")\n"; } delete args_on_disk; here = saved_args.begin(); end = saved_args.end(); while (here != end) { if (!(*here).second.empty()) { args[(*here).first] = (*here).second; } ++here; } } if (args["bc1infochanged"] == "1") { if (args["bc1dirname"].empty()) { // we've just come from the "collection information" page for the // first time so we'll need to create the collection with mkcol.pl // and set up bc1dirname - we do this part here instead of in do_action // because the bc1dirname argument must be set to its new value before // the compressedoptions macros are set. args["bc1dirname"] = get_directory_name (args["bc1fullname"]); text_t createfile = filename_cat (gsdlhome, "tmp", args["bc1tmp"], ".create"); if (!file_exists (createfile)) { // we could do the mkcol.pl here but I guess it's nicer to do it in do_action() do_mkcol = true; } else { // .create file already exists but bc1dirname wasn't set ... this should only be // able to occur when the "reload" (and possibly the "back" and "forward" buttons) // have been used to get us here. // we'll check that the bc1dirname directory exists (in case of the unlikely // possibility that get_directory_name returned a different value this time // than it did originally). text_t coldir = filename_cat (get_collectdir(args), args["bc1dirname"]); if (!directory_exists (coldir)) { message = "reloaderror"; return true; } } } else { // "collection information" has been changed after collection already exists // so we'll need to update the cfg file. update_cfgfile_partial (args, false, logout); } } if (args["bc1cfgchanged"] == "1") { // configuration file has been changed from the "configure collection" // page. we need to update the file on disk and catch bc1 arguments up // with changes. update_cfgfile_complete (args, logout); } if (args["bc1clonechanged"] == "1") { // cloning option has been changed on "source data" page. if it was turned // on we want to create a new collect.cfg file using the bc1clonecol cfg file // as a model (we'll save the old file as collect.cfg.org). if cloning was // turned off we'll revert to using the collect.cfg.org file (which will need // updating in case the bc1 arguments have been altered since cloning was // turned on). update_cfgfile_clone (args, logout); // if cloning has just been turned on we'll also copy the rest of the files // (excluding collect.cfg which we've already done) from the cloned collections // etc directory to the new collection. if (args["bc1clone"] == "1") { text_t clone_etc = filename_cat(collecthome, args["bc1clonecol"], "etc"); text_t new_etc = filename_cat(get_collectdir(args), args["bc1dirname"], "etc"); text_tarray files; if (read_dir (clone_etc, files)) { text_tarray::const_iterator here = files.begin(); text_tarray::const_iterator end = files.end(); while (here != end) { if (*here != "collect.cfg" && *here != "collect.cfg.org") { file_copy (filename_cat(clone_etc, *here), filename_cat(new_etc, *here)); } ++here; } } else { outconvertclass text_t2ascii; logout < cfgarray; // read in cfg file ifstream cfg_in (cfgfilec); if (cfg_in) { text_tarray cfgline; while (read_cfg_line(cfg_in, cfgline) >= 0) { if (cfgline.size () >= 2) { if (cfgline[0] == "creator" || cfgline[0] == "maintainer") { cfgline[1] = args["bc1contactemail"]; } else if (cfgline[0] == "collectionmeta") { if (cfgline[1] == "collectionname") { cfgline[2] = args["bc1fullname"]; } else if (cfgline[1] == "collectionextra") { cfgline[2] = carriage_replace (args["bc1aboutdesc"], 0); } else if (clone && (cfgline[1] == "iconcollection" || cfgline[1] == "iconcollectionsmall")) { cfgline[2] = g_EmptyText; } } } cfgarray.push_back (cfgline); } cfg_in.close(); // now write cfg file back out int fd=open(cfgfilec, O_WRONLY | O_CREAT | O_TRUNC #if defined(__WIN32__) | O_BINARY #endif , 432 ); if (fd != -1) { // lock the file int lock_val = 1; GSDL_LOCK_FILE (fd); if (lock_val != 0) { logout << "Error: Couldn't lock file " << cfgfilec << "\n"; close(fd); message = "tmpfail"; } else { vector::const_iterator this_line = cfgarray.begin(); vector::const_iterator end_line = cfgarray.end(); while (this_line != end_line) { write_cfg_line (fd, *this_line); ++this_line; } GSDL_UNLOCK_FILE (fd); close(fd); } } else { logout << "collectoraction::update_cfgfile_partial: unable to open " << cfgfilec << " for output\n"; message = "tmpfail"; } } else { logout << "collectoraction::update_cfgfile_partial: unable to open " << cfgfilec << " for input\n"; message = "tmpfail"; } delete []cfgfilec; } // replace configuration file on disk with that in the cfgfile argument and // catch other bc1 arguments up with those the new cfgfile contains void collectoraction::update_cfgfile_complete (cgiargsclass &args, ostream &logout) { text_t cfgfile = filename_cat(get_collectdir(args), args["bc1dirname"], "etc", "collect.cfg"); char *cfgfilec = cfgfile.getcstr(); #ifdef __WIN32__ // make sure collect.cfg isn't read-only _chmod (cfgfilec, _S_IREAD | _S_IWRITE); #endif int fd=open(cfgfilec, O_WRONLY | O_CREAT | O_TRUNC #if defined(__WIN32__) | O_BINARY #endif , 432 ); if (fd) { // lock the file int lock_val = 1; GSDL_LOCK_FILE (fd); if (lock_val != 0) { logout << "Error: Couldn't lock file " << cfgfilec << "\n"; close(fd); message = "tmpfail"; } else { outconvertclass text_t2ascii; text_t2ascii.setinput(&args["cfgfile"]); size_t buffersize=args["cfgfile"].size(); char *buffer=new char[buffersize]; buffer[0]='\n'; // just in case something goes wrong... size_t num_chars; convertclass::status_t status; text_t2ascii.convert(buffer, buffersize, num_chars, status); // ignore status - assume it is "finished" as buffer is big enough write(fd, buffer, num_chars); GSDL_UNLOCK_FILE (fd); close(fd); delete []buffer; // now that we've written the file we'll read it back again and // update our bc1 arguments ifstream cfg_in (cfgfilec); if (cfg_in) { text_tarray cfgline; while (read_cfg_line(cfg_in, cfgline) >= 0) { if (cfgline.size () >= 2) { if (cfgline[0] == "creator") { args["bc1contactemail"] = cfgline[1]; } else if (cfgline[0] == "collectionmeta") { if (cfgline[1] == "collectionname") { args["bc1fullname"] = cfgline[2]; } else if (cfgline[1] == "collectionextra") { args["bc1aboutdesc"] = carriage_replace (cfgline[2], 1); } } } } cfg_in.close(); } else { logout << "collectoraction::update_cfgfile_complete: unable to open " << cfgfilec << " for input\n"; message = "tmpfail"; } } } else { logout << "collectoraction::update_cfgfile_complete: unable to open " << cfgfilec << " for output\n"; message = "tmpfail"; } delete []cfgfilec; } // return html for buttons used in collector bar // color may be "green", "grey", or "yellow" // type may be: // "info" --> "collection information" button // "srce" --> "source data" button // "conf" --> "configure collection" button // "bild" --> "build collection" button // "view" --> "view collection" button // if enabled is true button will be flashy rollover type and // will be hyperlinked // Wendy left a comment suggesting this be move, but where to // was not specified. Into wizardaction.cpp? // set the _fullnamemenu_ macro (and _warnindex_ and _selectedindex_ if // we're on the "srce" page) void collectoraction::set_fullnamemenu (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { if (recpt == NULL) { logout << "ERROR (collectoraction::set_fullnamemenu): This action does not contain\n" << " information about any receptionists. The method set_receptionist was\n" << " probably not called from the module which instantiated this action.\n"; return; } text_t ¤t_page = args["p"]; text_t currentname = args[macro_prefix+"dirname"]; if (current_page == "srce") currentname = args[macro_prefix + "clonecol"]; text_tarray dirnames; text_tarray fullnames; vector write_protected; bool is_selected = false; int selected_index = 0; int index = 0; recptprotolistclass::iterator rprotolist_here = protos->begin(); recptprotolistclass::iterator rprotolist_end = protos->end(); while (rprotolist_here != rprotolist_end) { if ((*rprotolist_here).p != NULL) { // don't include z39.50 collections comerror_t err = noError; if ((*rprotolist_here).p->get_protocol_name (err) == "z3950proto") { ++rprotolist_here; continue; } text_tarray collist; (*rprotolist_here).p->get_collection_list (collist, err, logout); if (err == noError) { text_tarray::iterator collist_here = collist.begin(); text_tarray::iterator collist_end = collist.end(); while (collist_here != collist_end) { ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); if (cinfo != NULL) { text_t collectionname = cinfo->get_collectionmeta("collectionname", args["l"]); if (collectionname.empty()) { collectionname = *collist_here; } dirnames.push_back(*collist_here); fullnames.push_back(collectionname); // check to see if the collection is writable if (collection_protected (*collist_here)) write_protected.push_back(true); else write_protected.push_back(false); if (*collist_here == currentname) { is_selected = true; selected_index = index; } ++index; } ++collist_here; } } } ++rprotolist_here; } bool first = true; text_t warnindex; text_t fullnamemenu = "\n"; fullnamemenu += "\n"; for (int i = 0; i < index; ++i) { // don't want write protected collections in list on "change existing // collection" page if (write_protected[i] && current_page == "existing") continue; fullnamemenu += "