/**********************************************************************
 *
 * collectserver.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "collectserver.h"
#include "OIDtools.h"
// NOTE(review): this directive had lost its operand; <assert.h> is restored
// because this file calls assert() -- confirm against version control.
#include <assert.h>
#include "display.h"


// Writes a diagnostic to cerr about the requested indexer (buildtype).
//  - "mg", "mgpp" and "lucene" are the recognised values; a warning is
//    printed for one of them only when the matching ENABLE_* macro was not
//    defined at compile time.
//  - Any other value produces an "Error:" line.
// The function only reports; it returns nothing and never rejects the value.
void check_if_valid_buildtype(const text_t& buildtype)
{
  if (buildtype=="mg") {
#ifndef ENABLE_MG
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
#endif
  }
  else if (buildtype=="mgpp") {
#ifndef ENABLE_MGPP
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
#endif
  }
  else if (buildtype=="lucene") {
#ifndef ENABLE_LUCENE
    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
#endif
  }
  else {
    cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
  }
}


// Writes a diagnostic to cerr about the requested database type (infodbtype).
//  - "gdbm", "gdbm-txtgz", "jdbm", "sqlite" and "mssql" are the recognised
//    values; a warning is printed for one of them only when the matching
//    USE_* macro was not defined at compile time ("gdbm-txtgz" is checked
//    against USE_GDBM, like "gdbm").
//  - Any other value produces an "Error:" line.
// The function only reports; it returns nothing and never rejects the value.
void check_if_valid_infodbtype(const text_t& infodbtype)
{
  if (infodbtype=="gdbm") {
#ifndef USE_GDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
#endif
  }
  else if (infodbtype=="gdbm-txtgz") {
#ifndef USE_GDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
#endif
  }
  else if (infodbtype=="jdbm") {
#ifndef USE_JDBM
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
#endif
  }
  else if (infodbtype=="sqlite") {
#ifndef USE_SQLITE
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
#endif
  }
  else if (infodbtype=="mssql") {
#ifndef USE_MSSQL
    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
#endif
  }
  else {
    cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
  }
}


// The collection name starts out as the literal "null"; ping() treats that
// value as "not configured yet".
collectserver::collectserver ()
  : collectinfo()
{
  configinfo.collection = "null";
}

// Deletes every source and filter object still registered with this
// collection server, then empties both containers.
collectserver::~collectserver () {

  // clean up the sources (deleting a null pointer is a no-op)
  for (sourcelistclass::iterator source_here = sources.begin();
       source_here != sources.end(); ++source_here) {
    delete (*source_here).s;
  }
  sources.clear();

  // clean up the filters (deleting a null pointer is a no-op)
  for (filtermapclass::iterator filter_here = filters.begin();
       filter_here != filters.end(); ++filter_here) {
    delete (*filter_here).second.f;
  }
  filters.clear();
}

// configure should be called for each line in the
// configuration files to configure the collection server and everything
// it contains.
The configuration should take place just before initialisation void collectserver::configure (const text_t &key, const text_tarray &cfgline) { if (cfgline.size() >= 1) { const text_t &value = cfgline[0]; if (key == "plugin") { //get the plugin name const text_t &name = cfgline[0]; if (name == "HTMLPlugin") { for (int hI = 1; hI < cfgline.size(); hI++) { const text_t &plugOption = cfgline[hI]; if (plugOption == "-use_realistic_book") { collectinfo.useBook = true; break; } } } } else if (key == "gsdlhome") configinfo.gsdlhome = value; else if (key == "gdbmhome") configinfo.dbhome = value; else if (key == "collecthome") configinfo.collecthome = value; else if (key == "collection") { configinfo.collection = value; collectinfo.shortInfo.name = value; } else if (key == "collectdir") configinfo.collectdir = value; else if (key == "host") collectinfo.shortInfo.host = value; else if (key == "port") collectinfo.shortInfo.port = value.getint(); else if (key == "public") { if (value == "true") collectinfo.isPublic = true; else collectinfo.isPublic = false; } else if (key == "beta") { if (value == "true") collectinfo.isBeta = true; else collectinfo.isBeta = false; } else if (key == "collectgroup") { if (value == "true") collectinfo.isCollectGroup = true; else collectinfo.isCollectGroup = false; } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline; else if (key == "supercollectionoptions") { text_tarray::const_iterator begin = cfgline.begin(); text_tarray::const_iterator end = cfgline.end(); while(begin != end) { if (*begin == "uniform_search_results_formatting") { collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting; } begin++; } } else if (key == "builddate") collectinfo.buildDate = value.getint(); else if (key == "languages") collectinfo.languages = cfgline; else if (key == "numdocs") collectinfo.numDocs = value.getint(); else if (key == "numsections") collectinfo.numSections = value.getint(); else if (key == "numwords") 
collectinfo.numWords = value.getint(); else if (key == "numbytes") collectinfo.numBytes = value.getint(); else if (key == "stemindexes") collectinfo.stemIndexes = value.getint(); else if (key == "collectionmeta") { // genuine collmeta get added as collectionmeta and collection_macros // .collmeta just get added as collection_macros text_t params; if (cfgline.size() == 3) { // get the params for later text_t::const_iterator first=cfgline[1].begin()+1; text_t::const_iterator last=cfgline[1].end()-1; params=substr(first, last); } text_t meta_name = cfgline[0]; if (*(meta_name.begin())=='.') { // a .xxx collectionmeta. strip off the . and // look it up in the indexmap to get the actual value text_t name = substr(cfgline[0].begin()+1,cfgline[0].end()); text_t new_name; if (indexmap.from2to(name, new_name)) { meta_name = new_name; } } else { // add them to collectionmeta text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]]; if (cfgline.size() == 2) { lang_map[g_EmptyText] = cfgline[1]; } else if (cfgline.size() == 3 ) { // get the lang out of params paramhashtype params_hash; splitparams(params, params_hash); text_t lang = params_hash["l"]; lang_map[lang] = cfgline[2]; if (lang_map[g_EmptyText].empty()) { // want the first one as the default if no default specified lang_map[g_EmptyText] = cfgline[2]; } } collectinfo.collectionmeta[cfgline[0]] = lang_map; } // add all collectionmeta to macro list text_tmap params_map = collectinfo.collection_macros[meta_name]; if (cfgline.size() == 2) {// no params for this macro params_map[g_EmptyText] = cfgline[1]; } else if (cfgline.size() == 3) {// has params params_map[params] = cfgline[2]; if (params_map[g_EmptyText].empty()) { params_map[g_EmptyText] = cfgline[2]; } } collectinfo.collection_macros[meta_name] = params_map; } else if (key == "collectionmacro") { text_t nobrackets; text_tmap params_map = collectinfo.collection_macros[cfgline[0]]; // add all to macro list if (cfgline.size() == 2) { // no params for this macro 
params_map[g_EmptyText] = cfgline[1]; } else if (cfgline.size() == 3) {// has params // strip [ ] brackets from params text_t::const_iterator first=cfgline[1].begin()+1; text_t::const_iterator last=cfgline[1].end()-1; nobrackets=substr(first, last); params_map[nobrackets] = cfgline[2]; } collectinfo.collection_macros[cfgline[0]] = params_map; } else if (key == "format" && cfgline.size() == 2) collectinfo.format[cfgline[0]] = cfgline[1]; else if (key == "building" && cfgline.size() == 2) collectinfo.building[cfgline[0]] = cfgline[1]; else if (key == "httpdomain") collectinfo.httpdomain = value; else if (key == "httpprefix") collectinfo.httpprefix = value; else if (key == "receptionist") collectinfo.receptionist = value; else if (key == "buildtype") { check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid collectinfo.buildType = value; } // backwards compatibility - searchytpes is now a format statement else if (key == "searchtype") { // means buildtype is mgpp if (collectinfo.buildType.empty()) { check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid collectinfo.buildType = "mgpp"; } joinchar(cfgline, ',', collectinfo.format["SearchTypes"]); //collectinfo.searchTypes = cfgline; } else if (key == "infodbtype") { check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid collectinfo.infodbType = value; } else if (key == "separate_cjk") { if (value == "true") collectinfo.isSegmented = true; else collectinfo.isSegmented = false; } // What have we set in our collect.cfg file : document or collection ? 
else if (key == "authenticate") collectinfo.authenticate = value; // What have we set for our group list else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group); // build.cfg, earliestDatestamp of this collection needed for // OAIServer to work out earliestDatestamp of this repository else if (key == "earliestdatestamp") { collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg } // store all the mappings for use when collection meta is read later // (build.cfg read before collect.cfg) else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") { indexmap.importmap (cfgline, true); } // In the map the key-value pair contain the same // data i.e key == data, if key is 2 then data is 2 // What have we set for our public_documents ACL else if (key == "public_documents") { text_tarray::const_iterator begin = cfgline.begin(); text_tarray::const_iterator end = cfgline.end(); while(begin != end) { // key = data i.e if key is 2 then data is 2 // collectinfo.public_documents[*begin] is the key // *begin is the data value collectinfo.public_documents[*begin] = *begin; ++begin; } } // What have we set for our private_documents ACL else if (key == "private_documents") { text_tarray::const_iterator begin = cfgline.begin(); text_tarray::const_iterator end = cfgline.end(); while(begin != end) { // key = data i.e if key is 2 then data is 2 // collectinfo.public_documents[*begin] is the key // *begin is the data value collectinfo.private_documents[*begin] = *begin; ++begin; } } // dynamic_classifier "" else if (key == "dynamic_classifier") { collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1]; } } // configure the filters filtermapclass::iterator filter_here = filters.begin(); filtermapclass::iterator filter_end = filters.end(); while (filter_here != filter_end) { assert ((*filter_here).second.f != NULL); if ((*filter_here).second.f != NULL) 
(*filter_here).second.f->configure(key, cfgline); ++filter_here; } // configure the sources sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if ((*source_here).s != NULL) (*source_here).s->configure(key, cfgline); ++source_here; } } void collectserver::configure (const text_t &key, const text_t &value) { text_tarray cfgline; cfgline.push_back (value); configure(key, cfgline); } void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) { // if we've not been properly configured, then it is a foregone // conclusion that we cannot be active if (this->configinfo.collection == "null") { wasSuccess = false; } // if no build date exists, then the collection was probably not built; // ditto if the number of documents is zero, then something is pretty // wrong else if (this->collectinfo.buildDate == 0 || this->collectinfo.numDocs == 0) { wasSuccess = false; } // it is probably okay else wasSuccess = true; } bool collectserver::init (ostream &logout) { // delete the indexmap indexmap.clear(); // init the filters filtermapclass::iterator filter_here = filters.begin(); filtermapclass::iterator filter_end = filters.end(); while (filter_here != filter_end) { assert ((*filter_here).second.f != NULL); if (((*filter_here).second.f != NULL) && !(*filter_here).second.f->init(logout)) return false; ++filter_here; } // init the sources sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if (((*source_here).s != NULL) && !(*source_here).s->init(logout)) return false; ++source_here; } return true; } void collectserver::get_collectinfo (ColInfoResponse_t &reponse, comerror_t &err, ostream &/*logout*/) { reponse = collectinfo; err = noError; } void collectserver::get_filterinfo (InfoFiltersResponse_t 
&response, comerror_t &err, ostream &/*logout*/) { response.clear (); // get a list of filter names filtermapclass::iterator filter_here = filters.begin(); filtermapclass::iterator filter_end = filters.end(); while (filter_here != filter_end) { response.filterNames.insert ((*filter_here).first); ++filter_here; } err = noError; } void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request, InfoFilterOptionsResponse_t &response, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; filterclass *thisfilter = filters.getfilter(request.filterName); if (thisfilter != NULL) { thisfilter->get_filteroptions (response, err, logout); } else { response.clear (); err = protocolError; text_t& infodbtype = collectinfo.infodbType; // Don't print out the warning if were's asking about SQLQueryFilter // when we know the infodbtype is something other than .*sql.* if ((request.filterName != "SQLQueryFilter") || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) { logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n" << "filter \"" << request.filterName << "\".\n\n"; } } } void collectserver::filter (FilterRequest_t &request, FilterResponse_t &response, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; // translate any ".fc", ".pr" etc. 
stuff in the docSet text_t translatedOID; text_tarray translatedOIDs; text_tarray::iterator doc_here = request.docSet.begin(); text_tarray::iterator doc_end = request.docSet.end(); while (doc_here != doc_end) { if (needs_translating (*doc_here)) { sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if (((*source_here).s != NULL) && ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) { if (err != noError) return; break; } ++source_here; } translatedOIDs.push_back (translatedOID); } else { translatedOIDs.push_back (*doc_here); } ++doc_here; } request.docSet = translatedOIDs; response.clear(); filterclass *thisfilter = filters.getfilter(request.filterName); if (thisfilter != NULL) { // filter the data thisfilter->filter (request, response, err, logout); if (err != noError) return; // fill in the metadata for each of the OIDs (if it is requested) if (request.filterResultOptions & FRmetadata) { bool processed = false; ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin(); ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end(); while (resultdoc_here != resultdoc_end) { // try each of the sources in turn sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if (((*source_here).s != NULL) && ((*source_here).s->get_metadata(request.requestParams, request.refParams, request.getParents, request.fields, (*resultdoc_here).OID, (*resultdoc_here).metadata, err, logout))) { if (err != noError) return; processed = true; break; } ++source_here; } if (!processed) { logout << text_t2ascii << "Protocol Error: nothing processed for " << "filter \"" << request.filterName << "\".\n\n"; err = protocolError; return; } ++resultdoc_here; } } err = noError; } else { response.clear (); 
err = protocolError; logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n" << "filter \"" << request.filterName << "\".\n\n"; } } void collectserver::get_document (const DocumentRequest_t &request, DocumentResponse_t &response, comerror_t &err, ostream &logout) { sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if (((*source_here).s != NULL) && ((*source_here).s->get_document (request.OID, response.doc, err, logout))) { if (err != noError) return; break; } ++source_here; } } void collectserver::is_searchable (bool &issearchable, comerror_t &err, ostream &logout) { sourcelistclass::iterator source_here = sources.begin(); sourcelistclass::iterator source_end = sources.end(); while (source_here != source_end) { assert ((*source_here).s != NULL); if (((*source_here).s != NULL) && ((*source_here).s->is_searchable (issearchable, err, logout))) { if (err != noError) return; break; } ++source_here; } } bool operator==(const collectserverptr &x, const collectserverptr &y) { return (x.c == y.c); } bool operator<(const collectserverptr &x, const collectserverptr &y) { return (x.c < y.c); } // thecollectserver remains the property of the calling code but // should not be deleted until it is removed from this list. 
void collectservermapclass::addcollectserver (collectserver *thecollectserver) { // can't add a null collection server assert (thecollectserver != NULL); if (thecollectserver == NULL) return; // can't add an collection server with no collection name assert (!(thecollectserver->get_collection_name()).empty()); if ((thecollectserver->get_collection_name()).empty()) return; collectserverptr cptr; cptr.c = thecollectserver; collectserverptrs[thecollectserver->get_collection_name()] = cptr; } // getcollectserver will return NULL if the collectserver could not be found collectserver *collectservermapclass::getcollectserver (const text_t &collection) { // can't find a collection with no name if (collection.empty()) return NULL; iterator here = collectserverptrs.find (collection); if (here == collectserverptrs.end()) return NULL; return (*here).second.c; }