/**********************************************************************
 *
 * collectset.cpp --
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "collectset.h"
#include "collectserver.h"
#include "colservrconfig.h"
#include "gsdlsitecfg.h"
#include "gdbmclass.h"
#include "jdbmnaiveclass.h"
#include "gsdltools.h"
#include "fileutil.h"

#include "filter.h"
#include "browsefilter.h"
#include "sqlbrowsefilter.h"
#include "queryfilter.h"

#ifdef ENABLE_MG
#include "mgqueryfilter.h"
#include "mgsource.h"
#endif

#ifdef ENABLE_MGPP
#include "mgppqueryfilter.h"
#include "mgppsource.h"
#endif

#ifdef ENABLE_LUCENE
#include "lucenequeryfilter.h"
#include "lucenesource.h"
#endif

// needed for the assert() calls below
#include <assert.h>

#ifdef USE_SQLITE
#include "sqlitedbclass.h"
#endif

#ifdef USE_MSSQL
#include "mssqldbclass.h"
#endif


// Constructs a collectset by reading the site configuration and then
// registering a collection server for every collection found in
// collecthome.
//
// gsdlhome and collecthome will be set as a result of calling this function
// collecthome will default to "/collect" (presumably relative to gsdlhome --
// confirm in site_cfg_read) if not explicitly specified in config file
collectset::collectset (text_t& gsdlhome, text_t& collecthome)
{
#ifdef ENABLE_MG
  mgsearch = NULL;
#endif

#ifdef ENABLE_MGPP
  mgppsearch = NULL;
#endif

#ifdef ENABLE_LUCENE
  lucenesearch = NULL;
#endif

  // get gsdlhome (if we fail the error will be picked up later -- in
  // cgiwrapper)
  if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
    if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
      // scans collecthome, skips "modelcol", adds every collection and
      // then expands any collection groups
      this->add_all_collections (gsdlhome, collecthome);
    }
  }

  // called whether or not the site configuration could be read
  set_gsdl_env_vars(gsdlhome);
}

// Constructs an empty collectset that only records the given httpprefix.
// No collections are added.
collectset::collectset (text_t& httpprefix_arg)
{
  httpprefix = httpprefix_arg;

#ifdef ENABLE_MG
  mgsearch = NULL;
#endif

#ifdef ENABLE_MGPP
  mgppsearch = NULL;
#endif

#ifdef ENABLE_LUCENE
  lucenesearch = NULL;
#endif
}

// Constructs an empty collectset. No collections are added.
collectset::collectset ()
{
#ifdef ENABLE_MG
  mgsearch = NULL;
#endif

#ifdef ENABLE_MGPP
  mgppsearch = NULL;
#endif

#ifdef ENABLE_LUCENE
  lucenesearch = NULL;
#endif
}

// Deletes every collection server held in cservers and empties the map.
collectset::~collectset ()
{
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    if ((*here).second.c != NULL) {
      delete (*here).second.c;
    }
    ++here;
  }
  cservers.clear();
}

// Reads build.cfg and collect.cfg for every registered collection server
// and initialises it. Warnings are written to logout when a config file
// cannot be read.
//
// Returns false as soon as one collection server's own init() fails,
// true otherwise. Collections whose config files could not be read are
// skipped (left uninitialised) rather than treated as a failure.
bool collectset::init (ostream &logout)
{
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  for (; here != end; ++here) {
    assert ((*here).second.c != NULL);
    if ((*here).second.c == NULL) continue;

    const colservrconf &configinfo = (*here).second.c->get_configinfo ();

    // configure this collection server

    // note that we read build.cfg before collect.cfg so that the indexmaps
    // are available to decode defaultindex, defaultsubcollection, and
    // defaultlanguage

    bool failed_build_cfg = false;
    if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
                         configinfo.collecthome, configinfo.collection)) {
      failed_build_cfg = true;

      outconvertclass text_t2ascii;
      logout << text_t2ascii
             << "Warning: couldn't read build.cfg file for collection \""
             << configinfo.collection << "\""
             << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
             << " collecthome=\"" << configinfo.collecthome << "\"\n";
    }

    bool failed_collect_cfg = false;
    if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
                           configinfo.collecthome, configinfo.collection)) {
      failed_collect_cfg = true;

      outconvertclass text_t2ascii;
      logout << text_t2ascii
             << "Warning: couldn't read collect.cfg file for collection \""
             << configinfo.collection << "\""
             << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
             << " collecthome=\"" << configinfo.collecthome << "\"\n";
    }

    bool is_colgroup = (*here).second.c->is_collection_group();

    // without collect.cfg the collection cannot be used at all
    if (failed_collect_cfg) continue;

    // let a failed build.cfg through if its 'collect.cfg' marks it as
    // 'collectgroup true'
    if (failed_build_cfg && (!is_colgroup)) continue;

    if (!(*here).second.c->init (logout)) return false;

    (*here).second.c->configure("httpdomain",httpdomain);
    (*here).second.c->configure("httpprefix",httpprefix);
  }

  return true;
}

// Returns the map of collection servers (by value, as declared).
collectservermapclass collectset::servers()
{
  return cservers;
}

// Registers a collection server for every entry of the collecthome
// directory (except "modelcol"), then expands any collection groups
// among them. Does nothing if collecthome cannot be read.
void collectset::add_all_collections(const text_t &gsdlhome,
                                     const text_t& collecthome)
{
  text_tarray collections;

  if (read_dir(collecthome, collections)) {
    text_tarray::const_iterator thiscol = collections.begin();
    text_tarray::const_iterator endcol = collections.end();

    while (thiscol != endcol) {
      // ignore the modelcol
      if (*thiscol == "modelcol") {
        ++thiscol;
        continue;
      }

      // create collection server for this collection
      this->add_collection (*thiscol, gsdlhome, collecthome);

      ++thiscol;
    }

    this->add_all_collection_groups(gsdlhome,collecthome);
  }
}

// Returns true if <collect_dir>/etc/collect.cfg exists and the first
// two-token "collectgroup" line in it has the value "true".
// Returns false if the file is missing, cannot be opened, or has no such
// line.
bool collectset::collection_is_collect_group (const text_t& collect_dir)
{
  text_t is_collect_group_str = "false";
  text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");

  if (file_exists(collect_cfg)) {
    // getcstr() hands back a buffer that is released with delete[] below
    char *collect_cfgc = collect_cfg.getcstr();
    ifstream confin(collect_cfgc);

    if (confin) {
      text_tarray cfgline;

      while (read_cfg_line(confin, cfgline) >= 0) {
        if (cfgline.size() == 2) {
          text_t key = cfgline[0];
          cfgline.erase(cfgline.begin());

          if (key == "collectgroup") {
            is_collect_group_str = cfgline[0];
            break;
          }
        }
      }

      confin.close();
    }

    delete []collect_cfgc;
  }

  return (is_collect_group_str == "true");
}

// add_collection sets up the collectionserver and calls
// add_collectserver
//
// Any collection server already registered under the same name is removed
// first. If <collecthome>/<collection>/index/build.cfg exists, a fully
// equipped server (filters and sources matching the buildtype and
// infodbtype read from build.cfg) is created. Otherwise a bare server is
// created only when the collection is a collect group; in all other cases
// nothing is registered.
void collectset::add_collection (const text_t& collection,
                                 const text_t& gsdlhome,
                                 const text_t& collecthome)
{
  // read config file to see if built with mg, mgpp, or lucene
  text_t buildtype = "mg"; // mg is default
  text_t infodbtype = "gdbm"; // gdbm is default

  this->remove_collection(collection);

  collectserver *cserver = NULL;

  text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
  if (file_exists (build_cfg)) {
    char *build_cfgc = build_cfg.getcstr();
    ifstream confin(build_cfgc);

    if (confin) {
      text_tarray cfgline;

      while (read_cfg_line(confin, cfgline) >= 0) {
        if (cfgline.size() == 2) {
          text_t key = cfgline[0];
          cfgline.erase(cfgline.begin());

          if (key == "buildtype") {
            buildtype = cfgline[0];
          }
          if (key == "infodbtype") {
            infodbtype = cfgline[0];
          }
        }
      }

      confin.close();
    }

    delete []build_cfgc;

    cserver = new collectserver();

    // Create a dbclass of the correct type
    dbclass *db_ptr = NULL;

#ifdef USE_SQLITE
    if (infodbtype == "sqlite") {
      sqlitedbclass *sql_db_ptr = new sqlitedbclass();
      db_ptr = sql_db_ptr;

      // add a sql browse filter
      sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
      sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
      cserver->add_filter (sqlbrowsefilter);
    }
#endif

#ifdef USE_MSSQL
    if (infodbtype == "mssql") {
      mssqldbclass *mssql_db_ptr = new mssqldbclass();
      db_ptr = mssql_db_ptr;

      // add a sql browse filter
      sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
      sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
      cserver->add_filter (sqlbrowsefilter);
    }
#endif

    if (infodbtype == "jdbm") {
      jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
      db_ptr = jdbm_db_ptr;
    }

    // Use GDBM if the infodb type is empty or not one of the values above
    if (db_ptr == NULL) {
      db_ptr = new gdbmclass();
    }

    // add a null filter
    filterclass *filter = new filterclass ();
    cserver->add_filter (filter);

    // add a browse filter
    browsefilterclass *browsefilter = new browsefilterclass();
    browsefilter->set_db_ptr(db_ptr);
    cserver->add_filter (browsefilter);

    // NOTE(review): each branch below overwrites the corresponding
    // *search member with a freshly allocated object, so only the most
    // recently added collection's search object stays reachable from this
    // class -- confirm who owns and frees the earlier ones.
    if (buildtype == "mg") {
#ifdef ENABLE_MG
      mgsearch = new mgsearchclass();

      // add a query filter
      mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (mgsearch);
      cserver->add_filter (queryfilter);

      // add a mg source
      mgsourceclass *mgsource = new mgsourceclass ();
      mgsource->set_db_ptr(db_ptr);
      mgsource->set_textsearchptr (mgsearch);
      cserver->add_source (mgsource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else if (buildtype == "mgpp") {
#ifdef ENABLE_MGPP
      mgppsearch = new mgppsearchclass();

      // add a query filter
      mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (mgppsearch);
      cserver->add_filter (queryfilter);

      // add a mgpp source
      mgppsourceclass *mgppsource = new mgppsourceclass ();
      mgppsource->set_db_ptr(db_ptr);
      mgppsource->set_textsearchptr (mgppsearch);
      cserver->add_source (mgppsource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else if (buildtype == "lucene") {
#ifdef ENABLE_LUCENE
      lucenesearch = new lucenesearchclass();
      lucenesearch->set_gsdlhome(gsdlhome);

      // add a query filter
      lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
      queryfilter->set_db_ptr(db_ptr);
      queryfilter->set_textsearchptr (lucenesearch);
      cserver->add_filter (queryfilter);

      // add a lucene source
      lucenesourceclass *lucenesource = new lucenesourceclass ();
      lucenesource->set_db_ptr(db_ptr);
      lucenesource->set_textsearchptr (lucenesearch);
      cserver->add_source (lucenesource);
#else
      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
#endif
    }
    else {
      cerr << "Warning: unrecognized buildtype " << buildtype << endl;
    }
  }
  else {
    // see if it is a collectgroup col
    text_t this_collect_dir = filename_cat(collecthome, collection);
    if (collection_is_collect_group(this_collect_dir)) {
      // by this point we know we will need a cserver
      cserver = new collectserver();
    }
    // else not a collect group, or there was no collect.cfg
    // => leave cserver as NULL so it will not be added into cservers
  }

  if (cserver != NULL) {
    // inform collection server and everything it contains about its
    // collection name
    cserver->configure ("collection", collection);
    cserver->configure ("gsdlhome", gsdlhome);
    cserver->configure ("collecthome", collecthome);
    cservers.addcollectserver (cserver);
  }
}

// Deletes every collection server and empties cservers. When MG support
// is compiled in, the current mgsearch object is asked to unload its
// database first.
void collectset::remove_all_collections ()
{
#ifdef ENABLE_MG
  // first unload any cached mg databases
  if (mgsearch != NULL) {
    mgsearch->unload_database();
  }
#endif

  // now delete the collection server objects
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    if ((*here).second.c != NULL) {
      delete (*here).second.c;
    }
    ++here;
  }
  cservers.clear();
}

// If `collection` is a collect group, registers every entry of its
// directory (except "etc") as a collection named "<collection>/<entry>".
// Does nothing for ordinary collections.
void collectset::add_collection_group(const text_t& collection,
                                      const text_t& gsdlhome,
                                      const text_t& collecthome)
{
  text_tarray group;
  text_t collect_group_dir = filename_cat (collecthome, collection);

  // need to read collect.cfg for 'collectgroup' as class hasn't been
  // initialised through 'init' yet
  if (collection_is_collect_group(collect_group_dir)) {
    if (read_dir (collect_group_dir, group)) {
      text_tarray::const_iterator thiscol = group.begin();
      text_tarray::const_iterator endcol = group.end();

      while (thiscol != endcol) {
        // ignore the etc directory
        if (*thiscol == "etc") {
          ++thiscol;
          continue;
        }

        //text_t group_col = filename_cat(collection,*thiscol);
        // later we check for / in the name. When this is used in a path
        // (via filename_cat) the / will be converted to a backslash on
        // windows
        text_t group_col = collection + "/" + *thiscol;

        this->add_collection (group_col, gsdlhome, collecthome);

        ++thiscol;
      }
    }
  }
}

// Calls add_collection_group for every collection server currently
// registered, so that the members of any collect group get registered too.
//
// NOTE(review): add_collection_group can insert into (and erase other
// entries from) cservers while this loop is iterating over it. This relies
// on collectservermapclass iterators staying valid across those
// operations -- confirm against the container's definition.
void collectset::add_all_collection_groups (const text_t& gsdlhome,
                                            const text_t& collecthome)
{
  collectservermapclass::iterator here = cservers.begin();
  collectservermapclass::iterator end = cservers.end();

  while (here != end) {
    // guard against a NULL server pointer, as every other loop over
    // cservers in this file does
    if ((*here).second.c != NULL) {
      text_t collection = (*here).second.c->get_collection_name();
      this->add_collection_group(collection,gsdlhome,collecthome);
    }

    ++here;
  }
}

// remove_collection deletes the collection server of collection.
// This only needs to be called if a collectionserver is to be
// removed while the library is running. The destructor function
// cleans up all collectservers when the program exits.
void collectset::remove_collection (const text_t &collection) { // do nothing if no collection server exists for this collection if (cservers.getcollectserver(collection) == NULL) return; #ifdef ENABLE_MG // first unload any cached mg databases - we may need to do something // similar to this for mgpp and lucene too if (mgsearch != NULL) { mgsearch->unload_database(); } #endif // now delete the collection server object collectservermapclass::iterator here = cservers.begin(); collectservermapclass::iterator end = cservers.end(); while (here != end) { if ((*here).second.c != NULL && (*here).first == collection) { delete (*here).second.c; cservers.erase (here); return; } ++here; } } // remove_collection deletes the collection server of collection. // This only needs to be called if a collectionserver is to be // removed while the library is running. The destructor function // cleans up all collectservers when the program exits. void collectset::remove_collection (const text_t &collection, ostream &logout) { remove_collection(collection); outconvertclass text_t2ascii; logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for " << collection << "\n"; } void collectset::configure(const text_t &key, const text_tarray &cfgline) { if ((key == "collection") || (key == "collectdir")) return; collectservermapclass::iterator here = cservers.begin(); collectservermapclass::iterator end = cservers.end(); while (here != end) { assert ((*here).second.c != NULL); if ((*here).second.c != NULL) { if (key == "collectinfo") { if ((*here).first == cfgline[0]) { if (cfgline.size()==3) { (*here).second.c->configure ("gsdlhome", cfgline[1]); (*here).second.c->configure ("gdbmhome", cfgline[2]); } else { (*here).second.c->configure ("gsdlhome", cfgline[1]); (*here).second.c->configure ("collecthome", cfgline[2]); (*here).second.c->configure ("gdbmhome", cfgline[3]); } } } else { (*here).second.c->configure (key, cfgline); } } ++here; } } void 
collectset::getCollectionList (text_tarray &collist) { collist.erase(collist.begin(),collist.end()); collectservermapclass::iterator here = cservers.begin(); collectservermapclass::iterator end = cservers.end(); while (here != end) { assert ((*here).second.c != NULL); if ((*here).second.c != NULL) { collist.push_back ((*here).second.c->get_collection_name()); } ++here; } }