/**********************************************************************
 *
 * source.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "source.h"
#include "fileutil.h"
#include "OIDtools.h"
#include <assert.h>


// Starts with no database object and no text-search object; both must be
// supplied before init() is called (init() rejects a null db_ptr).
sourceclass::sourceclass () {
  db_ptr = NULL;
  textsearchptr = NULL;
  classname = "source";
}

// Deletes the database and text-search objects held by this source.
sourceclass::~sourceclass () {
  if (db_ptr != NULL) delete db_ptr;
  if (textsearchptr != NULL) delete textsearchptr;
}

// configure should be called once for each configuration line.
//   key     - name of the configuration option
//   cfgline - the values given for that option
// Unknown keys are ignored. Options that need a single value are ignored
// when cfgline is empty instead of reading past the end of the array.
void sourceclass::configure (const text_t &key, const text_tarray &cfgline) {
  const bool has_value = (cfgline.size() >= 1);

  if (has_value) {
    const text_t &value = cfgline[0];

    if (key == "collection") collection = value;
    else if (key == "collectdir") collectdir = value;
    else if (key == "gsdlhome") gsdlhome = value;
    else if (key == "collecthome") collecthome = value;
    else if (key == "gdbmhome") dbhome = value;
  }

  if (key == "indexmap") {
    indexmap.importmap (cfgline);

  } else if (key == "defaultindex") {
    if (has_value) indexmap.from2to (cfgline[0], defaultindex);

  } else if (key == "subcollectionmap") {
    subcollectionmap.importmap (cfgline);

  } else if (key == "defaultsubcollection") {
    if (has_value) subcollectionmap.from2to (cfgline[0], defaultsubcollection);

  } else if (key == "languagemap") {
    languagemap.importmap (cfgline);

  } else if (key == "defaultlanguage") {
    if (has_value) languagemap.from2to (cfgline[0], defaultlanguage);

  } else if (key == "indexstem") {
    if (has_value) indexstem = cfgline[0];
  }
}

// Builds the full path of the database file for index stem idx and
// appends file_ext to it.
text_t sourceclass::resolve_db_filename(const text_t& idx,
					const text_t& file_ext)
{
  // This is an exact copy of the method (of the same name) in filterclass.
  // It makes sense to merge them, in which case either gsdlhome,
  // collecthome, dbhome and collection need to be passed in as parameters,
  // or else there is some notion of a shared base class that both
  // filter and source inherit from.

  // NB: there is an even greater opportunity to share more code in this
  // function if sql_db_ptr/db_ptr and db_filename and sql_db_filename
  // are also drawn from one object.

  text_t resolved_filename;

  if (gsdlhome==dbhome) {
    // dbhome is the same as gsdlhome (init() gives it that value when it
    // was not configured)
    // => the database is looked up under collecthome
    resolved_filename = filename_cat(collecthome, collection, "index", "text", idx);
  }
  else {
    // dbhome is explicitly set to something other than gsdlhome
    // => use dbhome
    resolved_filename = filename_cat(dbhome, "collect", collection, "index", "text", idx);
  }

  resolved_filename += file_ext;

  return resolved_filename;
}

// init should be called after all the configuration is done but
// before any other methods are called.
// Fills in every setting that was left unconfigured and works out the
// database filename. Returns false only when there is no database object;
// a missing database file is logged as a warning and init still succeeds.
bool sourceclass::init (ostream &logout) {
  outconvertclass text_t2ascii;

  if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
  if (dbhome.empty()) dbhome = gsdlhome;

  if (defaultindex.empty()) {
    // use first index in map as default if no default is set explicitly
    text_tarray toarray;
    indexmap.gettoarray(toarray);
    if (toarray.size()) {
      defaultindex = toarray[0];
    }
  }

  if (defaultsubcollection.empty()) {
    // use first subcollection in map as default if no default is set explicitly
    text_tarray toarray;
    subcollectionmap.gettoarray(toarray);
    if (toarray.size()) {
      defaultsubcollection = toarray[0];
    }
  }

  if (defaultlanguage.empty()) {
    // use first language in map as default if no default is set explicitly
    text_tarray toarray;
    languagemap.gettoarray(toarray);
    if (toarray.size()) {
      defaultlanguage = toarray[0];
    }
  }

  // get the collection directory name
  if (collectdir.empty()) {
    collectdir = filename_cat (collecthome, collection);
  }

  if (db_ptr == NULL) {
    // most likely a configuration problem
    logout << text_t2ascii
	   << "configuration error: " << classname << " contains a null dbclass\n\n";
    return false;
  }

  // get the filename for the database and make sure it exists
  if (indexstem.empty()) {
    indexstem = collection;
  }
  db_filename = resolve_db_filename(indexstem, db_ptr->getfileextension());
  if (!file_exists(db_filename)) {
    logout << text_t2ascii
	   << "warning: database \"" << db_filename << "\" does not exist\n\n";
    // deliberately not treated as fatal
    //    return false;
  }

  return true;
}

// translate_OID translates OIDs using ".pr", ".fc" etc.
bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; err = noError; if (db_ptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: " << classname << " contains a null dbclass\n\n"; err = configurationError; return true; } // open the database db_ptr->setlogout(&logout); if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) { // most likely a system problem (we have already checked that the database exists) logout << text_t2ascii << "system problem: open on database \"" << db_filename << "\" failed\n\n"; err = systemProblem; return true; } infodbclass info; OIDout = db_ptr->translate_OID (OIDin, info); db_ptr->closedatabase(); // Important that local library doesn't leave any files open return true; } // get_metadata fills out the metadata if possible, if it is not responsible // for the given OID then it will return false. bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams, bool getParents, const text_tset &fields, const text_t &OID, MetadataInfo_tmap &metadata, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; metadata.erase(metadata.begin(), metadata.end()); err = noError; if (db_ptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: " << classname <<" contains a null dbclass\n\n"; err = configurationError; return true; } // open the database db_ptr->setlogout(&logout); if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) { // most likely a system problem (we have already checked that the database exists) logout << text_t2ascii << "system problem: open on database \"" << db_filename << "\" failed\n\n"; err = systemProblem; return true; } // get the metadata - if getParents is set we need to get // info for all parents of OID as well as OID vector info_array; text_tarray OIDs; if (getParents) get_parents_array (OID, 
OIDs); OIDs.push_back (OID); text_tarray::const_iterator this_OID = OIDs.begin(); text_tarray::const_iterator end_OID = OIDs.end(); while (this_OID != end_OID) { infodbclass info; if (!db_ptr->getinfo(*this_OID, info)) return false; // adjust the metadata text_t &contains = info["contains"]; if (contains.empty()) info["haschildren"] = 0; else info["haschildren"] = 1; //contains.clear(); info_array.push_back(info); ++this_OID; } // if fields set is empty we want to get all available metadata text_tset tfields = fields; if (tfields.empty() && !info_array.empty()) { infodbclass::iterator t_info = info_array[0].begin(); infodbclass::iterator e_info = info_array[0].end(); while (t_info != e_info) { if ((*t_info).first != "contains") tfields.insert ((*t_info).first); ++t_info; } tfields.insert ("hasnext"); tfields.insert ("hasprevious"); } // collect together the metadata bool donenextprevtest = false; bool hasnext=false, hasprevious=false; MetadataInfo_t this_metadata; text_tarray *pos_metadata; text_tset::const_iterator fields_here = tfields.begin(); text_tset::const_iterator fields_end = tfields.end(); while (fields_here != fields_end) { this_metadata.clear(); this_metadata.isRef = false; vector::reverse_iterator this_info = info_array.rbegin(); vector::reverse_iterator end_info = info_array.rend(); MetadataInfo_t *tmetaptr = &this_metadata; while (this_info != end_info) { pos_metadata = (*this_info).getmultinfo(*fields_here); if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) { // collect metadata if (!donenextprevtest) { donenextprevtest = true; // cache parent contents array text_t thisparent = get_parent (OID); if (!thisparent.empty()) { if (thisparent != parentOID) { parentOID = thisparent; parentcontents.erase(parentcontents.begin(), parentcontents.end()); if (db_ptr->getinfo(parentOID, parentinfo)) { text_t &parentinfocontains = parentinfo["contains"]; if (!parentinfocontains.empty()) splitchar (parentinfocontains.begin(), 
parentinfocontains.end(), ';', parentcontents); } } // do tests text_tarray::const_iterator parentcontents_here = parentcontents.begin(); text_tarray::const_iterator parentcontents_end = parentcontents.end(); text_t shrunk_OID = OID; shrink_parent (shrunk_OID); while (parentcontents_here != parentcontents_end) { if (*parentcontents_here == shrunk_OID) { if (parentcontents_here == parentcontents.begin()) hasprevious = false; else hasprevious = true; ++parentcontents_here; if (parentcontents_here == parentcontents.end()) hasnext = false; else hasnext = true; break; } ++parentcontents_here; } // fill in metadata if ((*fields_here == "hasnext" && hasnext) || (*fields_here == "hasprevious" && hasprevious)) tmetaptr->values.push_back("1"); else tmetaptr->values.push_back("0"); } else tmetaptr->values.push_back("0"); } } //else if (pos_metadata != NULL && *fields_here != "contains") { else if (pos_metadata != NULL) { tmetaptr->values = *pos_metadata; } else tmetaptr->values.push_back(""); ++this_info; if (this_info != end_info) { tmetaptr->parent = new MetadataInfo_t(); tmetaptr = tmetaptr->parent; } } metadata[*fields_here] = this_metadata; ++fields_here; } db_ptr->closedatabase(); // Important that local library doesn't leave any files open return true; } bool sourceclass::get_document (const text_t &OID, text_t &doc, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; err = noError; if (db_ptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: " << classname << " contains a null dbclass\n\n"; err = configurationError; return true; } // open the database db_ptr->setlogout(&logout); if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) { // most likely a system problem (we have already checked that the database exists) logout << text_t2ascii << "system problem: open on database \"" << db_filename << "\" failed\n\n"; err = systemProblem; return true; } text_t tOID = OID; if (needs_translating (OID)) 
translate_OID (OID, tOID, err, logout); infodbclass info; if (!db_ptr->getinfo(tOID, info)) { db_ptr->closedatabase(); // Important that local library doesn't leave any files open return false; } if (info["hastxt"].getint() == 1) { int docnum = info["docnum"].getint(); // set the collection directory textsearchptr->setcollectdir (collectdir); // get the text textsearchptr->docTargetDocument(defaultindex, defaultsubcollection, defaultlanguage, collection, docnum, doc); // remove the and tags doc.replace("", ""); doc.replace("", ""); doc.replace("", ""); doc.replace("", ""); } db_ptr->closedatabase(); // Important that local library doesn't leave any files open return true; } bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout) { err = noError; issearchable = false; text_tarray fromarray; indexmap.getfromarray(fromarray); if (fromarray.size() == 0) { return true; } else if (fromarray.size() == 1) { if (fromarray[0] == "dummy:text") { // always return true - issearchable is false here though return true; } } issearchable = true; return true; } bool operator==(const sourceptr &x, const sourceptr &y) { return (x.s == y.s); } bool operator<(const sourceptr &x, const sourceptr &y) { return (x.s < y.s); } // thesource remains the property of the calling code but // should not be deleted until it is removed from this list. void sourcelistclass::addsource (sourceclass *thesource) { // can't add a source that doesn't exist assert (thesource != NULL); if (thesource == NULL) return; sourceptr sp; sp.s = thesource; sourceptrs.push_back(sp); }