/********************************************************************** * * mggdbmsource.cpp -- * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: mggdbmsource.cpp 650 1999-10-10 08:20:37Z sjboddie $ * *********************************************************************/ /* $Log$ Revision 1.16 1999/10/10 08:20:36 sjboddie - metadata now returns map rather than array - redesigned browsing support (although it's not finished so won't currently work ;-) Revision 1.15 1999/09/07 04:57:22 sjboddie added gpl notice Revision 1.14 1999/08/31 22:40:44 rjmcnab A couple of gdbm changes. Revision 1.13 1999/08/13 04:20:27 sjboddie added ability to get return all available metadata when 'fields' is empty Revision 1.12 1999/07/07 06:17:47 rjmcnab broke search_index into index+subcollection+language within mgsearch Revision 1.11 1999/07/01 03:49:54 rjmcnab fixed a small warning. Revision 1.10 1999/06/29 23:06:07 sjboddie Fixed up default index for get_document Revision 1.9 1999/06/16 02:00:34 sjboddie Few changes to get getParent filter option to return metadata of parents as well as current OID Revision 1.8 1999/05/10 03:43:48 sjboddie lots of changes to lots of files - getting document action going Revision 1.7 1999/04/30 02:00:47 sjboddie lots of stuff to do with getting documentaction working Revision 1.6 1999/04/21 22:40:44 sjboddie made another change to the one I just committed. if requested metadata doesn't exist it now puts an empty string in the response array so we don't always have to test that a value exists before using it. Revision 1.5 1999/04/21 05:23:46 sjboddie changed the way metadata is returned Revision 1.4 1999/04/19 23:56:07 rjmcnab Finished the gdbm metadata stuff Revision 1.3 1999/04/12 10:30:33 rjmcnab Made a little more progress. Revision 1.2 1999/04/12 05:21:51 rjmcnab Started on a mg and gdbm source. Revision 1.1 1999/04/12 03:40:40 rjmcnab Initial revision. */ #include "mggdbmsource.h" #include "fileutil.h" #include "OIDtools.h" mggdbmsourceclass::mggdbmsourceclass () { gdbmptr = NULL; mgsearchptr = NULL; } mggdbmsourceclass::~mggdbmsourceclass () { } void mggdbmsourceclass::configure (const text_t &key, const text_tarray &cfgline) { if (cfgline.size() >= 1) { const text_t &value = cfgline[0]; if (key == "collection") collection = value; else if (key == "collectdir") collectdir = value; else if (key == "gsdlhome") gsdlhome = value; } if (key == "indexmap") { indexmap.importmap (cfgline); } else if (key == "defaultindex") { indexmap.from2to (cfgline[0], defaultindex); } else if (key == "subcollectionmap") { subcollectionmap.importmap (cfgline); } else if (key == "defaultsubcollection") { subcollectionmap.from2to (cfgline[0], defaultsubcollection); } else if (key == "languagemap") { languagemap.importmap (cfgline); } else if (key == "defaultlanguage") languagemap.from2to (cfgline[0], defaultlanguage); } bool mggdbmsourceclass::init (ostream &logout) { outconvertclass text_t2ascii; if (!sourceclass::init (logout)) return false; // get the collection directory name if (collectdir.empty()) { collectdir = filename_cat (gsdlhome, "collect", collection); } // get the filename for the database and make sure it exists gdbm_filename = filename_cat(collectdir,"index","text",collection); #ifdef _LITTLE_ENDIAN gdbm_filename += ".ldb"; #else gdbm_filename += ".bdb"; #endif if (!file_exists(gdbm_filename)) { logout << text_t2ascii << "error: gdbm database \"" << gdbm_filename << "\" does not exist\n\n"; return false; } return true; } bool mggdbmsourceclass::translate_OID (const text_t &OIDin, text_t &OIDout, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; err = noError; if (gdbmptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: mggdbmsource contains a null gdbmclass\n\n"; err = configurationError; return true; } // open the database gdbmptr->setlogout(&logout); if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) { // most likely a system problem (we have already checked that the // gdbm database exists) logout << text_t2ascii << "system problem: open on gdbm database \"" << gdbm_filename << "\" failed\n\n"; err = systemProblem; return true; } infodbclass info; OIDout = gdbmptr->translate_OID (OIDin, info); return true; } bool mggdbmsourceclass::get_metadata (const text_t &/*requestParams*/, const text_t &/*refParams*/, bool getParents, const text_tset &fields, const text_t &OID, MetadataInfo_tmap &metadata, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; metadata.erase(metadata.begin(), metadata.end()); err = noError; if (gdbmptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: mggdbmsource contains a null gdbmclass\n\n"; err = configurationError; return true; } // open the database gdbmptr->setlogout(&logout); if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) { // most likely a system problem (we have already checked that the // gdbm database exists) logout << text_t2ascii << "system problem: open on gdbm database \"" << gdbm_filename << "\" failed\n\n"; err = systemProblem; return true; } // get the metadata - if getParents is set we need to get // info for all parents of OID as well as OID vector info_array; text_tarray OIDs; if (getParents) get_parents_array (OID, OIDs); OIDs.push_back (OID); text_tarray::const_iterator this_OID = OIDs.begin(); text_tarray::const_iterator end_OID = OIDs.end(); while (this_OID != end_OID) { infodbclass info; if (!gdbmptr->getinfo(*this_OID, info)) return false; // adjust the metadata text_t &contains = info["contains"]; if (contains.empty()) info["haschildren"] = 0; else info["haschildren"] = 1; contains.clear(); info_array.push_back(info); this_OID ++; } // if fields set is empty we want to get all available metadata text_tset tfields = fields; if (tfields.empty() && !info_array.empty()) { infodbclass::iterator t_info = info_array[0].begin(); infodbclass::iterator e_info = info_array[0].end(); while (t_info != e_info) { if ((*t_info).first != "contains") tfields.insert ((*t_info).first); t_info ++; } tfields.insert ("hasnext"); tfields.insert ("hasprevious"); } // collect together the metadata bool donenextprevtest = false; bool hasnext=false, hasprevious=false; MetadataInfo_t this_metadata; text_tarray *pos_metadata; text_tset::const_iterator fields_here = tfields.begin(); text_tset::const_iterator fields_end = tfields.end(); while (fields_here != fields_end) { this_metadata.clear(); this_metadata.isRef = false; vector::reverse_iterator this_info = info_array.rbegin(); vector::reverse_iterator end_info = info_array.rend(); MetadataInfo_t *tmetaptr = &this_metadata; while (this_info != end_info) { pos_metadata = (*this_info).getmultinfo(*fields_here); if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) { // collect metadata if (!donenextprevtest) { donenextprevtest = true; // cache parent contents array text_t thisparent = get_parent (OID); if (thisparent != parentOID) { parentOID = thisparent; parentcontents.erase(parentcontents.begin(), parentcontents.end()); if (gdbmptr->getinfo(parentOID, parentinfo)) { text_t &parentinfocontains = parentinfo["contains"]; if (!parentinfocontains.empty()) splitchar (parentinfocontains.begin(), parentinfocontains.end(), ';', parentcontents); } } // do tests text_tarray::const_iterator parentcontents_here = parentcontents.begin(); text_tarray::const_iterator parentcontents_end = parentcontents.end(); text_t shrunk_OID = OID; shrink_parent (shrunk_OID); while (parentcontents_here != parentcontents_end) { if (*parentcontents_here == shrunk_OID) { if (parentcontents_here == parentcontents.begin()) hasprevious = false; else hasprevious = true; parentcontents_here++; if (parentcontents_here == parentcontents.end()) hasnext = false; else hasnext = true; break; } parentcontents_here ++; } } // fill in metadata if ((*fields_here == "hasnext" && hasnext) || (*fields_here == "hasprevious" && hasprevious)) tmetaptr->values.push_back("1"); else tmetaptr->values.push_back("0"); } else if (pos_metadata != NULL && *fields_here != "contains") tmetaptr->values = *pos_metadata; else tmetaptr->values.push_back(""); this_info ++; if (this_info != end_info) { tmetaptr->parent = new MetadataInfo_t(); tmetaptr = tmetaptr->parent; } } metadata[*fields_here] = this_metadata; fields_here++; } return true; } bool mggdbmsourceclass::get_document (const text_t &OID, text_t &doc, comerror_t &err, ostream &logout) { outconvertclass text_t2ascii; err = noError; if (gdbmptr == NULL) { // most likely a configuration problem logout << text_t2ascii << "configuration error: mggdbmsource contains a null gdbmclass\n\n"; err = configurationError; return true; } // open the database gdbmptr->setlogout(&logout); if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) { // most likely a system problem (we have already checked that the // gdbm database exists) logout << text_t2ascii << "system problem: open on gdbm database \"" << gdbm_filename << "\" failed\n\n"; err = systemProblem; return true; } text_t tOID = OID; if (needs_translating (OID)) translate_OID (OID, tOID, err, logout); infodbclass info; if (!gdbmptr->getinfo(tOID, info)) return false; if (info["hastxt"].getint() == 1) { int docnum = info["docnum"].getint(); // set the collection directory mgsearchptr->setcollectdir (collectdir); // get the text mgsearchptr->docTargetDocument(defaultindex, defaultsubcollection, defaultlanguage, collection, docnum, doc); } return true; }