/**********************************************************************
 *
 * z3950proto.cpp --
 * Copyright (C) 2000  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "z3950proto.h"
#include "comtypes.h"
#include "browsefilter.h"
#include "queryfilter.h"
#include "filter.h"

// z39.50 yaz stuff

#include <stdio.h> // for (FILE *) type for yyin and fopen.

// config file parsing stuff
#include "z3950cfg.h"
// for reading in config files -
// defines "struct z3950cfg *zserver_list" as the head of the list.
// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
// name conflicts with "other" yyins.
extern FILE *yyin; extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex) extern "C" { extern int zconfigparse(); } z3950proto::z3950proto() { zserver_count=0; } z3950proto::~z3950proto() { } void z3950proto::add_server (z3950_server& zserver) { // append the new server zserver_count++; zservers.push_back(&zserver); } void z3950proto::read_config_file(const text_t &filename, const text_t &errf) { struct z3950cfg *here; struct z3950cfg *oldhere; z3950_server *zserver; ShortColInfo_t *tempinfo; // FILE *errfile declared in z3950cfg.h, defined in zparse.y char *errf_str=errf.getcstr(); if ((errfile=fopen(errf_str,"a"))==NULL) { // what do we do if we can't open the error file? // this means that errors will go to stderr, which may stuff up // any cgi headers and the page. errfile=stderr; } delete errf_str; yyout=errfile; // zconfigparse() is defined in zparse.tab.c, // which is the bison output of zparse.y char *filename_str=filename.getcstr(); yyin=fopen(filename_str,"r"); if (yyin==NULL) { cerr << "Could not open "<host.setcstr(here->hostname); tempinfo->port=here->port; tempinfo->name.setcstr(here->dbname); zserver->setInfo(tempinfo); zserver->setName(here->shortname); // now collection metadata. zserver->setMeta("collectionname",here->longname); if (here->icon!=NULL) zserver->setMeta("iconcollection",here->icon); if (here->smallicon!=NULL) zserver->setMeta("iconcollectionsmall",here->smallicon); /* filterclass *filter = new filterclass (); zserver->add_filter (filter); browsefilterclass *browsefilter = new browsefilterclass(); zserver->add_filter (browsefilter); queryfilterclass *queryfilter = new queryfilterclass(); zserver->add_filter (queryfilter); */ // About list if (here->about!=NULL) { struct z3950aboutlist *about_here=here->about; struct z3950aboutlist *oldabout; while (about_here!=NULL) { // problem with default lang (null): can't add ("",..) 
if (about_here->lang==NULL) zserver->addcfgAbout("en",about_here->text); else zserver->addcfgAbout(about_here->lang, about_here->text); oldabout=about_here; about_here=about_here->next; free(oldabout->lang); free(oldabout->text); free(oldabout); } } oldhere=here; here=here->next; free(oldhere->shortname); // these 4 strings should all be non-NULL... free(oldhere->hostname); free(oldhere->dbname); free(oldhere->longname); if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL if (oldhere->smallicon) free(oldhere->smallicon); free(oldhere); add_server(*zserver); } // end of while loop. } void z3950proto::configure (const text_t &/*key*/, const text_tarray &/*cfgline*/) { // this is called for each line in the gsdlsite.cfg file } bool z3950proto::init (ostream &/*logout*/) { // set up tcp connection to server here? // we might also read in the config file here (instead of librarymain.cpp) // // logout goes to initout.txt // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n"; //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n"; return true; } /*text_t z3950proto::get_protocol_name () { return "z3950proto"; } */ void z3950proto::get_collection_list (text_tarray &collist, comerror_t &/*err*/, ostream &/*logout*/) { z3950_server_array::iterator here = zservers.begin(); z3950_server_array::iterator end = zservers.end(); while (here != end) { collist.push_back((*here)->getName()); here++; } } void z3950proto::has_collection (const text_t &collection, bool &hascollection, comerror_t &/*err*/, ostream &/*logout*/) { z3950_server_array::iterator here = zservers.begin(); z3950_server_array::iterator end = zservers.end(); while (here != end) { if((*here)->getName()==collection) { hascollection=true; return; } here++; } hascollection=false; } void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess, comerror_t &/*err*/, ostream &/*logout*/) { // should we just ping the server, or actually create a connection // 
to the z39.50 server process on the machine ? wassuccess = true; } void z3950proto::get_collectinfo (const text_t &collection, ColInfoResponse_t &collectinfo, comerror_t &err, ostream &logout) { // set err to protocolError if something goes wrong... err=noError; z3950_server_array::iterator here = zservers.begin(); z3950_server_array::iterator end = zservers.end(); while (here != end) { if((*here)->getName()==collection) { break; } here++; } if (here==end) { err=protocolError; char *coll_str=collection.getcstr(); logout << "z39.50: couldn't find collection" << coll_str << endl; delete coll_str; return; } const ShortColInfo_t *colinfo=(*here)->getInfo(); collectinfo.shortInfo.name=colinfo->name; collectinfo.shortInfo.host=colinfo->host; collectinfo.shortInfo.port=colinfo->port; collectinfo.isPublic=true; // don't use beta field /*collectinfo.isBeta=false;*/ collectinfo.buildDate=1; // leave ccsCols empty (no cross-coll. searching - for now) /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!! // This info is available from the config file -- johnmcp /*******collectinfo.languages.push_back("en"); collectinfo.languages.push_back("fr");********/ collectinfo.numDocs=0; collectinfo.numWords=0; collectinfo.numBytes=0; // copy the text maps over. // collectinfo.collectionmeta; // text_tmap collectinfo.collectionmeta=*((*here)->getMeta()); collectinfo.format=*((*here)->getFormat()); //text_tmap /* collectinfo.building; //text_tmap */ ////collectinfo.receptionist="z3950"; /* for now... this is a url, relative to .../cgi-bin. 
NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c= */ } void z3950proto::get_filterinfo (const text_t &/*collection*/, InfoFiltersResponse_t &response, comerror_t &/*err*/, ostream &/*logout*/) { // we'll fake it here, and say we have set up some filters response.filterNames.insert("BrowseFilter"); response.filterNames.insert("QueryFilter"); response.filterNames.insert("NullFilter"); } void z3950proto::get_filteroptions (const text_t &/*collection*/, const InfoFilterOptionsRequest_t &/*req*/, InfoFilterOptionsResponse_t &response, comerror_t &err, ostream &/*logout*/) { // for now, assume all servers have the same characteristics /* if (request.filterName=="QueryFilter") { } else if (request.filterName=="BrowseFilter") { } else if (request.filterName=="NullFilter") { } */ response.filterOptions["Index"].type=FilterOption_t::stringt; response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery; response.filterOptions["Index"].defaultValue="any"; response.filterOptions["Index"].validValues.push_back(".any"); response.filterOptions["Index"].validValues.push_back(".title"); response.filterOptions["Index"].validValues.push_back(".author"); // and maybe ["Language"] option as well? err=noError; } void z3950proto::filter (const text_t &collection, FilterRequest_t &request, FilterResponse_t &response, comerror_t &err, ostream &logout) { // this function is called when: // * creating the title page,(looking for iconcoll* & collectname metadata) // * creating the about page (looking for "Title" metadata) // * doing the query - (note that a request for metadata comes first, then // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1) // metadata-only requests have filterName="NullFilter", else "QueryFilter". // For the title page, we should not create a connection to the target // (target means the actual z39.50 server, origin means us), but // for the about page and query pages, we need to get information from the // origin. 
(eg for the about page, we will print out some info such as ID, // name and version. // cerr now goes to errout.txt in etc directory err=noError; // get relevant "collection" z3950_server_array::iterator zserver = zservers.begin(); z3950_server_array::iterator zend = zservers.end(); while (zserver != zend) { if((*zserver)->getName()==collection) { break; } zserver++; } // now have collection in zserver. ColInfoResponse_t info; ResultDocInfo_t *docInfo; // leave response.termInfo empty // response.termInfo.push_back(""); ??????? (should be empty if not req.) // See if this is for a query action if (request.filterName=="QueryFilter") { /* Sample OptionValue pairs `StartResults'=`1' `EndResults'=`20' `Term'=`firstword secondword' (term is just whatever the user typed in) `QueryType'=`ranked' => 'OR' (cgiarg t=1) `QueryType' = `boolean' => 'AND' (cgiarg t=0) `Casefold'=`true' `Stem'=`false' `Maxdocs'=`50' */ // go through options text_t opt_term; // the term(s) that the user entered text_t opt_fields; // which fields to search on int opt_start=1, opt_end=20; // default values int nummatches=0, maxdocs=50; // default values OptionValue_tarray::iterator ov_here=request.filterOptions.begin(); OptionValue_tarray::iterator ov_end=request.filterOptions.end(); while (ov_here != ov_end) { // cerr << "OV pair: `" << ov_here->name.getcstr() << "'=`" // << ov_here->value.getcstr() << "'\n"; if (ov_here->name=="Term") { opt_term=ov_here->value; } else if (ov_here->name=="Index") { opt_fields=ov_here->value; } else if (ov_here->name=="StartResults") { opt_start=ov_here->value.getint(); } else if (ov_here->name=="EndResults") { opt_end=ov_here->value.getint(); } else if (ov_here->name=="Maxdocs") { maxdocs=ov_here->value.getint(); } ov_here++; } err=noError; text_tarray *titles=(*zserver)->getrecordTitles(opt_term, opt_fields, opt_start, // first to get opt_end-opt_start, //count &nummatches,err); if (err!=noError) { // can we return an err msg in a response, or just use // the more 
drastic Greenstone error mechanism? docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); docInfo->metadata["Title"].values.push_back("Error - query err?"); logout << "\nz3950 filter query: error connecting to server\n"; // for now, DON'T use GSDL protocol err. err=noError; return; } // check if (titles==NULL) - only happens on error? if (nummatches>0) { text_tarray::iterator titles_here=titles->begin(); text_tarray::iterator titles_end=titles->end(); int counter=1; while (titles_here!=titles_end) { docInfo=new ResultDocInfo_t; docInfo->metadata["Title"].values.push_back(*titles_here); docInfo->result_num=counter; // we need to give some OID, so we'll just use counter for now... // make it the number into the whole possible retrieved set. docInfo->OID=counter+opt_start-1; response.docInfo.push_back(*docInfo); counter++; titles_here++; } } if (request.filterResultOptions & FRtermFreq) { if (nummatches>maxdocs) { response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50) response.isApprox=MoreThan; } else { response.numDocs=nummatches; // eg "36 documents" response.isApprox=Exact; // Exact | Approximate | MoreThan } } // end of if (... & FRtermFreq) } // end of if (... & FROID) else { // this wasn't a query action if (request.filterOptions.size()>0 && request.filterOptions[0].name=="ParentNode") { // don't want to return anything return; /* } else if (request.docSet.size() && request.docSet[0]!="collection") { // documentaction // if docSet is not empty, it is either "collection", or an array // of OIDs docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); */ } else { // in case we need to return only metadata docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); } } // Fill in metadata for each response.docInfo (if wanted) if (request.filterResultOptions & FRmetadata) { get_collectinfo (collection, info, err, logout); // should check err returned here.... // get the Query out of the filterOptions. 
text_t query=""; text_t field=""; OptionValue_tarray::iterator opt_here=request.filterOptions.begin(); OptionValue_tarray::iterator opt_end=request.filterOptions.end(); while (opt_here!=opt_end) { if (opt_here->name=="Query") { query=opt_here->value; if (field!="") break; // break from loop if we've got both } else if (opt_here->name=="Index") { field=opt_here->value; if (query!="") break; // break from loop if we've got both } opt_here++; } if (!request.fields.empty()) { // loop on each document being returned ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin(); ResultDocInfo_tarray::iterator docs_end=response.docInfo.end(); while (docs_here!=docs_end) { // loop on all the metadata fields in request.fields (type text_tset) text_tset::iterator fields_here=request.fields.begin(); text_tset::iterator fields_end=request.fields.end(); text_tmap::iterator it; while (fields_here!=fields_end) { it=info.collectionmeta.find(*fields_here); if (it!=info.collectionmeta.end()) docs_here->metadata[*fields_here].values.push_back((*it).second); else if (*fields_here=="Title" && !request.docSet.empty()) { // We only do this for a document action. // (This comes through as a NullQuery). // hopefully docSet is only not empty for documentaction... text_t doctitle; int i; // check that docSet isn't empty first!!!!!! i=request.docSet[0].getint(); text_t doctext="unneeded"; (*zserver)->getfullrecord(query, field, i, doctitle, doctext, err); // check err value! docs_here->metadata["Title"].values.push_back(doctitle); } else { docs_here->metadata[*fields_here].values.push_back(""); /////// cerr << " (not found)"; } fields_here++; } // end of inner while loop docs_here++; } // end of outer while loop } // end of if (!request.fields.empty()) else { // request.fields empty: return all metadata for about page or query // we'll only put it in the first docInfo. 
text_tmap::iterator colmeta_here=info.collectionmeta.begin(); text_tmap::iterator colmeta_end=info.collectionmeta.end(); while (colmeta_here!=colmeta_end) { response.docInfo[0].metadata[(*colmeta_here).first]. values.push_back((*colmeta_here).second); colmeta_here++; } // check if "collectionextra" metadata is set. If it isn't, we should // create connection to target to get it. if (info.collectionmeta.find("collectionextra")==colmeta_end) { // it hasn't been set yet... text_t abouttext="Server Online
\n"; abouttext+=(*zserver)->getzAbout(); // add in the "About" text we read in from config file. // how do we incorporate multi-lingual metadata? abouttext+="

\n"; text_t tmpabout; if ((*zserver)->getcfgAbout("en", tmpabout)==true) abouttext+=tmpabout; (*zserver)->setMeta("collectionextra",abouttext); response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext); } } // end of else // do indices' names, regardless of whether asked for or not... if (!response.docInfo.empty()) { response.docInfo[0].metadata[".author"].values.push_back("author fields"); response.docInfo[0].metadata[".title"].values.push_back("title fields"); response.docInfo[0].metadata[".any"].values.push_back("any fields"); } } //end of if (... & FRmetadata) ... } void z3950proto::get_document (const text_t &collection, const DocumentRequest_t &request, DocumentResponse_t &response, comerror_t &err, ostream &logout) { err=noError; // get relevant "collection" z3950_server_array::iterator zserver = zservers.begin(); z3950_server_array::iterator zend = zservers.end(); while (zserver != zend) { if((*zserver)->getName()==collection) { break; } zserver++; } // now have collection in zserver. /* cout << "get document:\n\tOID: " << request.OID.getcstr() << "\n\tdocType: " << request.docType.getcstr() << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n"; */ /* docresponse consists of text_t response.doc */ text_t title="unneeded"; text_t doctext; text_t query; // this should not be needed, as we have already connected to // get the title.... text_t field; // ditto... (*zserver)->getfullrecord(query,field,request.OID.getint(), title,doctext,err); // check return value of above? (false=>not connected) if (err==noError) response.doc=doctext; else { // could print out different messages based on error type.... response.doc="

Error

There was an error while connecting to the "; response.doc+="z39.50 server (ie target). Most likely this was a \n"; response.doc+="\"Connection Refused\" error.\n"; } if (0) { err=protocolError; logout << "Some error\n"; } }