/********************************************************************** * * z3950proto.cpp -- * Copyright (C) 2000 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include "z3950proto.h" #include "comtypes.h" #include "browsefilter.h" #include "queryfilter.h" #include "filter.h" // z39.50 yaz stuff #include "z3950cfg.h" // for reading in config files - // defines "struct z3950cfg *zserver_list" as the head of the list. // note! yyin is hash-defined in z3950cfg.h to something else, to avoid // name conflicts with "other" yyins. extern FILE *yyin; extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex) z3950proto::z3950proto() { zserver_count=0; } z3950proto::~z3950proto() { } void z3950proto::add_server (z3950_proxy& zserver) { // append the new server ++zserver_count; zservers.push_back(&zserver); } void z3950proto::read_config_file(const text_t &filename, const text_t &errf) { struct z3950cfg *here; struct z3950cfg *oldhere; z3950_proxy *zserver; ShortColInfo_t *tempinfo; // FILE *errfile declared in z3950cfg.h, defined in zparse.y char *errf_str=errf.getcstr(); if ((errfile=fopen(errf_str,"a"))==NULL) { // what do we do if we can't open the error file? // this means that errors will go to stderr, which may stuff up // any cgi headers and the page. errfile=stderr; } delete []errf_str; yyout=errfile; char *filename_str=filename.getcstr(); yyin=fopen(filename_str,"r"); if (yyin==NULL) { cerr << "Could not open "<host.setcstr(here->hostname); tempinfo->port=here->port; tempinfo->name.setcstr(here->dbname); zserver->setInfo(tempinfo); zserver->setName(here->shortname); // now collection metadata. zserver->setMeta("collectionname",here->longname); if (here->icon!=NULL) zserver->setMeta("iconcollection",here->icon); if (here->smallicon!=NULL) zserver->setMeta("iconcollectionsmall",here->smallicon); /* filterclass *filter = new filterclass (); zserver->add_filter (filter); browsefilterclass *browsefilter = new browsefilterclass(); zserver->add_filter (browsefilter); queryfilterclass *queryfilter = new queryfilterclass(); zserver->add_filter (queryfilter); */ // About list if (here->about!=NULL) { struct z3950aboutlist *about_here=here->about; struct z3950aboutlist *oldabout; while (about_here!=NULL) { // problem with default lang (null): can't add ("",..) if (about_here->lang==NULL) zserver->addcfgAbout("en",about_here->text); else zserver->addcfgAbout(about_here->lang, about_here->text); oldabout=about_here; about_here=about_here->next; free(oldabout->lang); free(oldabout->text); free(oldabout); } } oldhere=here; here=here->next; free(oldhere->shortname); // these 4 strings should all be non-NULL... free(oldhere->hostname); free(oldhere->dbname); free(oldhere->longname); if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL if (oldhere->smallicon) free(oldhere->smallicon); free(oldhere); add_server(*zserver); } // end of while loop. } void z3950proto::configure (const text_t &/*key*/, const text_tarray &/*cfgline*/) { // this is called for each line in the gsdlsite.cfg file } bool z3950proto::init (ostream &/*logout*/) { // set up tcp connection to server here? // we might also read in the config file here (instead of librarymain.cpp) // // logout goes to initout.txt // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n"; //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n"; return true; } /*text_t z3950proto::get_protocol_name () { return "z3950proto"; } */ void z3950proto::get_collection_list (text_tarray &collist, comerror_t &/*err*/, ostream &/*logout*/) { z3950_proxy_array::iterator here = zservers.begin(); z3950_proxy_array::iterator end = zservers.end(); while (here != end) { collist.push_back((*here)->getName()); ++here; } } void z3950proto::has_collection (const text_t &collection, bool &hascollection, comerror_t &/*err*/, ostream &/*logout*/) { z3950_proxy_array::iterator here = zservers.begin(); z3950_proxy_array::iterator end = zservers.end(); while (here != end) { if((*here)->getName()==collection) { hascollection=true; return; } ++here; } hascollection=false; } void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess, comerror_t &/*err*/, ostream &/*logout*/) { // should we just ping the server, or actually create a connection // to the z39.50 server process on the machine ? wassuccess = true; } void z3950proto::get_collectinfo (const text_t &collection, ColInfoResponse_t &collectinfo, comerror_t &err, ostream &logout) { // set err to protocolError if something goes wrong... err=noError; z3950_proxy_array::iterator here = zservers.begin(); z3950_proxy_array::iterator end = zservers.end(); while (here != end) { if((*here)->getName()==collection) { break; } ++here; } if (here==end) { err=protocolError; char *coll_str=collection.getcstr(); logout << "z39.50: couldn't find collection" << coll_str << endl; delete []coll_str; return; } const ShortColInfo_t *colinfo=(*here)->getInfo(); collectinfo.shortInfo.name=colinfo->name; collectinfo.shortInfo.host=colinfo->host; collectinfo.shortInfo.port=colinfo->port; collectinfo.isPublic=true; // don't use beta field /*collectinfo.isBeta=false;*/ collectinfo.buildDate=1; // leave ccsCols empty (no cross-coll. searching - for now) /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!! // This info is available from the config file -- johnmcp /*******collectinfo.languages.push_back("en"); collectinfo.languages.push_back("fr");********/ collectinfo.numDocs=0; collectinfo.numWords=0; collectinfo.numBytes=0; // copy the text maps over. // collectinfo.collectionmeta; // text_tmap text_tmap collmeta = *((*here)->getMeta()); text_tmap::iterator mhere = collmeta.begin(); text_tmap::iterator mend = collmeta.end(); while (mhere != mend) { collectinfo.collectionmeta[(*mhere).first][g_EmptyText] = (*mhere).second; mhere ++; } //collectinfo.collectionmeta=*((*here)->getMeta()); collectinfo.format=*((*here)->getFormat()); //text_tmap /* collectinfo.building; //text_tmap */ ////collectinfo.receptionist="z3950"; /* for now... this is a url, relative to .../cgi-bin. NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c= */ } void z3950proto::get_filterinfo (const text_t &/*collection*/, InfoFiltersResponse_t &response, comerror_t &/*err*/, ostream &/*logout*/) { // we'll fake it here, and say we have set up some filters response.filterNames.insert("BrowseFilter"); response.filterNames.insert("QueryFilter"); response.filterNames.insert("NullFilter"); } void z3950proto::get_filteroptions (const text_t &/*collection*/, const InfoFilterOptionsRequest_t &/*req*/, InfoFilterOptionsResponse_t &response, comerror_t &err, ostream &/*logout*/) { // for now, assume all servers have the same characteristics /* if (request.filterName=="QueryFilter") { } else if (request.filterName=="BrowseFilter") { } else if (request.filterName=="NullFilter") { } */ response.filterOptions["Index"].type=FilterOption_t::stringt; response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery; response.filterOptions["Index"].defaultValue="any"; response.filterOptions["Index"].validValues.push_back(".any"); response.filterOptions["Index"].validValues.push_back(".title"); response.filterOptions["Index"].validValues.push_back(".author"); // and maybe ["Language"] option as well? err=noError; } void z3950proto::filter (const text_t &collection, FilterRequest_t &request, FilterResponse_t &response, comerror_t &err, ostream &logout) { // this function is called when: // * creating the title page,(looking for iconcoll* & collectname metadata) // * creating the about page (looking for "Title" metadata) // * doing the query - (note that a request for metadata comes first, then // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1) // metadata-only requests have filterName="NullFilter", else "QueryFilter". // For the title page, we should not create a connection to the target // (target means the actual z39.50 server, origin means us), but // for the about page and query pages, we need to get information from the // origin. (eg for the about page, we will print out some info such as ID, // name and version. // cerr now goes to errout.txt in etc directory err=noError; // get relevant "collection" z3950_proxy_array::iterator zserver = zservers.begin(); z3950_proxy_array::iterator zend = zservers.end(); while (zserver != zend) { if((*zserver)->getName()==collection) { break; } ++zserver; } // now have collection in zserver. ColInfoResponse_t info; ResultDocInfo_t *docInfo; // leave response.termInfo empty // response.termInfo.push_back(g_EmptyText); ??????? (should be empty if not req.) // See if this is for a query action if (request.filterName=="QueryFilter") { /* Sample OptionValue pairs `StartResults'=`1' `EndResults'=`20' `Term'=`firstword secondword' (term is just whatever the user typed in) `QueryType'=ranked|boolean -> OR|AND //`MatchMode'=`some' => 'OR' //`MatchMode' = `all' => 'AND' `Casefold'=`true' `Stem'=`false' `Maxdocs'=`50' */ // go through options text_t opt_term; // the term(s) that the user entered int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and) text_t opt_fields; // which fields to search on int opt_start=1, opt_end=20; // default values int nummatches=0, maxdocs=50; // default values OptionValue_tarray::iterator ov_here=request.filterOptions.begin(); OptionValue_tarray::iterator ov_end=request.filterOptions.end(); while (ov_here != ov_end) { if (ov_here->name=="Term") { opt_term=ov_here->value; } else if (ov_here->name=="QueryType") { if (ov_here->value=="ranked") opt_querytype=1; else if (ov_here->value=="boolean") opt_querytype=2; else { /* error - shouldn't happen */ /* currently unhandled */ } } else if (ov_here->name=="Index") { opt_fields=ov_here->value; } else if (ov_here->name=="StartResults") { opt_start=ov_here->value.getint(); } else if (ov_here->name=="EndResults") { opt_end=ov_here->value.getint(); } else if (ov_here->name=="Maxdocs") { maxdocs=ov_here->value.getint(); } ++ov_here; } err=noError; text_tarray *titles=(*zserver)->getrecordTitles(opt_term, opt_querytype, opt_fields, opt_start, // first to get opt_end-opt_start, //count &nummatches,err); if (err!=noError) { // can we return an err msg in a response, or just use // the more drastic Greenstone error mechanism? docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); docInfo->metadata["Title"].values.push_back("Error - query err?"); logout << "\nz3950 filter query: error connecting to server\n"; // for now, DON'T use GSDL protocol err. err=noError; return; } // check if (titles==NULL) - only happens on error? if (nummatches>0) { text_tarray::iterator titles_here=titles->begin(); text_tarray::iterator titles_end=titles->end(); int counter=1; while (titles_here!=titles_end) { docInfo=new ResultDocInfo_t; docInfo->metadata["Title"].values.push_back(*titles_here); docInfo->result_num=counter; // we need to give some OID, so we'll just use counter for now... // make it the number into the whole possible retrieved set. docInfo->OID=counter+opt_start-1; response.docInfo.push_back(*docInfo); ++counter; ++titles_here; } } if (request.filterResultOptions & FRtermFreq) { if (nummatches>maxdocs) { response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50) response.isApprox=MoreThan; } else { response.numDocs=nummatches; // eg "36 documents" response.isApprox=Exact; // Exact | Approximate | MoreThan } } // end of if (... & FRtermFreq) } // end of if (... == "QueryFilter") else { // this wasn't a query action if (request.filterOptions.size()>0 && request.filterOptions[0].name=="ParentNode") { // don't want to return anything return; /* } else if (request.docSet.size() && request.docSet[0]!="collection") { // documentaction // if docSet is not empty, it is either "collection", or an array // of OIDs docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); */ } else { // in case we need to return only metadata docInfo=new ResultDocInfo_t; response.docInfo.push_back(*docInfo); } } // end of not a query action // Fill in metadata for each response.docInfo (if wanted) if (request.filterResultOptions & FRmetadata) { get_collectinfo (collection, info, err, logout); // should check err returned here.... if (!request.fields.empty()) { // currently, this is only true for NullFilter when getting the "Title" // for a documentaction. // loop on each document being returned ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin(); ResultDocInfo_tarray::iterator docs_end=response.docInfo.end(); while (docs_here!=docs_end) { // loop on all the metadata fields in request.fields (type text_tset) text_tset::iterator fields_here=request.fields.begin(); text_tset::iterator fields_end=request.fields.end(); //text_tmap::iterator it; collectionmeta_map::iterator it; while (fields_here!=fields_end) { it=info.collectionmeta.find(*fields_here); if (it!=info.collectionmeta.end()) docs_here->metadata[*fields_here].values.push_back(((*it).second)[g_EmptyText]); else if (*fields_here=="Title" && !request.docSet.empty()) { // We only do this for a document action. // (This comes through as a NullQuery). // hopefully docSet is only not empty for documentaction... text_t doctitle; int i; // check that docSet isn't empty first!!!!!! i=request.docSet[0].getint(); text_t doctext="unneeded"; /* following variables aren't used, as our query result has been cached in z3950proxy.cpp (but really we shouldn't know that here...) But for the NullFilter, we don't get given these again in the request, so for now we'll take advantage of this.*/ int querytype=0; text_t field=g_EmptyText; // get the Query out of the filterOptions. (we need get the Title) text_t query=g_EmptyText; OptionValue_tarray::iterator opthere=request.filterOptions.begin(); OptionValue_tarray::iterator opt_end=request.filterOptions.end(); while (opthere!=opt_end) { if (opthere->name=="Term") { query=opthere->value; } else if (opthere->name=="Index") { field=opthere->value; } else if (opthere->name=="QueryType") { if (opthere->value=="ranked") querytype=1; else if (opthere->value=="boolean") querytype=2; else { /* error - shouldn't happen */ /* currently unhandled */ } } ++opthere; } (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err); // check err value! docs_here->metadata["Title"].values.push_back(doctitle); } else { docs_here->metadata[*fields_here].values.push_back(g_EmptyText); } ++fields_here; } // end of inner while loop ++docs_here; } // end of outer while loop } // end of if (!request.fields.empty()) else { // request.fields empty: return all metadata for about page or query // we'll only put it in the first docInfo. collectionmeta_map::iterator colmeta_here=info.collectionmeta.begin(); collectionmeta_map::iterator colmeta_end=info.collectionmeta.end(); while (colmeta_here!=colmeta_end) { response.docInfo[0].metadata[(*colmeta_here).first]. values.push_back(((*colmeta_here).second)[g_EmptyText]); ++colmeta_here; } // check if "collectionextra" metadata is set. If it isn't, we should // create connection to target to get it. if (info.collectionmeta.find("collectionextra")==colmeta_end) { // it hasn't been set yet... text_t abouttext="Server Online
\n"; abouttext+=(*zserver)->getzAbout(); // add in the "About" text we read in from config file. // how do we incorporate multi-lingual metadata? abouttext+="

\n"; text_t tmpabout; if ((*zserver)->getcfgAbout("en", tmpabout)==true) abouttext+=tmpabout; (*zserver)->setMeta("collectionextra",abouttext); response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext); } } // end of else // do indices' names, regardless of whether asked for or not... if (!response.docInfo.empty()) { response.docInfo[0].metadata[".author"].values.push_back("author fields"); response.docInfo[0].metadata[".title"].values.push_back("title fields"); response.docInfo[0].metadata[".any"].values.push_back("any fields"); } } //end of if (... & FRmetadata) ... } void z3950proto::get_document (const text_t &collection, const DocumentRequest_t &request, DocumentResponse_t &response, comerror_t &err, ostream &logout) { err=noError; // get relevant "collection" z3950_proxy_array::iterator zserver = zservers.begin(); z3950_proxy_array::iterator zend = zservers.end(); while (zserver != zend) { if((*zserver)->getName()==collection) { break; } ++zserver; } // now have collection in zserver. /* docresponse consists of text_t response.doc */ text_t title="unneeded"; text_t doctext; text_t query; // this should not be needed, as we have already connected to // get the title.... int querytype = 1; //ditto... text_t field; // ditto... (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(), title,doctext,err); // check return value of above? (false=>not connected) if (err==noError) response.doc=doctext; else { // could print out different messages based on error type.... response.doc="

Error

There was an error while connecting to the "; response.doc+="z39.50 server (ie target). Most likely this was a \n"; response.doc+="\"Connection Refused\" error.\n"; } if (0) { err=protocolError; logout << "Some error\n"; } } // sets issearchable to true if the given colection is searchable void z3950proto::is_searchable (const text_t &/*collection*/, bool &issearchable, comerror_t &err, ostream &/*logout*/) { issearchable = true; // assume all collections are searchable? err = noError; }