source: branches/z3950-branch/gsdl/src/recpt/z3950proto.cpp@ 1342

Last change on this file since 1342 was 1342, checked in by johnmcp, 24 years ago

Relatively stable z39.50 implementation now, merged with the mgpp source.
(Still needs a decent interface and query language though...)

  • Property svn:keywords set to Author Date Id Revision
File size: 18.9 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36extern FILE *yyin;
37extern "C" {
38 extern int zconfigparse();
39}
40
41
42
43
44z3950proto::z3950proto() {
45 zserver_count=0;
46}
47
48z3950proto::~z3950proto() {
49}
50
51void z3950proto::add_server (z3950_server& zserver) {
52
53 // append the new server
54 zserver_count++;
55 zservers.push_back(&zserver);
56}
57
58void z3950proto::read_config_file(const text_t &filename) {
59 struct z3950cfg *here;
60 struct z3950cfg *oldhere;
61 z3950_server *zserver;
62 ShortColInfo_t *tempinfo;
63
64 // zconfigparse() is defined in zparse.tab.c,
65 // which is the bison output of zparse.y
66
67 yyin=fopen(filename.getcstr(),"r");
68 if (yyin==NULL) {
69 cerr << "Could not open "<<filename.getcstr()<<" for reading.\n";
70 }
71 zconfigparse();
72 // we now have the config files in the ptr zserver_list
73 if (zserver_list==NULL)
74 return; // no valid servers found in the config file - note that
75 // the parser will have already spat out any errors.
76
77 // now create z3950servers for each structure in server_list
78 here=zserver_list;
79 while (here!=NULL) {
80 zserver=new z3950_server;
81 tempinfo=new ShortColInfo_t;
82
83 tempinfo->host.setcstr(here->hostname);
84 tempinfo->port=here->port;
85 tempinfo->name.setcstr(here->dbname);
86 zserver->setInfo(tempinfo);
87 zserver->setName(here->shortname);
88 // now collection metadata.
89 zserver->setMeta("collectionname",here->longname);
90 if (here->icon!=NULL)
91 zserver->setMeta("iconcollection",here->icon);
92 if (here->smallicon!=NULL)
93 zserver->setMeta("iconcollectionsmall",here->smallicon);
94
95 /* filterclass *filter = new filterclass ();
96 zserver->add_filter (filter);
97 browsefilterclass *browsefilter = new browsefilterclass();
98 zserver->add_filter (browsefilter);
99 queryfilterclass *queryfilter = new queryfilterclass();
100 zserver->add_filter (queryfilter);
101 */
102
103 // About list
104 if (here->about!=NULL) {
105 struct z3950aboutlist *about_here=here->about;
106 struct z3950aboutlist *oldabout;
107
108 while (about_here!=NULL) {
109 // problem with default lang (null): can't add ("",..)
110 if (about_here->lang==NULL)
111 zserver->addcfgAbout("en",about_here->text);
112 else
113 zserver->addcfgAbout(about_here->lang, about_here->text);
114 oldabout=about_here;
115 about_here=about_here->next;
116 free(oldabout->lang);
117 free(oldabout->text);
118 free(oldabout);
119 }
120 }
121
122 oldhere=here;
123 here=here->next;
124 free(oldhere->shortname);
125 free(oldhere->hostname);
126 free(oldhere->dbname);
127 free(oldhere->longname);
128 free(oldhere->icon);
129 free(oldhere->smallicon);
130 free(oldhere);
131
132 add_server(*zserver);
133 } // end of while loop.
134
135}
136
137void z3950proto::configure (const text_t &key,
138 const text_tarray &cfgline) {
139
140 // this is called for each line in the gsdlsite.cfg file
141
142 if (0)
143 cerr << "z3950proto::configure called:"
144 << "key is " << key.getcstr()
145 << "\n1st line is " << cfgline[0].getcstr() << endl;
146
147}
148
149
150bool z3950proto::init (ostream &/*logout*/) {
151 // set up tcp connection to server here?
152 // we might also read in the config file here (instead of librarymain.cpp)
153
154 //
155
156 // logout goes to initout.txt
157 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
158 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
159 return true;
160
161}
162
163/*text_t z3950proto::get_protocol_name () {
164 return "z3950proto";
165}
166*/
167
168void z3950proto::get_collection_list (text_tarray &collist,
169 comerror_t &/*err*/,
170 ostream &/*logout*/) {
171 // logout here DOESN'T go to initout.txt
172 // logout << "zdebug: get_collection_list called:\n";
173
174 /** *** for now, we are assuming that each SERVER is a GSDL collection,
175 as opposed to each DATABASE on the servers.
176 */
177 z3950_server_array::iterator here = zservers.begin();
178 z3950_server_array::iterator end = zservers.end();
179 while (here != end) {
180 collist.push_back((*here)->getName());
181 //const ShortColInfo_t *info=here->getInfo();
182 //collist.push_back(info->name);
183 here++;
184 }
185}
186
187void z3950proto::has_collection (const text_t &collection, bool &hascollection,
188 comerror_t &/*err*/, ostream &/*logout*/) {
189 z3950_server_array::iterator here = zservers.begin();
190 z3950_server_array::iterator end = zservers.end();
191 while (here != end) {
192 if((*here)->getName()==collection) {
193 hascollection=true;
194 return;
195 }
196 here++;
197 }
198 hascollection=false;
199}
200
201void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
202 comerror_t &/*err*/, ostream &/*logout*/) {
203 // should we just ping the server, or actually create a connection
204 // to the z39.50 server process on the machine ?
205 wassuccess = true;
206}
207
208void z3950proto::get_collectinfo (const text_t &collection,
209 ColInfoResponse_t &collectinfo,
210 comerror_t &err, ostream &logout) {
211
212 // set err to protocolError if something goes wrong...
213
214 z3950_server_array::iterator here = zservers.begin();
215 z3950_server_array::iterator end = zservers.end();
216 while (here != end) {
217 if((*here)->getName()==collection) {
218 break;
219 }
220 here++;
221 }
222
223 if (here==end) {
224 err=protocolError;
225 logout << "z39.50: couldn't find collection"
226 << collection.getcstr()
227 << endl;
228 return;
229 }
230
231 const ShortColInfo_t *colinfo=(*here)->getInfo();
232 collectinfo.shortInfo.name=colinfo->name;
233 collectinfo.shortInfo.host=colinfo->host;
234 collectinfo.shortInfo.port=colinfo->port;
235
236 collectinfo.isPublic=true;
237 // don't use beta field
238 /*collectinfo.isBeta=false;*/
239 collectinfo.buildDate=1;
240 // leave ccsCols empty (no cross-coll. searching - for now)
241 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
242 // This info is available from the config file -- johnmcp
243 /*******collectinfo.languages.push_back("en");
244 collectinfo.languages.push_back("fr");********/
245 collectinfo.numDocs=0;
246 collectinfo.numWords=0;
247 collectinfo.numBytes=0;
248 // copy the text maps over.
249 // collectinfo.collectionmeta; // text_tmap
250 collectinfo.collectionmeta=*((*here)->getMeta());
251 collectinfo.format=*((*here)->getFormat()); //text_tmap
252 /* collectinfo.building; //text_tmap */
253
254 ////collectinfo.receptionist="z3950";
255 /* for now... this is a url, relative to .../cgi-bin.
256 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
257 */
258}
259
260void z3950proto::get_filterinfo (const text_t &/*collection*/,
261 InfoFiltersResponse_t &response,
262 comerror_t &/*err*/, ostream &/*logout*/) {
263 // we'll fake it here, and say we have set up some filters
264 response.filterNames.insert("BrowseFilter");
265 response.filterNames.insert("QueryFilter");
266 response.filterNames.insert("NullFilter");
267
268}
269
270void z3950proto::get_filteroptions (const text_t &/*collection*/,
271 const InfoFilterOptionsRequest_t &/*req*/,
272 InfoFilterOptionsResponse_t &response,
273 comerror_t &err, ostream &/*logout*/) {
274 // for now, assume all servers have the same characteristics
275 /* if (request.filterName=="QueryFilter") { }
276 else if (request.filterName=="BrowseFilter") { }
277 else if (request.filterName=="NullFilter") { } */
278 response.filterOptions["Index"].type=FilterOption_t::stringt;
279 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
280 response.filterOptions["Index"].defaultValue="any";
281 response.filterOptions["Index"].validValues.push_back(".any");
282 response.filterOptions["Index"].validValues.push_back(".title");
283 response.filterOptions["Index"].validValues.push_back(".author");
284 // and maybe ["Language"] option as well?
285 err=noError;
286}
287
288void z3950proto::filter (const text_t &collection,
289 FilterRequest_t &request,
290 FilterResponse_t &response,
291 comerror_t &err, ostream &logout) {
292 // this function is called when:
293 // * creating the title page,(looking for iconcoll* & collectname metadata)
294 // * creating the about page (looking for "Title" metadata)
295 // * doing the query - (note that a request for metadata comes first, then
296 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
297
298 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
299 // For the title page, we should not create a connection to the target
300 // (target means the actual z39.50 server, origin means us), but
301 // for the about page and query pages, we need to get information from the
302 // origin. (eg for the about page, we will print out some info such as ID,
303 // name and version.
304
305 // cerr now goes to errout.txt in etc directory
306 err=noError;
307
308 // get relevant "collection"
309 z3950_server_array::iterator zserver = zservers.begin();
310 z3950_server_array::iterator zend = zservers.end();
311 while (zserver != zend) {
312 if((*zserver)->getName()==collection) {
313 break;
314 }
315 zserver++;
316 }
317 // now have collection in zserver.
318
319 ColInfoResponse_t info;
320 ResultDocInfo_t *docInfo;
321
322 // leave response.termInfo empty
323 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
324
325 // See if this is for a query action
326 if (request.filterName=="QueryFilter") {
327 /* Sample OptionValue pairs
328 `StartResults'=`1'
329 `EndResults'=`20'
330 `Term'=`firstword secondword' (term is just whatever the user typed in)
331 `QueryType'=`ranked' => 'OR' (cgiarg t=1)
332 `QueryType' = `boolean' => 'AND' (cgiarg t=0)
333 `Casefold'=`true'
334 `Stem'=`false'
335 `Maxdocs'=`50'
336 */
337 // go through options
338 text_t opt_term; // the term(s) that the user entered
339 text_t opt_fields; // which fields to search on
340 int opt_start=1, opt_end=20; // default values
341 int nummatches=0, maxdocs=50; // default values
342 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
343 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
344 while (ov_here != ov_end) {
345 // cout << "OV pair: `" << ov_here->name.getcstr() << "'=`"
346 // << ov_here->value.getcstr() << "'\n";
347 if (ov_here->name=="Term")
348 {
349 opt_term=ov_here->value;
350 } else if (ov_here->name=="Index")
351 {
352 opt_fields=ov_here->value;
353 } else if (ov_here->name=="StartResults")
354 {
355 opt_start=ov_here->value.getint();
356 } else if (ov_here->name=="EndResults")
357 {
358 opt_end=ov_here->value.getint();
359 } else if (ov_here->name=="Maxdocs")
360 {
361 maxdocs=ov_here->value.getint();
362 }
363 ov_here++;
364 }
365 err=noError;
366 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
367 opt_fields,
368 opt_start, // first to get
369 opt_end-opt_start, //count
370 &nummatches,err);
371 if (err!=noError) {
372 // can we return an err msg in a response, or just use
373 // the more drastic Greenstone error mechanism?
374 docInfo=new ResultDocInfo_t;
375 response.docInfo.push_back(*docInfo);
376 docInfo->metadata["Title"].values.push_back("Error - query err?");
377 logout << "\nz3950 filter query: error connecting to server\n";
378 // for now, DON'T use GSDL protocol err.
379 err=noError;
380 return;
381 }
382 // check if (titles==NULL) - only happens on error?
383 if (nummatches>0) {
384 text_tarray::iterator titles_here=titles->begin();
385 text_tarray::iterator titles_end=titles->end();
386 int counter=1;
387 while (titles_here!=titles_end) {
388 docInfo=new ResultDocInfo_t;
389 docInfo->metadata["Title"].values.push_back(*titles_here);
390 docInfo->result_num=counter;
391 // we need to give some OID, so we'll just use counter for now...
392 // make it the number into the whole possible retrieved set.
393 docInfo->OID=counter+opt_start-1;
394 response.docInfo.push_back(*docInfo);
395 counter++;
396 titles_here++;
397 }
398 }
399
400 if (request.filterResultOptions & FRtermFreq) {
401 if (nummatches>maxdocs) {
402 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
403 response.isApprox=MoreThan;
404 } else {
405 response.numDocs=nummatches; // eg "36 documents"
406 response.isApprox=Exact; // Exact | Approximate | MoreThan
407 }
408 } // end of if (... & FRtermFreq)
409
410 } // end of if (... & FROID)
411 else {
412 // this wasn't a query action
413
414 if (request.filterOptions.size()>0 &&
415 request.filterOptions[0].name=="ParentNode") {
416 // don't want to return anything
417 return;
418 /* } else if (request.docSet.size() &&
419 request.docSet[0]!="collection") {
420 // documentaction
421 // if docSet is not empty, it is either "collection", or an array
422 // of OIDs
423 docInfo=new ResultDocInfo_t;
424 response.docInfo.push_back(*docInfo);
425 */
426 } else {
427 // in case we need to return only metadata
428 docInfo=new ResultDocInfo_t;
429 response.docInfo.push_back(*docInfo);
430 }
431 }
432
433 // Fill in metadata for each response.docInfo (if wanted)
434 if (request.filterResultOptions & FRmetadata) {
435 get_collectinfo (collection, info, err, logout);
436 // should check err returned here....
437
438 // get the Query out of the filterOptions.
439 text_t query="";
440 text_t field="";
441 OptionValue_tarray::iterator opt_here=request.filterOptions.begin();
442 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
443 while (opt_here!=opt_end) {
444 if (opt_here->name=="Query") {
445 query=opt_here->value;
446 if (field!="") break; // break from loop if we've got both
447 } else if (opt_here->name=="Index") {
448 field=opt_here->value;
449 if (query!="") break; // break from loop if we've got both
450 }
451 opt_here++;
452 }
453
454 if (!request.fields.empty()) {
455 // loop on each document being returned
456 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
457 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
458 while (docs_here!=docs_end) {
459
460 // loop on all the metadata fields in request.fields (type text_tset)
461 text_tset::iterator fields_here=request.fields.begin();
462 text_tset::iterator fields_end=request.fields.end();
463 text_tmap::iterator it;
464 while (fields_here!=fields_end) {
465 it=info.collectionmeta.find(*fields_here);
466 ////////// cerr << "filter: getting " << (*fields_here).getcstr();
467 if (it!=info.collectionmeta.end())
468 docs_here->metadata[*fields_here].values.push_back((*it).second);
469 else if (*fields_here=="Title" && !request.docSet.empty()) {
470 // We only do this for a document action.
471 // (This comes through as a NullQuery).
472 // hopefully docSet is only not empty for documentaction...
473 text_t doctitle;
474 int i;
475 // check that docSet isn't empty first!!!!!!
476 i=request.docSet[0].getint();
477 text_t doctext="unneeded";
478 (*zserver)->getfullrecord(query, field, i, doctitle, doctext, err);
479 // check err value!
480 docs_here->metadata["Title"].values.push_back(doctitle);
481 } else {
482 docs_here->metadata[*fields_here].values.push_back("");
483 /////// cerr << " (not found)";
484 }
485 ////////cerr << "\n";
486 fields_here++;
487 } // end of inner while loop
488 docs_here++;
489 } // end of outer while loop
490 } // end of if (!request.fields.empty())
491
492 else { // request.fields empty: return all metadata for about page or query
493 // we'll only put it in the first docInfo.
494 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
495 text_tmap::iterator colmeta_end=info.collectionmeta.end();
496 while (colmeta_here!=colmeta_end) {
497 response.docInfo[0].metadata[(*colmeta_here).first].
498 values.push_back((*colmeta_here).second);
499 /////cerr << "\t" << (*colmeta_here).first.getcstr() << "\n";
500 colmeta_here++;
501 }
502
503 // check if "collectionextra" metadata is set. If it isn't, we should
504 // create connection to target to get it.
505 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
506 // it hasn't been set yet...
507 text_t abouttext="<B>Server Online</B><br>\n";
508 abouttext+=(*zserver)->getzAbout();
509 // add in the "About" text we read in from config file.
510 // how do we incorporate multi-lingual metadata?
511 abouttext+="<P>\n";
512 text_t tmpabout;
513
514 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
515 abouttext+=tmpabout;
516
517 (*zserver)->setMeta("collectionextra",abouttext);
518 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
519 }
520 } // end of else
521
522 // do indices' names, regardless of whether asked for or not...
523 if (!response.docInfo.empty()) {
524 response.docInfo[0].metadata[".author"].values.push_back("author fields");
525 response.docInfo[0].metadata[".title"].values.push_back("title fields");
526 response.docInfo[0].metadata[".any"].values.push_back("any fields");
527 }
528 } //end of if (... & FRmetadata) ...
529}
530
531
532
533void z3950proto::get_document (const text_t &collection,
534 const DocumentRequest_t &request,
535 DocumentResponse_t &response,
536 comerror_t &err, ostream &logout) {
537
538 err=noError;
539
540 // get relevant "collection"
541 z3950_server_array::iterator zserver = zservers.begin();
542 z3950_server_array::iterator zend = zservers.end();
543 while (zserver != zend) {
544 if((*zserver)->getName()==collection) {
545 break;
546 }
547 zserver++;
548 }
549 // now have collection in zserver.
550
551 /* cout << "get document:\n\tOID: " << request.OID.getcstr()
552 << "\n\tdocType: " << request.docType.getcstr()
553 << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n";
554 */
555
556 /* docresponse consists of
557 text_t response.doc */
558 text_t title="unneeded";
559 text_t doctext;
560 text_t query; // this should not be needed, as we have already connected to
561 // get the title....
562 text_t field; // ditto...
563 (*zserver)->getfullrecord(query,field,request.OID.getint(),
564 title,doctext,err);
565 // check return value of above? (false=>not connected)
566 if (err==noError)
567 response.doc=doctext;
568 else {
569 // could print out different messages based on error type....
570 response.doc="<h2>Error</h2>There was an error while connecting to the ";
571 response.doc+="z39.50 server (ie target). Most likely this was a \n";
572 response.doc+="\"Connection Refused\" error.\n";
573
574 }
575 if (0) {
576 err=protocolError;
577 logout << "Some error\n";
578 }
579}
Note: See TracBrowser for help on using the repository browser.