source: trunk/gsdl/src/recpt/z3950proto.cpp@ 1900

Last change on this file since 1900 was 1900, checked in by jrm21, 23 years ago

The z39.50 client now does "and" or "or" boolean searches depending on
whether "some" or "all" is chosen in the UI. Note that this has been claimed
for some time, but it actually does it now :)

  • Property svn:keywords set to Author Date Id Revision
File size: 19.7 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36
37// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
38// name conflicts with "other" yyins.
39extern FILE *yyin;
40extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
41
42extern "C" {
43 extern int zconfigparse();
44}
45
46
47
48
49z3950proto::z3950proto() {
50 zserver_count=0;
51}
52
53z3950proto::~z3950proto() {
54}
55
56void z3950proto::add_server (z3950_server& zserver) {
57
58 // append the new server
59 zserver_count++;
60 zservers.push_back(&zserver);
61}
62
63void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
64 struct z3950cfg *here;
65 struct z3950cfg *oldhere;
66 z3950_server *zserver;
67 ShortColInfo_t *tempinfo;
68
69 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
70
71 char *errf_str=errf.getcstr();
72 if ((errfile=fopen(errf_str,"a"))==NULL) {
73 // what do we do if we can't open the error file?
74 // this means that errors will go to stderr, which may stuff up
75 // any cgi headers and the page.
76 errfile=stderr;
77 }
78 delete errf_str;
79 yyout=errfile;
80
81 // zconfigparse() is defined in zparse.tab.c,
82 // which is the bison output of zparse.y
83
84 char *filename_str=filename.getcstr();
85 yyin=fopen(filename_str,"r");
86 if (yyin==NULL) {
87 cerr << "Could not open "<<filename_str<<" for reading.\n";
88 delete filename_str;
89 return;
90 }
91 delete filename_str;
92 zconfigparse();
93
94 if (errfile!=stderr)
95 fclose(errfile);
96
97 // we now have the config files in the ptr zserver_list
98 if (zserver_list==NULL)
99 return; // no valid servers found in the config file - note that
100 // the parser will have already spat out any errors.
101
102 // now create z3950servers for each structure in server_list
103 here=zserver_list;
104 while (here!=NULL) {
105 zserver=new z3950_server;
106 tempinfo=new ShortColInfo_t;
107
108 tempinfo->host.setcstr(here->hostname);
109 tempinfo->port=here->port;
110 tempinfo->name.setcstr(here->dbname);
111 zserver->setInfo(tempinfo);
112 zserver->setName(here->shortname);
113 // now collection metadata.
114 zserver->setMeta("collectionname",here->longname);
115 if (here->icon!=NULL)
116 zserver->setMeta("iconcollection",here->icon);
117 if (here->smallicon!=NULL)
118 zserver->setMeta("iconcollectionsmall",here->smallicon);
119
120 /* filterclass *filter = new filterclass ();
121 zserver->add_filter (filter);
122 browsefilterclass *browsefilter = new browsefilterclass();
123 zserver->add_filter (browsefilter);
124 queryfilterclass *queryfilter = new queryfilterclass();
125 zserver->add_filter (queryfilter);
126 */
127
128 // About list
129 if (here->about!=NULL) {
130 struct z3950aboutlist *about_here=here->about;
131 struct z3950aboutlist *oldabout;
132
133 while (about_here!=NULL) {
134 // problem with default lang (null): can't add ("",..)
135 if (about_here->lang==NULL)
136 zserver->addcfgAbout("en",about_here->text);
137 else
138 zserver->addcfgAbout(about_here->lang, about_here->text);
139 oldabout=about_here;
140 about_here=about_here->next;
141 free(oldabout->lang);
142 free(oldabout->text);
143 free(oldabout);
144 }
145 }
146
147 oldhere=here;
148 here=here->next;
149 free(oldhere->shortname); // these 4 strings should all be non-NULL...
150 free(oldhere->hostname);
151 free(oldhere->dbname);
152 free(oldhere->longname);
153 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
154 if (oldhere->smallicon) free(oldhere->smallicon);
155 free(oldhere);
156
157 add_server(*zserver);
158 } // end of while loop.
159
160}
161
162void z3950proto::configure (const text_t &/*key*/,
163 const text_tarray &/*cfgline*/) {
164 // this is called for each line in the gsdlsite.cfg file
165}
166
167
168bool z3950proto::init (ostream &/*logout*/) {
169 // set up tcp connection to server here?
170 // we might also read in the config file here (instead of librarymain.cpp)
171
172 //
173
174 // logout goes to initout.txt
175 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
176 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
177 return true;
178
179}
180
181/*text_t z3950proto::get_protocol_name () {
182 return "z3950proto";
183}
184*/
185
186void z3950proto::get_collection_list (text_tarray &collist,
187 comerror_t &/*err*/,
188 ostream &/*logout*/) {
189
190 z3950_server_array::iterator here = zservers.begin();
191 z3950_server_array::iterator end = zservers.end();
192 while (here != end) {
193 collist.push_back((*here)->getName());
194 here++;
195 }
196}
197
198void z3950proto::has_collection (const text_t &collection, bool &hascollection,
199 comerror_t &/*err*/, ostream &/*logout*/) {
200 z3950_server_array::iterator here = zservers.begin();
201 z3950_server_array::iterator end = zservers.end();
202 while (here != end) {
203 if((*here)->getName()==collection) {
204 hascollection=true;
205 return;
206 }
207 here++;
208 }
209 hascollection=false;
210}
211
212void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
213 comerror_t &/*err*/, ostream &/*logout*/) {
214 // should we just ping the server, or actually create a connection
215 // to the z39.50 server process on the machine ?
216 wassuccess = true;
217}
218
219void z3950proto::get_collectinfo (const text_t &collection,
220 ColInfoResponse_t &collectinfo,
221 comerror_t &err, ostream &logout) {
222
223 // set err to protocolError if something goes wrong...
224 err=noError;
225
226 z3950_server_array::iterator here = zservers.begin();
227 z3950_server_array::iterator end = zservers.end();
228 while (here != end) {
229 if((*here)->getName()==collection) {
230 break;
231 }
232 here++;
233 }
234
235 if (here==end) {
236 err=protocolError;
237 char *coll_str=collection.getcstr();
238 logout << "z39.50: couldn't find collection"
239 << coll_str
240 << endl;
241 delete coll_str;
242 return;
243 }
244
245 const ShortColInfo_t *colinfo=(*here)->getInfo();
246 collectinfo.shortInfo.name=colinfo->name;
247 collectinfo.shortInfo.host=colinfo->host;
248 collectinfo.shortInfo.port=colinfo->port;
249
250 collectinfo.isPublic=true;
251 // don't use beta field
252 /*collectinfo.isBeta=false;*/
253 collectinfo.buildDate=1;
254 // leave ccsCols empty (no cross-coll. searching - for now)
255 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
256 // This info is available from the config file -- johnmcp
257 /*******collectinfo.languages.push_back("en");
258 collectinfo.languages.push_back("fr");********/
259 collectinfo.numDocs=0;
260 collectinfo.numWords=0;
261 collectinfo.numBytes=0;
262 // copy the text maps over.
263 // collectinfo.collectionmeta; // text_tmap
264 collectinfo.collectionmeta=*((*here)->getMeta());
265 collectinfo.format=*((*here)->getFormat()); //text_tmap
266 /* collectinfo.building; //text_tmap */
267
268 ////collectinfo.receptionist="z3950";
269 /* for now... this is a url, relative to .../cgi-bin.
270 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
271 */
272}
273
274void z3950proto::get_filterinfo (const text_t &/*collection*/,
275 InfoFiltersResponse_t &response,
276 comerror_t &/*err*/, ostream &/*logout*/) {
277 // we'll fake it here, and say we have set up some filters
278 response.filterNames.insert("BrowseFilter");
279 response.filterNames.insert("QueryFilter");
280 response.filterNames.insert("NullFilter");
281
282}
283
284void z3950proto::get_filteroptions (const text_t &/*collection*/,
285 const InfoFilterOptionsRequest_t &/*req*/,
286 InfoFilterOptionsResponse_t &response,
287 comerror_t &err, ostream &/*logout*/) {
288 // for now, assume all servers have the same characteristics
289 /* if (request.filterName=="QueryFilter") { }
290 else if (request.filterName=="BrowseFilter") { }
291 else if (request.filterName=="NullFilter") { } */
292 response.filterOptions["Index"].type=FilterOption_t::stringt;
293 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
294 response.filterOptions["Index"].defaultValue="any";
295 response.filterOptions["Index"].validValues.push_back(".any");
296 response.filterOptions["Index"].validValues.push_back(".title");
297 response.filterOptions["Index"].validValues.push_back(".author");
298 // and maybe ["Language"] option as well?
299 err=noError;
300}
301
302void z3950proto::filter (const text_t &collection,
303 FilterRequest_t &request,
304 FilterResponse_t &response,
305 comerror_t &err, ostream &logout) {
306 // this function is called when:
307 // * creating the title page,(looking for iconcoll* & collectname metadata)
308 // * creating the about page (looking for "Title" metadata)
309 // * doing the query - (note that a request for metadata comes first, then
310 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
311
312 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
313 // For the title page, we should not create a connection to the target
314 // (target means the actual z39.50 server, origin means us), but
315 // for the about page and query pages, we need to get information from the
316 // origin. (eg for the about page, we will print out some info such as ID,
317 // name and version.
318
319 // cerr now goes to errout.txt in etc directory
320 err=noError;
321
322 // get relevant "collection"
323 z3950_server_array::iterator zserver = zservers.begin();
324 z3950_server_array::iterator zend = zservers.end();
325 while (zserver != zend) {
326 if((*zserver)->getName()==collection) {
327 break;
328 }
329 zserver++;
330 }
331 // now have collection in zserver.
332
333 ColInfoResponse_t info;
334 ResultDocInfo_t *docInfo;
335
336 // leave response.termInfo empty
337 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
338
339 // See if this is for a query action
340 if (request.filterName=="QueryFilter") {
341 /* Sample OptionValue pairs
342 `StartResults'=`1'
343 `EndResults'=`20'
344 `Term'=`firstword secondword' (term is just whatever the user typed in)
345 `QueryType'=`ranked' => 'OR' (cgiarg t=1)
346 `QueryType' = `boolean' => 'AND' (cgiarg t=0)
347 `Casefold'=`true'
348 `Stem'=`false'
349 `Maxdocs'=`50'
350 */
351 // go through options
352 text_t opt_term; // the term(s) that the user entered
353 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
354 text_t opt_fields; // which fields to search on
355 int opt_start=1, opt_end=20; // default values
356 int nummatches=0, maxdocs=50; // default values
357 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
358 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
359 while (ov_here != ov_end) {
360 if (ov_here->name=="Term") {
361 opt_term=ov_here->value;
362 } else if (ov_here->name=="QueryType") {
363 if (ov_here->value=="ranked") opt_querytype=1;
364 else if (ov_here->value=="boolean") opt_querytype=2;
365 else { /* error - shouldn't happen */
366 /* currently unhandled */
367 }
368 } else if (ov_here->name=="Index") {
369 opt_fields=ov_here->value;
370 } else if (ov_here->name=="StartResults") {
371 opt_start=ov_here->value.getint();
372 } else if (ov_here->name=="EndResults") {
373 opt_end=ov_here->value.getint();
374 } else if (ov_here->name=="Maxdocs") {
375 maxdocs=ov_here->value.getint();
376 }
377 ov_here++;
378 }
379 err=noError;
380 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
381 opt_querytype,
382 opt_fields,
383 opt_start, // first to get
384 opt_end-opt_start, //count
385 &nummatches,err);
386 if (err!=noError) {
387 // can we return an err msg in a response, or just use
388 // the more drastic Greenstone error mechanism?
389 docInfo=new ResultDocInfo_t;
390 response.docInfo.push_back(*docInfo);
391 docInfo->metadata["Title"].values.push_back("Error - query err?");
392 logout << "\nz3950 filter query: error connecting to server\n";
393 // for now, DON'T use GSDL protocol err.
394 err=noError;
395 return;
396 }
397 // check if (titles==NULL) - only happens on error?
398 if (nummatches>0) {
399 text_tarray::iterator titles_here=titles->begin();
400 text_tarray::iterator titles_end=titles->end();
401 int counter=1;
402 while (titles_here!=titles_end) {
403 docInfo=new ResultDocInfo_t;
404 docInfo->metadata["Title"].values.push_back(*titles_here);
405 docInfo->result_num=counter;
406 // we need to give some OID, so we'll just use counter for now...
407 // make it the number into the whole possible retrieved set.
408 docInfo->OID=counter+opt_start-1;
409 response.docInfo.push_back(*docInfo);
410 counter++;
411 titles_here++;
412 }
413 }
414
415 if (request.filterResultOptions & FRtermFreq) {
416 if (nummatches>maxdocs) {
417 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
418 response.isApprox=MoreThan;
419 } else {
420 response.numDocs=nummatches; // eg "36 documents"
421 response.isApprox=Exact; // Exact | Approximate | MoreThan
422 }
423 } // end of if (... & FRtermFreq)
424
425 } // end of if (... == "QueryFilter")
426 else {
427 // this wasn't a query action
428
429 if (request.filterOptions.size()>0 &&
430 request.filterOptions[0].name=="ParentNode") {
431 // don't want to return anything
432 return;
433 /* } else if (request.docSet.size() &&
434 request.docSet[0]!="collection") {
435 // documentaction
436 // if docSet is not empty, it is either "collection", or an array
437 // of OIDs
438 docInfo=new ResultDocInfo_t;
439 response.docInfo.push_back(*docInfo);
440 */
441 } else {
442 // in case we need to return only metadata
443 docInfo=new ResultDocInfo_t;
444 response.docInfo.push_back(*docInfo);
445 }
446 } // end of not a query action
447
448
449
450 // Fill in metadata for each response.docInfo (if wanted)
451 if (request.filterResultOptions & FRmetadata) {
452 get_collectinfo (collection, info, err, logout);
453 // should check err returned here....
454
455 if (!request.fields.empty()) {
456 // currently, this is only true for NullFilter when getting the "Title"
457 // for a documentaction.
458
459 // loop on each document being returned
460 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
461 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
462 while (docs_here!=docs_end) {
463 // loop on all the metadata fields in request.fields (type text_tset)
464 text_tset::iterator fields_here=request.fields.begin();
465 text_tset::iterator fields_end=request.fields.end();
466 text_tmap::iterator it;
467 while (fields_here!=fields_end) {
468 it=info.collectionmeta.find(*fields_here);
469 if (it!=info.collectionmeta.end())
470 docs_here->metadata[*fields_here].values.push_back((*it).second);
471 else if (*fields_here=="Title" && !request.docSet.empty()) {
472 // We only do this for a document action.
473 // (This comes through as a NullQuery).
474 // hopefully docSet is only not empty for documentaction...
475 text_t doctitle;
476 int i;
477 // check that docSet isn't empty first!!!!!!
478 i=request.docSet[0].getint();
479 text_t doctext="unneeded";
480 /* following variables aren't used, as our query result has been
481 cached in z3950server.cpp (but really we shouldn't know that
482 here...) But for the NullFilter, we don't get given these
483 again in the request, so for now we'll take advantage of this.*/
484
485 int querytype=0; text_t field="";
486
487 // get the Query out of the filterOptions. (we need get the Title)
488 text_t query="";
489 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
490 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
491 while (opthere!=opt_end) {
492 if (opthere->name=="Query") {
493 query=opthere->value;
494 break;
495 }
496 opthere++;
497 }
498 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
499 // check err value!
500 docs_here->metadata["Title"].values.push_back(doctitle);
501 } else {
502 docs_here->metadata[*fields_here].values.push_back("");
503 }
504 fields_here++;
505 } // end of inner while loop
506 docs_here++;
507 } // end of outer while loop
508 } // end of if (!request.fields.empty())
509
510 else { // request.fields empty: return all metadata for about page or query
511 // we'll only put it in the first docInfo.
512 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
513 text_tmap::iterator colmeta_end=info.collectionmeta.end();
514 while (colmeta_here!=colmeta_end) {
515 response.docInfo[0].metadata[(*colmeta_here).first].
516 values.push_back((*colmeta_here).second);
517 colmeta_here++;
518 }
519
520 // check if "collectionextra" metadata is set. If it isn't, we should
521 // create connection to target to get it.
522 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
523 // it hasn't been set yet...
524 text_t abouttext="<B>Server Online</B><br>\n";
525 abouttext+=(*zserver)->getzAbout();
526 // add in the "About" text we read in from config file.
527 // how do we incorporate multi-lingual metadata?
528 abouttext+="<P>\n";
529 text_t tmpabout;
530
531 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
532 abouttext+=tmpabout;
533
534 (*zserver)->setMeta("collectionextra",abouttext);
535 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
536 }
537 } // end of else
538
539 // do indices' names, regardless of whether asked for or not...
540 if (!response.docInfo.empty()) {
541 response.docInfo[0].metadata[".author"].values.push_back("author fields");
542 response.docInfo[0].metadata[".title"].values.push_back("title fields");
543 response.docInfo[0].metadata[".any"].values.push_back("any fields");
544 }
545 } //end of if (... & FRmetadata) ...
546}
547
548
549
550void z3950proto::get_document (const text_t &collection,
551 const DocumentRequest_t &request,
552 DocumentResponse_t &response,
553 comerror_t &err, ostream &logout) {
554
555 err=noError;
556
557 // get relevant "collection"
558 z3950_server_array::iterator zserver = zservers.begin();
559 z3950_server_array::iterator zend = zservers.end();
560 while (zserver != zend) {
561 if((*zserver)->getName()==collection) {
562 break;
563 }
564 zserver++;
565 }
566 // now have collection in zserver.
567
568 /* cout << "get document:\n\tOID: " << request.OID.getcstr()
569 << "\n\tdocType: " << request.docType.getcstr()
570 << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n";
571 */
572
573 /* docresponse consists of
574 text_t response.doc */
575 text_t title="unneeded";
576 text_t doctext;
577 text_t query; // this should not be needed, as we have already connected to
578 // get the title....
579 int querytype; //ditto...
580 text_t field; // ditto...
581 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
582 title,doctext,err);
583 // check return value of above? (false=>not connected)
584 if (err==noError)
585 response.doc=doctext;
586 else {
587 // could print out different messages based on error type....
588 response.doc="<h2>Error</h2>There was an error while connecting to the ";
589 response.doc+="z39.50 server (ie target). Most likely this was a \n";
590 response.doc+="\"Connection Refused\" error.\n";
591
592 }
593 if (0) {
594 err=protocolError;
595 logout << "Some error\n";
596 }
597}
Note: See TracBrowser for help on using the repository browser.