source: trunk/gsdl/src/recpt/z3950proto.cpp@ 1861

Last change on this file since 1861 was 1738, checked in by jrm21, 23 years ago

all calls to text_t.getcstr() now delete the returned ptr. Also, we
remove any " chars from the string as this may ruin the query. We don't
yet do any post-processing to filter for phrases...

  • Property svn:keywords set to Author Date Id Revision
File size: 19.2 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36
37// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
38// name conflicts with "other" yyins.
39extern FILE *yyin;
40extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
41
42extern "C" {
43 extern int zconfigparse();
44}
45
46
47
48
49z3950proto::z3950proto() {
50 zserver_count=0;
51}
52
53z3950proto::~z3950proto() {
54}
55
56void z3950proto::add_server (z3950_server& zserver) {
57
58 // append the new server
59 zserver_count++;
60 zservers.push_back(&zserver);
61}
62
63void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
64 struct z3950cfg *here;
65 struct z3950cfg *oldhere;
66 z3950_server *zserver;
67 ShortColInfo_t *tempinfo;
68
69 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
70
71 char *errf_str=errf.getcstr();
72 if ((errfile=fopen(errf_str,"a"))==NULL) {
73 // what do we do if we can't open the error file?
74 // this means that errors will go to stderr, which may stuff up
75 // any cgi headers and the page.
76 errfile=stderr;
77 }
78 delete errf_str;
79 yyout=errfile;
80
81 // zconfigparse() is defined in zparse.tab.c,
82 // which is the bison output of zparse.y
83
84 char *filename_str=filename.getcstr();
85 yyin=fopen(filename_str,"r");
86 if (yyin==NULL) {
87 cerr << "Could not open "<<filename_str<<" for reading.\n";
88 delete filename_str;
89 return;
90 }
91 delete filename_str;
92 zconfigparse();
93
94 if (errfile!=stderr)
95 fclose(errfile);
96
97 // we now have the config files in the ptr zserver_list
98 if (zserver_list==NULL)
99 return; // no valid servers found in the config file - note that
100 // the parser will have already spat out any errors.
101
102 // now create z3950servers for each structure in server_list
103 here=zserver_list;
104 while (here!=NULL) {
105 zserver=new z3950_server;
106 tempinfo=new ShortColInfo_t;
107
108 tempinfo->host.setcstr(here->hostname);
109 tempinfo->port=here->port;
110 tempinfo->name.setcstr(here->dbname);
111 zserver->setInfo(tempinfo);
112 zserver->setName(here->shortname);
113 // now collection metadata.
114 zserver->setMeta("collectionname",here->longname);
115 if (here->icon!=NULL)
116 zserver->setMeta("iconcollection",here->icon);
117 if (here->smallicon!=NULL)
118 zserver->setMeta("iconcollectionsmall",here->smallicon);
119
120 /* filterclass *filter = new filterclass ();
121 zserver->add_filter (filter);
122 browsefilterclass *browsefilter = new browsefilterclass();
123 zserver->add_filter (browsefilter);
124 queryfilterclass *queryfilter = new queryfilterclass();
125 zserver->add_filter (queryfilter);
126 */
127
128 // About list
129 if (here->about!=NULL) {
130 struct z3950aboutlist *about_here=here->about;
131 struct z3950aboutlist *oldabout;
132
133 while (about_here!=NULL) {
134 // problem with default lang (null): can't add ("",..)
135 if (about_here->lang==NULL)
136 zserver->addcfgAbout("en",about_here->text);
137 else
138 zserver->addcfgAbout(about_here->lang, about_here->text);
139 oldabout=about_here;
140 about_here=about_here->next;
141 free(oldabout->lang);
142 free(oldabout->text);
143 free(oldabout);
144 }
145 }
146
147 oldhere=here;
148 here=here->next;
149 free(oldhere->shortname); // these 4 strings should all be non-NULL...
150 free(oldhere->hostname);
151 free(oldhere->dbname);
152 free(oldhere->longname);
153 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
154 if (oldhere->smallicon) free(oldhere->smallicon);
155 free(oldhere);
156
157 add_server(*zserver);
158 } // end of while loop.
159
160}
161
162void z3950proto::configure (const text_t &/*key*/,
163 const text_tarray &/*cfgline*/) {
164 // this is called for each line in the gsdlsite.cfg file
165}
166
167
168bool z3950proto::init (ostream &/*logout*/) {
169 // set up tcp connection to server here?
170 // we might also read in the config file here (instead of librarymain.cpp)
171
172 //
173
174 // logout goes to initout.txt
175 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
176 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
177 return true;
178
179}
180
181/*text_t z3950proto::get_protocol_name () {
182 return "z3950proto";
183}
184*/
185
186void z3950proto::get_collection_list (text_tarray &collist,
187 comerror_t &/*err*/,
188 ostream &/*logout*/) {
189
190 z3950_server_array::iterator here = zservers.begin();
191 z3950_server_array::iterator end = zservers.end();
192 while (here != end) {
193 collist.push_back((*here)->getName());
194 here++;
195 }
196}
197
198void z3950proto::has_collection (const text_t &collection, bool &hascollection,
199 comerror_t &/*err*/, ostream &/*logout*/) {
200 z3950_server_array::iterator here = zservers.begin();
201 z3950_server_array::iterator end = zservers.end();
202 while (here != end) {
203 if((*here)->getName()==collection) {
204 hascollection=true;
205 return;
206 }
207 here++;
208 }
209 hascollection=false;
210}
211
212void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
213 comerror_t &/*err*/, ostream &/*logout*/) {
214 // should we just ping the server, or actually create a connection
215 // to the z39.50 server process on the machine ?
216 wassuccess = true;
217}
218
219void z3950proto::get_collectinfo (const text_t &collection,
220 ColInfoResponse_t &collectinfo,
221 comerror_t &err, ostream &logout) {
222
223 // set err to protocolError if something goes wrong...
224 err=noError;
225
226 z3950_server_array::iterator here = zservers.begin();
227 z3950_server_array::iterator end = zservers.end();
228 while (here != end) {
229 if((*here)->getName()==collection) {
230 break;
231 }
232 here++;
233 }
234
235 if (here==end) {
236 err=protocolError;
237 char *coll_str=collection.getcstr();
238 logout << "z39.50: couldn't find collection"
239 << coll_str
240 << endl;
241 delete coll_str;
242 return;
243 }
244
245 const ShortColInfo_t *colinfo=(*here)->getInfo();
246 collectinfo.shortInfo.name=colinfo->name;
247 collectinfo.shortInfo.host=colinfo->host;
248 collectinfo.shortInfo.port=colinfo->port;
249
250 collectinfo.isPublic=true;
251 // don't use beta field
252 /*collectinfo.isBeta=false;*/
253 collectinfo.buildDate=1;
254 // leave ccsCols empty (no cross-coll. searching - for now)
255 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
256 // This info is available from the config file -- johnmcp
257 /*******collectinfo.languages.push_back("en");
258 collectinfo.languages.push_back("fr");********/
259 collectinfo.numDocs=0;
260 collectinfo.numWords=0;
261 collectinfo.numBytes=0;
262 // copy the text maps over.
263 // collectinfo.collectionmeta; // text_tmap
264 collectinfo.collectionmeta=*((*here)->getMeta());
265 collectinfo.format=*((*here)->getFormat()); //text_tmap
266 /* collectinfo.building; //text_tmap */
267
268 ////collectinfo.receptionist="z3950";
269 /* for now... this is a url, relative to .../cgi-bin.
270 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
271 */
272}
273
274void z3950proto::get_filterinfo (const text_t &/*collection*/,
275 InfoFiltersResponse_t &response,
276 comerror_t &/*err*/, ostream &/*logout*/) {
277 // we'll fake it here, and say we have set up some filters
278 response.filterNames.insert("BrowseFilter");
279 response.filterNames.insert("QueryFilter");
280 response.filterNames.insert("NullFilter");
281
282}
283
284void z3950proto::get_filteroptions (const text_t &/*collection*/,
285 const InfoFilterOptionsRequest_t &/*req*/,
286 InfoFilterOptionsResponse_t &response,
287 comerror_t &err, ostream &/*logout*/) {
288 // for now, assume all servers have the same characteristics
289 /* if (request.filterName=="QueryFilter") { }
290 else if (request.filterName=="BrowseFilter") { }
291 else if (request.filterName=="NullFilter") { } */
292 response.filterOptions["Index"].type=FilterOption_t::stringt;
293 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
294 response.filterOptions["Index"].defaultValue="any";
295 response.filterOptions["Index"].validValues.push_back(".any");
296 response.filterOptions["Index"].validValues.push_back(".title");
297 response.filterOptions["Index"].validValues.push_back(".author");
298 // and maybe ["Language"] option as well?
299 err=noError;
300}
301
302void z3950proto::filter (const text_t &collection,
303 FilterRequest_t &request,
304 FilterResponse_t &response,
305 comerror_t &err, ostream &logout) {
306 // this function is called when:
307 // * creating the title page,(looking for iconcoll* & collectname metadata)
308 // * creating the about page (looking for "Title" metadata)
309 // * doing the query - (note that a request for metadata comes first, then
310 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
311
312 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
313 // For the title page, we should not create a connection to the target
314 // (target means the actual z39.50 server, origin means us), but
315 // for the about page and query pages, we need to get information from the
316 // origin. (eg for the about page, we will print out some info such as ID,
317 // name and version.
318
319 // cerr now goes to errout.txt in etc directory
320 err=noError;
321
322 // get relevant "collection"
323 z3950_server_array::iterator zserver = zservers.begin();
324 z3950_server_array::iterator zend = zservers.end();
325 while (zserver != zend) {
326 if((*zserver)->getName()==collection) {
327 break;
328 }
329 zserver++;
330 }
331 // now have collection in zserver.
332
333 ColInfoResponse_t info;
334 ResultDocInfo_t *docInfo;
335
336 // leave response.termInfo empty
337 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
338
339 // See if this is for a query action
340 if (request.filterName=="QueryFilter") {
341 /* Sample OptionValue pairs
342 `StartResults'=`1'
343 `EndResults'=`20'
344 `Term'=`firstword secondword' (term is just whatever the user typed in)
345 `QueryType'=`ranked' => 'OR' (cgiarg t=1)
346 `QueryType' = `boolean' => 'AND' (cgiarg t=0)
347 `Casefold'=`true'
348 `Stem'=`false'
349 `Maxdocs'=`50'
350 */
351 // go through options
352 text_t opt_term; // the term(s) that the user entered
353 text_t opt_fields; // which fields to search on
354 int opt_start=1, opt_end=20; // default values
355 int nummatches=0, maxdocs=50; // default values
356 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
357 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
358 while (ov_here != ov_end) {
359 // cerr << "OV pair: `" << ov_here->name.getcstr() << "'=`"
360 // << ov_here->value.getcstr() << "'\n";
361 if (ov_here->name=="Term")
362 {
363 opt_term=ov_here->value;
364 } else if (ov_here->name=="Index")
365 {
366 opt_fields=ov_here->value;
367 } else if (ov_here->name=="StartResults")
368 {
369 opt_start=ov_here->value.getint();
370 } else if (ov_here->name=="EndResults")
371 {
372 opt_end=ov_here->value.getint();
373 } else if (ov_here->name=="Maxdocs")
374 {
375 maxdocs=ov_here->value.getint();
376 }
377 ov_here++;
378 }
379 err=noError;
380 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
381 opt_fields,
382 opt_start, // first to get
383 opt_end-opt_start, //count
384 &nummatches,err);
385 if (err!=noError) {
386 // can we return an err msg in a response, or just use
387 // the more drastic Greenstone error mechanism?
388 docInfo=new ResultDocInfo_t;
389 response.docInfo.push_back(*docInfo);
390 docInfo->metadata["Title"].values.push_back("Error - query err?");
391 logout << "\nz3950 filter query: error connecting to server\n";
392 // for now, DON'T use GSDL protocol err.
393 err=noError;
394 return;
395 }
396 // check if (titles==NULL) - only happens on error?
397 if (nummatches>0) {
398 text_tarray::iterator titles_here=titles->begin();
399 text_tarray::iterator titles_end=titles->end();
400 int counter=1;
401 while (titles_here!=titles_end) {
402 docInfo=new ResultDocInfo_t;
403 docInfo->metadata["Title"].values.push_back(*titles_here);
404 docInfo->result_num=counter;
405 // we need to give some OID, so we'll just use counter for now...
406 // make it the number into the whole possible retrieved set.
407 docInfo->OID=counter+opt_start-1;
408 response.docInfo.push_back(*docInfo);
409 counter++;
410 titles_here++;
411 }
412 }
413
414 if (request.filterResultOptions & FRtermFreq) {
415 if (nummatches>maxdocs) {
416 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
417 response.isApprox=MoreThan;
418 } else {
419 response.numDocs=nummatches; // eg "36 documents"
420 response.isApprox=Exact; // Exact | Approximate | MoreThan
421 }
422 } // end of if (... & FRtermFreq)
423
424 } // end of if (... & FROID)
425 else {
426 // this wasn't a query action
427
428 if (request.filterOptions.size()>0 &&
429 request.filterOptions[0].name=="ParentNode") {
430 // don't want to return anything
431 return;
432 /* } else if (request.docSet.size() &&
433 request.docSet[0]!="collection") {
434 // documentaction
435 // if docSet is not empty, it is either "collection", or an array
436 // of OIDs
437 docInfo=new ResultDocInfo_t;
438 response.docInfo.push_back(*docInfo);
439 */
440 } else {
441 // in case we need to return only metadata
442 docInfo=new ResultDocInfo_t;
443 response.docInfo.push_back(*docInfo);
444 }
445 }
446
447 // Fill in metadata for each response.docInfo (if wanted)
448 if (request.filterResultOptions & FRmetadata) {
449 get_collectinfo (collection, info, err, logout);
450 // should check err returned here....
451
452 // get the Query out of the filterOptions.
453 text_t query="";
454 text_t field="";
455 OptionValue_tarray::iterator opt_here=request.filterOptions.begin();
456 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
457 while (opt_here!=opt_end) {
458 if (opt_here->name=="Query") {
459 query=opt_here->value;
460 if (field!="") break; // break from loop if we've got both
461 } else if (opt_here->name=="Index") {
462 field=opt_here->value;
463 if (query!="") break; // break from loop if we've got both
464 }
465 opt_here++;
466 }
467
468 if (!request.fields.empty()) {
469 // loop on each document being returned
470 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
471 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
472 while (docs_here!=docs_end) {
473
474 // loop on all the metadata fields in request.fields (type text_tset)
475 text_tset::iterator fields_here=request.fields.begin();
476 text_tset::iterator fields_end=request.fields.end();
477 text_tmap::iterator it;
478 while (fields_here!=fields_end) {
479 it=info.collectionmeta.find(*fields_here);
480 if (it!=info.collectionmeta.end())
481 docs_here->metadata[*fields_here].values.push_back((*it).second);
482 else if (*fields_here=="Title" && !request.docSet.empty()) {
483 // We only do this for a document action.
484 // (This comes through as a NullQuery).
485 // hopefully docSet is only not empty for documentaction...
486 text_t doctitle;
487 int i;
488 // check that docSet isn't empty first!!!!!!
489 i=request.docSet[0].getint();
490 text_t doctext="unneeded";
491 (*zserver)->getfullrecord(query, field, i, doctitle, doctext, err);
492 // check err value!
493 docs_here->metadata["Title"].values.push_back(doctitle);
494 } else {
495 docs_here->metadata[*fields_here].values.push_back("");
496 /////// cerr << " (not found)";
497 }
498 fields_here++;
499 } // end of inner while loop
500 docs_here++;
501 } // end of outer while loop
502 } // end of if (!request.fields.empty())
503
504 else { // request.fields empty: return all metadata for about page or query
505 // we'll only put it in the first docInfo.
506 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
507 text_tmap::iterator colmeta_end=info.collectionmeta.end();
508 while (colmeta_here!=colmeta_end) {
509 response.docInfo[0].metadata[(*colmeta_here).first].
510 values.push_back((*colmeta_here).second);
511 colmeta_here++;
512 }
513
514 // check if "collectionextra" metadata is set. If it isn't, we should
515 // create connection to target to get it.
516 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
517 // it hasn't been set yet...
518 text_t abouttext="<B>Server Online</B><br>\n";
519 abouttext+=(*zserver)->getzAbout();
520 // add in the "About" text we read in from config file.
521 // how do we incorporate multi-lingual metadata?
522 abouttext+="<P>\n";
523 text_t tmpabout;
524
525 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
526 abouttext+=tmpabout;
527
528 (*zserver)->setMeta("collectionextra",abouttext);
529 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
530 }
531 } // end of else
532
533 // do indices' names, regardless of whether asked for or not...
534 if (!response.docInfo.empty()) {
535 response.docInfo[0].metadata[".author"].values.push_back("author fields");
536 response.docInfo[0].metadata[".title"].values.push_back("title fields");
537 response.docInfo[0].metadata[".any"].values.push_back("any fields");
538 }
539 } //end of if (... & FRmetadata) ...
540}
541
542
543
544void z3950proto::get_document (const text_t &collection,
545 const DocumentRequest_t &request,
546 DocumentResponse_t &response,
547 comerror_t &err, ostream &logout) {
548
549 err=noError;
550
551 // get relevant "collection"
552 z3950_server_array::iterator zserver = zservers.begin();
553 z3950_server_array::iterator zend = zservers.end();
554 while (zserver != zend) {
555 if((*zserver)->getName()==collection) {
556 break;
557 }
558 zserver++;
559 }
560 // now have collection in zserver.
561
562 /* cout << "get document:\n\tOID: " << request.OID.getcstr()
563 << "\n\tdocType: " << request.docType.getcstr()
564 << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n";
565 */
566
567 /* docresponse consists of
568 text_t response.doc */
569 text_t title="unneeded";
570 text_t doctext;
571 text_t query; // this should not be needed, as we have already connected to
572 // get the title....
573 text_t field; // ditto...
574 (*zserver)->getfullrecord(query,field,request.OID.getint(),
575 title,doctext,err);
576 // check return value of above? (false=>not connected)
577 if (err==noError)
578 response.doc=doctext;
579 else {
580 // could print out different messages based on error type....
581 response.doc="<h2>Error</h2>There was an error while connecting to the ";
582 response.doc+="z39.50 server (ie target). Most likely this was a \n";
583 response.doc+="\"Connection Refused\" error.\n";
584
585 }
586 if (0) {
587 err=protocolError;
588 logout << "Some error\n";
589 }
590}
Note: See TracBrowser for help on using the repository browser.