source: trunk/gsdl/src/recpt/z3950proto.cpp@ 1355

Last change on this file since 1355 was 1355, checked in by jrm21, 24 years ago

Fixed up parsing of z39.50 config file, so that errors go to file instead
of stderr, which screws up the cgi headers and page...
Errors go to etc/recpt/z3950err.txt

  • Property svn:keywords set to Author Date Id Revision
File size: 18.7 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36extern FILE *yyin;
37extern "C" {
38 extern int zconfigparse();
39}
40
41
42
43
44z3950proto::z3950proto() {
45 zserver_count=0;
46}
47
48z3950proto::~z3950proto() {
49}
50
51void z3950proto::add_server (z3950_server& zserver) {
52
53 // append the new server
54 zserver_count++;
55 zservers.push_back(&zserver);
56}
57
58void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
59 struct z3950cfg *here;
60 struct z3950cfg *oldhere;
61 z3950_server *zserver;
62 ShortColInfo_t *tempinfo;
63 FILE *stderr_tmp, *err;
64
65 err=fopen(errf.getcstr(), "a");
66 stderr_tmp=stderr;
67 if (err==NULL) {
68 // what do we do if we can't open the error file?
69 // this means that errors will go to stderr, which may stuff up
70 // any cgi headers and the page.
71 } else {
72 stderr=err;
73 }
74
75 // zconfigparse() is defined in zparse.tab.c,
76 // which is the bison output of zparse.y
77
78 yyin=fopen(filename.getcstr(),"r");
79 if (yyin==NULL) {
80 cerr << "Could not open "<<filename.getcstr()<<" for reading.\n";
81 return();
82 }
83 zconfigparse();
84 if (err!=NULL) {
85 fclose(err);
86 stderr=stderr_tmp;
87 }
88
89 // we now have the config files in the ptr zserver_list
90 if (zserver_list==NULL)
91 return; // no valid servers found in the config file - note that
92 // the parser will have already spat out any errors.
93
94 // now create z3950servers for each structure in server_list
95 here=zserver_list;
96 while (here!=NULL) {
97 zserver=new z3950_server;
98 tempinfo=new ShortColInfo_t;
99
100 tempinfo->host.setcstr(here->hostname);
101 tempinfo->port=here->port;
102 tempinfo->name.setcstr(here->dbname);
103 zserver->setInfo(tempinfo);
104 zserver->setName(here->shortname);
105 // now collection metadata.
106 zserver->setMeta("collectionname",here->longname);
107 if (here->icon!=NULL)
108 zserver->setMeta("iconcollection",here->icon);
109 if (here->smallicon!=NULL)
110 zserver->setMeta("iconcollectionsmall",here->smallicon);
111
112 /* filterclass *filter = new filterclass ();
113 zserver->add_filter (filter);
114 browsefilterclass *browsefilter = new browsefilterclass();
115 zserver->add_filter (browsefilter);
116 queryfilterclass *queryfilter = new queryfilterclass();
117 zserver->add_filter (queryfilter);
118 */
119
120 // About list
121 if (here->about!=NULL) {
122 struct z3950aboutlist *about_here=here->about;
123 struct z3950aboutlist *oldabout;
124
125 while (about_here!=NULL) {
126 // problem with default lang (null): can't add ("",..)
127 if (about_here->lang==NULL)
128 zserver->addcfgAbout("en",about_here->text);
129 else
130 zserver->addcfgAbout(about_here->lang, about_here->text);
131 oldabout=about_here;
132 about_here=about_here->next;
133 free(oldabout->lang);
134 free(oldabout->text);
135 free(oldabout);
136 }
137 }
138
139 oldhere=here;
140 here=here->next;
141 free(oldhere->shortname);
142 free(oldhere->hostname);
143 free(oldhere->dbname);
144 free(oldhere->longname);
145 free(oldhere->icon);
146 free(oldhere->smallicon);
147 free(oldhere);
148
149 add_server(*zserver);
150 } // end of while loop.
151
152}
153
154void z3950proto::configure (const text_t &/*key*/,
155 const text_tarray &/*cfgline*/) {
156 // this is called for each line in the gsdlsite.cfg file
157}
158
159
160bool z3950proto::init (ostream &/*logout*/) {
161 // set up tcp connection to server here?
162 // we might also read in the config file here (instead of librarymain.cpp)
163
164 //
165
166 // logout goes to initout.txt
167 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
168 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
169 return true;
170
171}
172
173/*text_t z3950proto::get_protocol_name () {
174 return "z3950proto";
175}
176*/
177
178void z3950proto::get_collection_list (text_tarray &collist,
179 comerror_t &/*err*/,
180 ostream &/*logout*/) {
181
182 z3950_server_array::iterator here = zservers.begin();
183 z3950_server_array::iterator end = zservers.end();
184 while (here != end) {
185 collist.push_back((*here)->getName());
186 here++;
187 }
188}
189
190void z3950proto::has_collection (const text_t &collection, bool &hascollection,
191 comerror_t &/*err*/, ostream &/*logout*/) {
192 z3950_server_array::iterator here = zservers.begin();
193 z3950_server_array::iterator end = zservers.end();
194 while (here != end) {
195 if((*here)->getName()==collection) {
196 hascollection=true;
197 return;
198 }
199 here++;
200 }
201 hascollection=false;
202}
203
204void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
205 comerror_t &/*err*/, ostream &/*logout*/) {
206 // should we just ping the server, or actually create a connection
207 // to the z39.50 server process on the machine ?
208 wassuccess = true;
209}
210
211void z3950proto::get_collectinfo (const text_t &collection,
212 ColInfoResponse_t &collectinfo,
213 comerror_t &err, ostream &logout) {
214
215 // set err to protocolError if something goes wrong...
216 err=noError;
217
218 z3950_server_array::iterator here = zservers.begin();
219 z3950_server_array::iterator end = zservers.end();
220 while (here != end) {
221 if((*here)->getName()==collection) {
222 break;
223 }
224 here++;
225 }
226
227 if (here==end) {
228 err=protocolError;
229 logout << "z39.50: couldn't find collection"
230 << collection.getcstr()
231 << endl;
232 return;
233 }
234
235 const ShortColInfo_t *colinfo=(*here)->getInfo();
236 collectinfo.shortInfo.name=colinfo->name;
237 collectinfo.shortInfo.host=colinfo->host;
238 collectinfo.shortInfo.port=colinfo->port;
239
240 collectinfo.isPublic=true;
241 // don't use beta field
242 /*collectinfo.isBeta=false;*/
243 collectinfo.buildDate=1;
244 // leave ccsCols empty (no cross-coll. searching - for now)
245 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
246 // This info is available from the config file -- johnmcp
247 /*******collectinfo.languages.push_back("en");
248 collectinfo.languages.push_back("fr");********/
249 collectinfo.numDocs=0;
250 collectinfo.numWords=0;
251 collectinfo.numBytes=0;
252 // copy the text maps over.
253 // collectinfo.collectionmeta; // text_tmap
254 collectinfo.collectionmeta=*((*here)->getMeta());
255 collectinfo.format=*((*here)->getFormat()); //text_tmap
256 /* collectinfo.building; //text_tmap */
257
258 ////collectinfo.receptionist="z3950";
259 /* for now... this is a url, relative to .../cgi-bin.
260 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
261 */
262}
263
264void z3950proto::get_filterinfo (const text_t &/*collection*/,
265 InfoFiltersResponse_t &response,
266 comerror_t &/*err*/, ostream &/*logout*/) {
267 // we'll fake it here, and say we have set up some filters
268 response.filterNames.insert("BrowseFilter");
269 response.filterNames.insert("QueryFilter");
270 response.filterNames.insert("NullFilter");
271
272}
273
274void z3950proto::get_filteroptions (const text_t &/*collection*/,
275 const InfoFilterOptionsRequest_t &/*req*/,
276 InfoFilterOptionsResponse_t &response,
277 comerror_t &err, ostream &/*logout*/) {
278 // for now, assume all servers have the same characteristics
279 /* if (request.filterName=="QueryFilter") { }
280 else if (request.filterName=="BrowseFilter") { }
281 else if (request.filterName=="NullFilter") { } */
282 response.filterOptions["Index"].type=FilterOption_t::stringt;
283 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
284 response.filterOptions["Index"].defaultValue="any";
285 response.filterOptions["Index"].validValues.push_back(".any");
286 response.filterOptions["Index"].validValues.push_back(".title");
287 response.filterOptions["Index"].validValues.push_back(".author");
288 // and maybe ["Language"] option as well?
289 err=noError;
290}
291
292void z3950proto::filter (const text_t &collection,
293 FilterRequest_t &request,
294 FilterResponse_t &response,
295 comerror_t &err, ostream &logout) {
296 // this function is called when:
297 // * creating the title page,(looking for iconcoll* & collectname metadata)
298 // * creating the about page (looking for "Title" metadata)
299 // * doing the query - (note that a request for metadata comes first, then
300 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
301
302 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
303 // For the title page, we should not create a connection to the target
304 // (target means the actual z39.50 server, origin means us), but
305 // for the about page and query pages, we need to get information from the
306 // origin. (eg for the about page, we will print out some info such as ID,
307 // name and version.
308
309 // cerr now goes to errout.txt in etc directory
310 err=noError;
311
312 // get relevant "collection"
313 z3950_server_array::iterator zserver = zservers.begin();
314 z3950_server_array::iterator zend = zservers.end();
315 while (zserver != zend) {
316 if((*zserver)->getName()==collection) {
317 break;
318 }
319 zserver++;
320 }
321 // now have collection in zserver.
322
323 ColInfoResponse_t info;
324 ResultDocInfo_t *docInfo;
325
326 // leave response.termInfo empty
327 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
328
329 // See if this is for a query action
330 if (request.filterName=="QueryFilter") {
331 /* Sample OptionValue pairs
332 `StartResults'=`1'
333 `EndResults'=`20'
334 `Term'=`firstword secondword' (term is just whatever the user typed in)
335 `QueryType'=`ranked' => 'OR' (cgiarg t=1)
336 `QueryType' = `boolean' => 'AND' (cgiarg t=0)
337 `Casefold'=`true'
338 `Stem'=`false'
339 `Maxdocs'=`50'
340 */
341 // go through options
342 text_t opt_term; // the term(s) that the user entered
343 text_t opt_fields; // which fields to search on
344 int opt_start=1, opt_end=20; // default values
345 int nummatches=0, maxdocs=50; // default values
346 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
347 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
348 while (ov_here != ov_end) {
349 // cout << "OV pair: `" << ov_here->name.getcstr() << "'=`"
350 // << ov_here->value.getcstr() << "'\n";
351 if (ov_here->name=="Term")
352 {
353 opt_term=ov_here->value;
354 } else if (ov_here->name=="Index")
355 {
356 opt_fields=ov_here->value;
357 } else if (ov_here->name=="StartResults")
358 {
359 opt_start=ov_here->value.getint();
360 } else if (ov_here->name=="EndResults")
361 {
362 opt_end=ov_here->value.getint();
363 } else if (ov_here->name=="Maxdocs")
364 {
365 maxdocs=ov_here->value.getint();
366 }
367 ov_here++;
368 }
369 err=noError;
370 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
371 opt_fields,
372 opt_start, // first to get
373 opt_end-opt_start, //count
374 &nummatches,err);
375 if (err!=noError) {
376 // can we return an err msg in a response, or just use
377 // the more drastic Greenstone error mechanism?
378 docInfo=new ResultDocInfo_t;
379 response.docInfo.push_back(*docInfo);
380 docInfo->metadata["Title"].values.push_back("Error - query err?");
381 logout << "\nz3950 filter query: error connecting to server\n";
382 // for now, DON'T use GSDL protocol err.
383 err=noError;
384 return;
385 }
386 // check if (titles==NULL) - only happens on error?
387 if (nummatches>0) {
388 text_tarray::iterator titles_here=titles->begin();
389 text_tarray::iterator titles_end=titles->end();
390 int counter=1;
391 while (titles_here!=titles_end) {
392 docInfo=new ResultDocInfo_t;
393 docInfo->metadata["Title"].values.push_back(*titles_here);
394 docInfo->result_num=counter;
395 // we need to give some OID, so we'll just use counter for now...
396 // make it the number into the whole possible retrieved set.
397 docInfo->OID=counter+opt_start-1;
398 response.docInfo.push_back(*docInfo);
399 counter++;
400 titles_here++;
401 }
402 }
403
404 if (request.filterResultOptions & FRtermFreq) {
405 if (nummatches>maxdocs) {
406 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
407 response.isApprox=MoreThan;
408 } else {
409 response.numDocs=nummatches; // eg "36 documents"
410 response.isApprox=Exact; // Exact | Approximate | MoreThan
411 }
412 } // end of if (... & FRtermFreq)
413
414 } // end of if (... & FROID)
415 else {
416 // this wasn't a query action
417
418 if (request.filterOptions.size()>0 &&
419 request.filterOptions[0].name=="ParentNode") {
420 // don't want to return anything
421 return;
422 /* } else if (request.docSet.size() &&
423 request.docSet[0]!="collection") {
424 // documentaction
425 // if docSet is not empty, it is either "collection", or an array
426 // of OIDs
427 docInfo=new ResultDocInfo_t;
428 response.docInfo.push_back(*docInfo);
429 */
430 } else {
431 // in case we need to return only metadata
432 docInfo=new ResultDocInfo_t;
433 response.docInfo.push_back(*docInfo);
434 }
435 }
436
437 // Fill in metadata for each response.docInfo (if wanted)
438 if (request.filterResultOptions & FRmetadata) {
439 get_collectinfo (collection, info, err, logout);
440 // should check err returned here....
441
442 // get the Query out of the filterOptions.
443 text_t query="";
444 text_t field="";
445 OptionValue_tarray::iterator opt_here=request.filterOptions.begin();
446 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
447 while (opt_here!=opt_end) {
448 if (opt_here->name=="Query") {
449 query=opt_here->value;
450 if (field!="") break; // break from loop if we've got both
451 } else if (opt_here->name=="Index") {
452 field=opt_here->value;
453 if (query!="") break; // break from loop if we've got both
454 }
455 opt_here++;
456 }
457
458 if (!request.fields.empty()) {
459 // loop on each document being returned
460 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
461 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
462 while (docs_here!=docs_end) {
463
464 // loop on all the metadata fields in request.fields (type text_tset)
465 text_tset::iterator fields_here=request.fields.begin();
466 text_tset::iterator fields_end=request.fields.end();
467 text_tmap::iterator it;
468 while (fields_here!=fields_end) {
469 it=info.collectionmeta.find(*fields_here);
470 if (it!=info.collectionmeta.end())
471 docs_here->metadata[*fields_here].values.push_back((*it).second);
472 else if (*fields_here=="Title" && !request.docSet.empty()) {
473 // We only do this for a document action.
474 // (This comes through as a NullQuery).
475 // hopefully docSet is only not empty for documentaction...
476 text_t doctitle;
477 int i;
478 // check that docSet isn't empty first!!!!!!
479 i=request.docSet[0].getint();
480 text_t doctext="unneeded";
481 (*zserver)->getfullrecord(query, field, i, doctitle, doctext, err);
482 // check err value!
483 docs_here->metadata["Title"].values.push_back(doctitle);
484 } else {
485 docs_here->metadata[*fields_here].values.push_back("");
486 /////// cerr << " (not found)";
487 }
488 fields_here++;
489 } // end of inner while loop
490 docs_here++;
491 } // end of outer while loop
492 } // end of if (!request.fields.empty())
493
494 else { // request.fields empty: return all metadata for about page or query
495 // we'll only put it in the first docInfo.
496 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
497 text_tmap::iterator colmeta_end=info.collectionmeta.end();
498 while (colmeta_here!=colmeta_end) {
499 response.docInfo[0].metadata[(*colmeta_here).first].
500 values.push_back((*colmeta_here).second);
501 colmeta_here++;
502 }
503
504 // check if "collectionextra" metadata is set. If it isn't, we should
505 // create connection to target to get it.
506 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
507 // it hasn't been set yet...
508 text_t abouttext="<B>Server Online</B><br>\n";
509 abouttext+=(*zserver)->getzAbout();
510 // add in the "About" text we read in from config file.
511 // how do we incorporate multi-lingual metadata?
512 abouttext+="<P>\n";
513 text_t tmpabout;
514
515 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
516 abouttext+=tmpabout;
517
518 (*zserver)->setMeta("collectionextra",abouttext);
519 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
520 }
521 } // end of else
522
523 // do indices' names, regardless of whether asked for or not...
524 if (!response.docInfo.empty()) {
525 response.docInfo[0].metadata[".author"].values.push_back("author fields");
526 response.docInfo[0].metadata[".title"].values.push_back("title fields");
527 response.docInfo[0].metadata[".any"].values.push_back("any fields");
528 }
529 } //end of if (... & FRmetadata) ...
530}
531
532
533
534void z3950proto::get_document (const text_t &collection,
535 const DocumentRequest_t &request,
536 DocumentResponse_t &response,
537 comerror_t &err, ostream &logout) {
538
539 err=noError;
540
541 // get relevant "collection"
542 z3950_server_array::iterator zserver = zservers.begin();
543 z3950_server_array::iterator zend = zservers.end();
544 while (zserver != zend) {
545 if((*zserver)->getName()==collection) {
546 break;
547 }
548 zserver++;
549 }
550 // now have collection in zserver.
551
552 /* cout << "get document:\n\tOID: " << request.OID.getcstr()
553 << "\n\tdocType: " << request.docType.getcstr()
554 << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n";
555 */
556
557 /* docresponse consists of
558 text_t response.doc */
559 text_t title="unneeded";
560 text_t doctext;
561 text_t query; // this should not be needed, as we have already connected to
562 // get the title....
563 text_t field; // ditto...
564 (*zserver)->getfullrecord(query,field,request.OID.getint(),
565 title,doctext,err);
566 // check return value of above? (false=>not connected)
567 if (err==noError)
568 response.doc=doctext;
569 else {
570 // could print out different messages based on error type....
571 response.doc="<h2>Error</h2>There was an error while connecting to the ";
572 response.doc+="z39.50 server (ie target). Most likely this was a \n";
573 response.doc+="\"Connection Refused\" error.\n";
574
575 }
576 if (0) {
577 err=protocolError;
578 logout << "Some error\n";
579 }
580}
Note: See TracBrowser for help on using the repository browser.