source: trunk/gsdl/src/recpt/z3950proto.cpp@ 2905

Last change on this file since 2905 was 2905, checked in by jrm21, 22 years ago

removed some old debugging statements no longer needed.

  • Property svn:keywords set to Author Date Id Revision
File size: 19.8 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36
// Streams used by the generated config-file lexer.
// NOTE: z3950cfg.h #defines yyin to a different identifier, so these
// declarations do not clash with the yyin of any "other" lexer.
extern FILE *yyin;   // input the lexer reads from
extern FILE *yyout;  // where flex sends its `errors' (unmatched tokens)

// Entry point of the config-file parser, declared with C linkage.
extern "C" int zconfigparse();
45
46
47
48
49z3950proto::z3950proto() {
50 zserver_count=0;
51}
52
53z3950proto::~z3950proto() {
54}
55
56void z3950proto::add_server (z3950_server& zserver) {
57
58 // append the new server
59 zserver_count++;
60 zservers.push_back(&zserver);
61}
62
63void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
64 struct z3950cfg *here;
65 struct z3950cfg *oldhere;
66 z3950_server *zserver;
67 ShortColInfo_t *tempinfo;
68
69 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
70
71 char *errf_str=errf.getcstr();
72 if ((errfile=fopen(errf_str,"a"))==NULL) {
73 // what do we do if we can't open the error file?
74 // this means that errors will go to stderr, which may stuff up
75 // any cgi headers and the page.
76 errfile=stderr;
77 }
78 delete errf_str;
79 yyout=errfile;
80
81 // zconfigparse() is defined in zparse.tab.c,
82 // which is the bison output of zparse.y
83
84 char *filename_str=filename.getcstr();
85 yyin=fopen(filename_str,"r");
86 if (yyin==NULL) {
87 cerr << "Could not open "<<filename_str<<" for reading.\n";
88 delete filename_str;
89 return;
90 }
91 delete filename_str;
92 zconfigparse();
93
94 if (errfile!=stderr)
95 fclose(errfile);
96
97 // we now have the config files in the ptr zserver_list
98 if (zserver_list==NULL)
99 return; // no valid servers found in the config file - note that
100 // the parser will have already spat out any errors.
101
102 // now create z3950servers for each structure in server_list
103 here=zserver_list;
104 while (here!=NULL) {
105 zserver=new z3950_server;
106 tempinfo=new ShortColInfo_t;
107
108 tempinfo->host.setcstr(here->hostname);
109 tempinfo->port=here->port;
110 tempinfo->name.setcstr(here->dbname);
111 zserver->setInfo(tempinfo);
112 zserver->setName(here->shortname);
113 // now collection metadata.
114 zserver->setMeta("collectionname",here->longname);
115 if (here->icon!=NULL)
116 zserver->setMeta("iconcollection",here->icon);
117 if (here->smallicon!=NULL)
118 zserver->setMeta("iconcollectionsmall",here->smallicon);
119
120 /* filterclass *filter = new filterclass ();
121 zserver->add_filter (filter);
122 browsefilterclass *browsefilter = new browsefilterclass();
123 zserver->add_filter (browsefilter);
124 queryfilterclass *queryfilter = new queryfilterclass();
125 zserver->add_filter (queryfilter);
126 */
127
128 // About list
129 if (here->about!=NULL) {
130 struct z3950aboutlist *about_here=here->about;
131 struct z3950aboutlist *oldabout;
132
133 while (about_here!=NULL) {
134 // problem with default lang (null): can't add ("",..)
135 if (about_here->lang==NULL)
136 zserver->addcfgAbout("en",about_here->text);
137 else
138 zserver->addcfgAbout(about_here->lang, about_here->text);
139 oldabout=about_here;
140 about_here=about_here->next;
141 free(oldabout->lang);
142 free(oldabout->text);
143 free(oldabout);
144 }
145 }
146
147 oldhere=here;
148 here=here->next;
149 free(oldhere->shortname); // these 4 strings should all be non-NULL...
150 free(oldhere->hostname);
151 free(oldhere->dbname);
152 free(oldhere->longname);
153 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
154 if (oldhere->smallicon) free(oldhere->smallicon);
155 free(oldhere);
156
157 add_server(*zserver);
158 } // end of while loop.
159
160}
161
162void z3950proto::configure (const text_t &/*key*/,
163 const text_tarray &/*cfgline*/) {
164 // this is called for each line in the gsdlsite.cfg file
165}
166
167
168bool z3950proto::init (ostream &/*logout*/) {
169 // set up tcp connection to server here?
170 // we might also read in the config file here (instead of librarymain.cpp)
171
172 //
173
174 // logout goes to initout.txt
175 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
176 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
177 return true;
178
179}
180
181/*text_t z3950proto::get_protocol_name () {
182 return "z3950proto";
183}
184*/
185
186void z3950proto::get_collection_list (text_tarray &collist,
187 comerror_t &/*err*/,
188 ostream &/*logout*/) {
189
190 z3950_server_array::iterator here = zservers.begin();
191 z3950_server_array::iterator end = zservers.end();
192 while (here != end) {
193 collist.push_back((*here)->getName());
194 here++;
195 }
196}
197
198void z3950proto::has_collection (const text_t &collection, bool &hascollection,
199 comerror_t &/*err*/, ostream &/*logout*/) {
200 z3950_server_array::iterator here = zservers.begin();
201 z3950_server_array::iterator end = zservers.end();
202 while (here != end) {
203 if((*here)->getName()==collection) {
204 hascollection=true;
205 return;
206 }
207 here++;
208 }
209 hascollection=false;
210}
211
212void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
213 comerror_t &/*err*/, ostream &/*logout*/) {
214 // should we just ping the server, or actually create a connection
215 // to the z39.50 server process on the machine ?
216 wassuccess = true;
217}
218
219void z3950proto::get_collectinfo (const text_t &collection,
220 ColInfoResponse_t &collectinfo,
221 comerror_t &err, ostream &logout) {
222
223 // set err to protocolError if something goes wrong...
224 err=noError;
225
226 z3950_server_array::iterator here = zservers.begin();
227 z3950_server_array::iterator end = zservers.end();
228 while (here != end) {
229 if((*here)->getName()==collection) {
230 break;
231 }
232 here++;
233 }
234
235 if (here==end) {
236 err=protocolError;
237 char *coll_str=collection.getcstr();
238 logout << "z39.50: couldn't find collection"
239 << coll_str
240 << endl;
241 delete coll_str;
242 return;
243 }
244
245 const ShortColInfo_t *colinfo=(*here)->getInfo();
246 collectinfo.shortInfo.name=colinfo->name;
247 collectinfo.shortInfo.host=colinfo->host;
248 collectinfo.shortInfo.port=colinfo->port;
249
250 collectinfo.isPublic=true;
251 // don't use beta field
252 /*collectinfo.isBeta=false;*/
253 collectinfo.buildDate=1;
254 // leave ccsCols empty (no cross-coll. searching - for now)
255 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
256 // This info is available from the config file -- johnmcp
257 /*******collectinfo.languages.push_back("en");
258 collectinfo.languages.push_back("fr");********/
259 collectinfo.numDocs=0;
260 collectinfo.numWords=0;
261 collectinfo.numBytes=0;
262 // copy the text maps over.
263 // collectinfo.collectionmeta; // text_tmap
264 collectinfo.collectionmeta=*((*here)->getMeta());
265 collectinfo.format=*((*here)->getFormat()); //text_tmap
266 /* collectinfo.building; //text_tmap */
267
268 ////collectinfo.receptionist="z3950";
269 /* for now... this is a url, relative to .../cgi-bin.
270 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
271 */
272}
273
274void z3950proto::get_filterinfo (const text_t &/*collection*/,
275 InfoFiltersResponse_t &response,
276 comerror_t &/*err*/, ostream &/*logout*/) {
277 // we'll fake it here, and say we have set up some filters
278 response.filterNames.insert("BrowseFilter");
279 response.filterNames.insert("QueryFilter");
280 response.filterNames.insert("NullFilter");
281
282}
283
284void z3950proto::get_filteroptions (const text_t &/*collection*/,
285 const InfoFilterOptionsRequest_t &/*req*/,
286 InfoFilterOptionsResponse_t &response,
287 comerror_t &err, ostream &/*logout*/) {
288 // for now, assume all servers have the same characteristics
289 /* if (request.filterName=="QueryFilter") { }
290 else if (request.filterName=="BrowseFilter") { }
291 else if (request.filterName=="NullFilter") { } */
292 response.filterOptions["Index"].type=FilterOption_t::stringt;
293 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
294 response.filterOptions["Index"].defaultValue="any";
295 response.filterOptions["Index"].validValues.push_back(".any");
296 response.filterOptions["Index"].validValues.push_back(".title");
297 response.filterOptions["Index"].validValues.push_back(".author");
298 // and maybe ["Language"] option as well?
299 err=noError;
300}
301
302void z3950proto::filter (const text_t &collection,
303 FilterRequest_t &request,
304 FilterResponse_t &response,
305 comerror_t &err, ostream &logout) {
306 // this function is called when:
307 // * creating the title page,(looking for iconcoll* & collectname metadata)
308 // * creating the about page (looking for "Title" metadata)
309 // * doing the query - (note that a request for metadata comes first, then
310 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
311
312 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
313 // For the title page, we should not create a connection to the target
314 // (target means the actual z39.50 server, origin means us), but
315 // for the about page and query pages, we need to get information from the
316 // origin. (eg for the about page, we will print out some info such as ID,
317 // name and version.
318
319 // cerr now goes to errout.txt in etc directory
320 err=noError;
321
322 // get relevant "collection"
323 z3950_server_array::iterator zserver = zservers.begin();
324 z3950_server_array::iterator zend = zservers.end();
325 while (zserver != zend) {
326 if((*zserver)->getName()==collection) {
327 break;
328 }
329 zserver++;
330 }
331 // now have collection in zserver.
332
333 ColInfoResponse_t info;
334 ResultDocInfo_t *docInfo;
335
336 // leave response.termInfo empty
337 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
338
339 // See if this is for a query action
340 if (request.filterName=="QueryFilter") {
341 /* Sample OptionValue pairs
342 `StartResults'=`1'
343 `EndResults'=`20'
344 `Term'=`firstword secondword' (term is just whatever the user typed in)
345 `QueryType'=ranked|boolean -> OR|AND
346 //`MatchMode'=`some' => 'OR'
347 //`MatchMode' = `all' => 'AND'
348 `Casefold'=`true'
349 `Stem'=`false'
350 `Maxdocs'=`50'
351 */
352 // go through options
353 text_t opt_term; // the term(s) that the user entered
354 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
355 text_t opt_fields; // which fields to search on
356 int opt_start=1, opt_end=20; // default values
357 int nummatches=0, maxdocs=50; // default values
358 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
359 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
360 while (ov_here != ov_end) {
361 if (ov_here->name=="Term") {
362 opt_term=ov_here->value;
363 } else if (ov_here->name=="QueryType") {
364 if (ov_here->value=="ranked") opt_querytype=1;
365 else if (ov_here->value=="boolean") opt_querytype=2;
366 else { /* error - shouldn't happen */
367 /* currently unhandled */
368 }
369 } else if (ov_here->name=="Index") {
370 opt_fields=ov_here->value;
371 } else if (ov_here->name=="StartResults") {
372 opt_start=ov_here->value.getint();
373 } else if (ov_here->name=="EndResults") {
374 opt_end=ov_here->value.getint();
375 } else if (ov_here->name=="Maxdocs") {
376 maxdocs=ov_here->value.getint();
377 }
378 ov_here++;
379 }
380 err=noError;
381 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
382 opt_querytype,
383 opt_fields,
384 opt_start, // first to get
385 opt_end-opt_start, //count
386 &nummatches,err);
387 if (err!=noError) {
388 // can we return an err msg in a response, or just use
389 // the more drastic Greenstone error mechanism?
390 docInfo=new ResultDocInfo_t;
391 response.docInfo.push_back(*docInfo);
392 docInfo->metadata["Title"].values.push_back("Error - query err?");
393 logout << "\nz3950 filter query: error connecting to server\n";
394 // for now, DON'T use GSDL protocol err.
395 err=noError;
396 return;
397 }
398 // check if (titles==NULL) - only happens on error?
399 if (nummatches>0) {
400 text_tarray::iterator titles_here=titles->begin();
401 text_tarray::iterator titles_end=titles->end();
402 int counter=1;
403 while (titles_here!=titles_end) {
404 docInfo=new ResultDocInfo_t;
405 docInfo->metadata["Title"].values.push_back(*titles_here);
406 docInfo->result_num=counter;
407 // we need to give some OID, so we'll just use counter for now...
408 // make it the number into the whole possible retrieved set.
409 docInfo->OID=counter+opt_start-1;
410 response.docInfo.push_back(*docInfo);
411 counter++;
412 titles_here++;
413 }
414 }
415
416 if (request.filterResultOptions & FRtermFreq) {
417 if (nummatches>maxdocs) {
418 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
419 response.isApprox=MoreThan;
420 } else {
421 response.numDocs=nummatches; // eg "36 documents"
422 response.isApprox=Exact; // Exact | Approximate | MoreThan
423 }
424 } // end of if (... & FRtermFreq)
425
426 } // end of if (... == "QueryFilter")
427 else {
428 // this wasn't a query action
429
430 if (request.filterOptions.size()>0 &&
431 request.filterOptions[0].name=="ParentNode") {
432 // don't want to return anything
433 return;
434 /* } else if (request.docSet.size() &&
435 request.docSet[0]!="collection") {
436 // documentaction
437 // if docSet is not empty, it is either "collection", or an array
438 // of OIDs
439 docInfo=new ResultDocInfo_t;
440 response.docInfo.push_back(*docInfo);
441 */
442 } else {
443 // in case we need to return only metadata
444 docInfo=new ResultDocInfo_t;
445 response.docInfo.push_back(*docInfo);
446 }
447 } // end of not a query action
448
449
450
451 // Fill in metadata for each response.docInfo (if wanted)
452 if (request.filterResultOptions & FRmetadata) {
453 get_collectinfo (collection, info, err, logout);
454 // should check err returned here....
455
456 if (!request.fields.empty()) {
457 // currently, this is only true for NullFilter when getting the "Title"
458 // for a documentaction.
459
460 // loop on each document being returned
461 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
462 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
463 while (docs_here!=docs_end) {
464 // loop on all the metadata fields in request.fields (type text_tset)
465 text_tset::iterator fields_here=request.fields.begin();
466 text_tset::iterator fields_end=request.fields.end();
467 text_tmap::iterator it;
468 while (fields_here!=fields_end) {
469 it=info.collectionmeta.find(*fields_here);
470 if (it!=info.collectionmeta.end())
471 docs_here->metadata[*fields_here].values.push_back((*it).second);
472 else if (*fields_here=="Title" && !request.docSet.empty()) {
473 // We only do this for a document action.
474 // (This comes through as a NullQuery).
475 // hopefully docSet is only not empty for documentaction...
476 text_t doctitle;
477 int i;
478 // check that docSet isn't empty first!!!!!!
479 i=request.docSet[0].getint();
480 text_t doctext="unneeded";
481 /* following variables aren't used, as our query result has been
482 cached in z3950server.cpp (but really we shouldn't know that
483 here...) But for the NullFilter, we don't get given these
484 again in the request, so for now we'll take advantage of this.*/
485
486 int querytype=0; text_t field="";
487
488 // get the Query out of the filterOptions. (we need get the Title)
489 text_t query="";
490 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
491 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
492 while (opthere!=opt_end) {
493 if (opthere->name=="Term") {
494 query=opthere->value;
495 } else if (opthere->name=="Index") {
496 field=opthere->value;
497 } else if (opthere->name=="QueryType") {
498 if (opthere->value=="ranked") querytype=1;
499 else if (opthere->value=="boolean") querytype=2;
500 else { /* error - shouldn't happen */
501 /* currently unhandled */
502 }
503 }
504 opthere++;
505 }
506 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
507 // check err value!
508 docs_here->metadata["Title"].values.push_back(doctitle);
509 } else {
510 docs_here->metadata[*fields_here].values.push_back("");
511 }
512 fields_here++;
513 } // end of inner while loop
514 docs_here++;
515 } // end of outer while loop
516 } // end of if (!request.fields.empty())
517
518 else { // request.fields empty: return all metadata for about page or query
519 // we'll only put it in the first docInfo.
520 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
521 text_tmap::iterator colmeta_end=info.collectionmeta.end();
522 while (colmeta_here!=colmeta_end) {
523 response.docInfo[0].metadata[(*colmeta_here).first].
524 values.push_back((*colmeta_here).second);
525 colmeta_here++;
526 }
527
528 // check if "collectionextra" metadata is set. If it isn't, we should
529 // create connection to target to get it.
530 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
531 // it hasn't been set yet...
532 text_t abouttext="<B>Server Online</B><br>\n";
533 abouttext+=(*zserver)->getzAbout();
534 // add in the "About" text we read in from config file.
535 // how do we incorporate multi-lingual metadata?
536 abouttext+="<P>\n";
537 text_t tmpabout;
538
539 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
540 abouttext+=tmpabout;
541
542 (*zserver)->setMeta("collectionextra",abouttext);
543 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
544 }
545 } // end of else
546
547 // do indices' names, regardless of whether asked for or not...
548 if (!response.docInfo.empty()) {
549 response.docInfo[0].metadata[".author"].values.push_back("author fields");
550 response.docInfo[0].metadata[".title"].values.push_back("title fields");
551 response.docInfo[0].metadata[".any"].values.push_back("any fields");
552 }
553 } //end of if (... & FRmetadata) ...
554}
555
556
557
558void z3950proto::get_document (const text_t &collection,
559 const DocumentRequest_t &request,
560 DocumentResponse_t &response,
561 comerror_t &err, ostream &logout) {
562
563 err=noError;
564
565 // get relevant "collection"
566 z3950_server_array::iterator zserver = zservers.begin();
567 z3950_server_array::iterator zend = zservers.end();
568 while (zserver != zend) {
569 if((*zserver)->getName()==collection) {
570 break;
571 }
572 zserver++;
573 }
574 // now have collection in zserver.
575
576 /* docresponse consists of
577 text_t response.doc */
578 text_t title="unneeded";
579 text_t doctext;
580 text_t query; // this should not be needed, as we have already connected to
581 // get the title....
582 int querytype; //ditto...
583 text_t field; // ditto...
584 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
585 title,doctext,err);
586 // check return value of above? (false=>not connected)
587 if (err==noError)
588 response.doc=doctext;
589 else {
590 // could print out different messages based on error type....
591 response.doc="<h2>Error</h2>There was an error while connecting to the ";
592 response.doc+="z39.50 server (ie target). Most likely this was a \n";
593 response.doc+="\"Connection Refused\" error.\n";
594
595 }
596 if (0) {
597 err=protocolError;
598 logout << "Some error\n";
599 }
600}
Note: See TracBrowser for help on using the repository browser.