source: trunk/gsdl/src/recpt/z3950proto.cpp@ 10705

Last change on this file since 10705 was 10705, checked in by jrm21, 19 years ago

tidied up interaction between flex/bison and greenstone's z3950 client
classes. renamed parse.y to zparse.y to make it more obvious it's related
to that stuff.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32
33#include "z3950cfg.h" // for reading in config files -
34// defines "struct z3950cfg *zserver_list" as the head of the list.
35
36// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
37// name conflicts with "other" yyins.
38extern FILE *yyin;
39extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
40
41
42
43z3950proto::z3950proto() {
44 zserver_count=0;
45}
46
47z3950proto::~z3950proto() {
48}
49
50void z3950proto::add_server (z3950_proxy& zserver) {
51
52 // append the new server
53 ++zserver_count;
54 zservers.push_back(&zserver);
55}
56
57void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
58 struct z3950cfg *here;
59 struct z3950cfg *oldhere;
60 z3950_proxy *zserver;
61 ShortColInfo_t *tempinfo;
62
63 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
64
65 char *errf_str=errf.getcstr();
66 if ((errfile=fopen(errf_str,"a"))==NULL) {
67 // what do we do if we can't open the error file?
68 // this means that errors will go to stderr, which may stuff up
69 // any cgi headers and the page.
70 errfile=stderr;
71 }
72 delete []errf_str;
73 yyout=errfile;
74
75
76 char *filename_str=filename.getcstr();
77 yyin=fopen(filename_str,"r");
78 if (yyin==NULL) {
79 cerr << "Could not open "<<filename_str<<" for reading.\n";
80 delete []filename_str;
81 return;
82 }
83 delete []filename_str;
84
85 // this is in zparse.tab.c,
86 // which is the bison output of zparse.y
87 yyparse();
88
89 if (errfile!=stderr)
90 fclose(errfile);
91
92 // we now have the config files in the ptr zserver_list
93 if (zserver_list==NULL)
94 return; // no valid servers found in the config file - note that
95 // the parser will have already spat out any errors.
96
97 // now create z3950 proxies for each structure in server_list
98 here=zserver_list;
99 while (here!=NULL) {
100 zserver=new z3950_proxy;
101 tempinfo=new ShortColInfo_t;
102
103 tempinfo->host.setcstr(here->hostname);
104 tempinfo->port=here->port;
105 tempinfo->name.setcstr(here->dbname);
106 zserver->setInfo(tempinfo);
107 zserver->setName(here->shortname);
108 // now collection metadata.
109 zserver->setMeta("collectionname",here->longname);
110 if (here->icon!=NULL)
111 zserver->setMeta("iconcollection",here->icon);
112 if (here->smallicon!=NULL)
113 zserver->setMeta("iconcollectionsmall",here->smallicon);
114
115 /* filterclass *filter = new filterclass ();
116 zserver->add_filter (filter);
117 browsefilterclass *browsefilter = new browsefilterclass();
118 zserver->add_filter (browsefilter);
119 queryfilterclass *queryfilter = new queryfilterclass();
120 zserver->add_filter (queryfilter);
121 */
122
123 // About list
124 if (here->about!=NULL) {
125 struct z3950aboutlist *about_here=here->about;
126 struct z3950aboutlist *oldabout;
127
128 while (about_here!=NULL) {
129 // problem with default lang (null): can't add ("",..)
130 if (about_here->lang==NULL)
131 zserver->addcfgAbout("en",about_here->text);
132 else
133 zserver->addcfgAbout(about_here->lang, about_here->text);
134 oldabout=about_here;
135 about_here=about_here->next;
136 free(oldabout->lang);
137 free(oldabout->text);
138 free(oldabout);
139 }
140 }
141
142 oldhere=here;
143 here=here->next;
144 free(oldhere->shortname); // these 4 strings should all be non-NULL...
145 free(oldhere->hostname);
146 free(oldhere->dbname);
147 free(oldhere->longname);
148 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
149 if (oldhere->smallicon) free(oldhere->smallicon);
150 free(oldhere);
151
152 add_server(*zserver);
153 } // end of while loop.
154
155}
156
157void z3950proto::configure (const text_t &/*key*/,
158 const text_tarray &/*cfgline*/) {
159 // this is called for each line in the gsdlsite.cfg file
160}
161
162
163bool z3950proto::init (ostream &/*logout*/) {
164 // set up tcp connection to server here?
165 // we might also read in the config file here (instead of librarymain.cpp)
166
167 //
168
169 // logout goes to initout.txt
170 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
171 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
172 return true;
173
174}
175
176/*text_t z3950proto::get_protocol_name () {
177 return "z3950proto";
178}
179*/
180
181void z3950proto::get_collection_list (text_tarray &collist,
182 comerror_t &/*err*/,
183 ostream &/*logout*/) {
184
185 z3950_proxy_array::iterator here = zservers.begin();
186 z3950_proxy_array::iterator end = zservers.end();
187 while (here != end) {
188 collist.push_back((*here)->getName());
189 ++here;
190 }
191}
192
193void z3950proto::has_collection (const text_t &collection, bool &hascollection,
194 comerror_t &/*err*/, ostream &/*logout*/) {
195 z3950_proxy_array::iterator here = zservers.begin();
196 z3950_proxy_array::iterator end = zservers.end();
197 while (here != end) {
198 if((*here)->getName()==collection) {
199 hascollection=true;
200 return;
201 }
202 ++here;
203 }
204 hascollection=false;
205}
206
207void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
208 comerror_t &/*err*/, ostream &/*logout*/) {
209 // should we just ping the server, or actually create a connection
210 // to the z39.50 server process on the machine ?
211 wassuccess = true;
212}
213
214void z3950proto::get_collectinfo (const text_t &collection,
215 ColInfoResponse_t &collectinfo,
216 comerror_t &err, ostream &logout) {
217
218 // set err to protocolError if something goes wrong...
219 err=noError;
220
221 z3950_proxy_array::iterator here = zservers.begin();
222 z3950_proxy_array::iterator end = zservers.end();
223 while (here != end) {
224 if((*here)->getName()==collection) {
225 break;
226 }
227 ++here;
228 }
229
230 if (here==end) {
231 err=protocolError;
232 char *coll_str=collection.getcstr();
233 logout << "z39.50: couldn't find collection"
234 << coll_str
235 << endl;
236 delete []coll_str;
237 return;
238 }
239
240 const ShortColInfo_t *colinfo=(*here)->getInfo();
241 collectinfo.shortInfo.name=colinfo->name;
242 collectinfo.shortInfo.host=colinfo->host;
243 collectinfo.shortInfo.port=colinfo->port;
244
245 collectinfo.isPublic=true;
246 // don't use beta field
247 /*collectinfo.isBeta=false;*/
248 collectinfo.buildDate=1;
249 // leave ccsCols empty (no cross-coll. searching - for now)
250 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
251 // This info is available from the config file -- johnmcp
252 /*******collectinfo.languages.push_back("en");
253 collectinfo.languages.push_back("fr");********/
254 collectinfo.numDocs=0;
255 collectinfo.numWords=0;
256 collectinfo.numBytes=0;
257 // copy the text maps over.
258 // collectinfo.collectionmeta; // text_tmap
259 text_tmap collmeta = *((*here)->getMeta());
260 text_tmap::iterator mhere = collmeta.begin();
261 text_tmap::iterator mend = collmeta.end();
262 while (mhere != mend) {
263 collectinfo.collectionmeta[(*mhere).first][g_EmptyText] = (*mhere).second;
264 mhere ++;
265 }
266 //collectinfo.collectionmeta=*((*here)->getMeta());
267 collectinfo.format=*((*here)->getFormat()); //text_tmap
268 /* collectinfo.building; //text_tmap */
269
270 ////collectinfo.receptionist="z3950";
271 /* for now... this is a url, relative to .../cgi-bin.
272 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
273 */
274}
275
276void z3950proto::get_filterinfo (const text_t &/*collection*/,
277 InfoFiltersResponse_t &response,
278 comerror_t &/*err*/, ostream &/*logout*/) {
279 // we'll fake it here, and say we have set up some filters
280 response.filterNames.insert("BrowseFilter");
281 response.filterNames.insert("QueryFilter");
282 response.filterNames.insert("NullFilter");
283
284}
285
286void z3950proto::get_filteroptions (const text_t &/*collection*/,
287 const InfoFilterOptionsRequest_t &/*req*/,
288 InfoFilterOptionsResponse_t &response,
289 comerror_t &err, ostream &/*logout*/) {
290 // for now, assume all servers have the same characteristics
291 /* if (request.filterName=="QueryFilter") { }
292 else if (request.filterName=="BrowseFilter") { }
293 else if (request.filterName=="NullFilter") { } */
294 response.filterOptions["Index"].type=FilterOption_t::stringt;
295 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
296 response.filterOptions["Index"].defaultValue="any";
297 response.filterOptions["Index"].validValues.push_back(".any");
298 response.filterOptions["Index"].validValues.push_back(".title");
299 response.filterOptions["Index"].validValues.push_back(".author");
300 // and maybe ["Language"] option as well?
301 err=noError;
302}
303
304void z3950proto::filter (const text_t &collection,
305 FilterRequest_t &request,
306 FilterResponse_t &response,
307 comerror_t &err, ostream &logout) {
308 // this function is called when:
309 // * creating the title page,(looking for iconcoll* & collectname metadata)
310 // * creating the about page (looking for "Title" metadata)
311 // * doing the query - (note that a request for metadata comes first, then
312 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
313
314 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
315 // For the title page, we should not create a connection to the target
316 // (target means the actual z39.50 server, origin means us), but
317 // for the about page and query pages, we need to get information from the
318 // origin. (eg for the about page, we will print out some info such as ID,
319 // name and version.
320
321 // cerr now goes to errout.txt in etc directory
322 err=noError;
323
324 // get relevant "collection"
325 z3950_proxy_array::iterator zserver = zservers.begin();
326 z3950_proxy_array::iterator zend = zservers.end();
327 while (zserver != zend) {
328 if((*zserver)->getName()==collection) {
329 break;
330 }
331 ++zserver;
332 }
333 // now have collection in zserver.
334
335 ColInfoResponse_t info;
336 ResultDocInfo_t *docInfo;
337
338 // leave response.termInfo empty
339 // response.termInfo.push_back(g_EmptyText); ??????? (should be empty if not req.)
340
341 // See if this is for a query action
342 if (request.filterName=="QueryFilter") {
343 /* Sample OptionValue pairs
344 `StartResults'=`1'
345 `EndResults'=`20'
346 `Term'=`firstword secondword' (term is just whatever the user typed in)
347 `QueryType'=ranked|boolean -> OR|AND
348 //`MatchMode'=`some' => 'OR'
349 //`MatchMode' = `all' => 'AND'
350 `Casefold'=`true'
351 `Stem'=`false'
352 `Maxdocs'=`50'
353 */
354 // go through options
355 text_t opt_term; // the term(s) that the user entered
356 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
357 text_t opt_fields; // which fields to search on
358 int opt_start=1, opt_end=20; // default values
359 int nummatches=0, maxdocs=50; // default values
360 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
361 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
362 while (ov_here != ov_end) {
363 if (ov_here->name=="Term") {
364 opt_term=ov_here->value;
365 } else if (ov_here->name=="QueryType") {
366 if (ov_here->value=="ranked") opt_querytype=1;
367 else if (ov_here->value=="boolean") opt_querytype=2;
368 else { /* error - shouldn't happen */
369 /* currently unhandled */
370 }
371 } else if (ov_here->name=="Index") {
372 opt_fields=ov_here->value;
373 } else if (ov_here->name=="StartResults") {
374 opt_start=ov_here->value.getint();
375 } else if (ov_here->name=="EndResults") {
376 opt_end=ov_here->value.getint();
377 } else if (ov_here->name=="Maxdocs") {
378 maxdocs=ov_here->value.getint();
379 }
380 ++ov_here;
381 }
382 err=noError;
383 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
384 opt_querytype,
385 opt_fields,
386 opt_start, // first to get
387 opt_end-opt_start, //count
388 &nummatches,err);
389 if (err!=noError) {
390 // can we return an err msg in a response, or just use
391 // the more drastic Greenstone error mechanism?
392 docInfo=new ResultDocInfo_t;
393 response.docInfo.push_back(*docInfo);
394 docInfo->metadata["Title"].values.push_back("Error - query err?");
395 logout << "\nz3950 filter query: error connecting to server\n";
396 // for now, DON'T use GSDL protocol err.
397 err=noError;
398 return;
399 }
400 // check if (titles==NULL) - only happens on error?
401 if (nummatches>0) {
402 text_tarray::iterator titles_here=titles->begin();
403 text_tarray::iterator titles_end=titles->end();
404 int counter=1;
405 while (titles_here!=titles_end) {
406 docInfo=new ResultDocInfo_t;
407 docInfo->metadata["Title"].values.push_back(*titles_here);
408 docInfo->result_num=counter;
409 // we need to give some OID, so we'll just use counter for now...
410 // make it the number into the whole possible retrieved set.
411 docInfo->OID=counter+opt_start-1;
412 response.docInfo.push_back(*docInfo);
413 ++counter;
414 ++titles_here;
415 }
416 }
417
418 if (request.filterResultOptions & FRtermFreq) {
419 if (nummatches>maxdocs) {
420 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
421 response.isApprox=MoreThan;
422 } else {
423 response.numDocs=nummatches; // eg "36 documents"
424 response.isApprox=Exact; // Exact | Approximate | MoreThan
425 }
426 } // end of if (... & FRtermFreq)
427
428 } // end of if (... == "QueryFilter")
429 else {
430 // this wasn't a query action
431
432 if (request.filterOptions.size()>0 &&
433 request.filterOptions[0].name=="ParentNode") {
434 // don't want to return anything
435 return;
436 /* } else if (request.docSet.size() &&
437 request.docSet[0]!="collection") {
438 // documentaction
439 // if docSet is not empty, it is either "collection", or an array
440 // of OIDs
441 docInfo=new ResultDocInfo_t;
442 response.docInfo.push_back(*docInfo);
443 */
444 } else {
445 // in case we need to return only metadata
446 docInfo=new ResultDocInfo_t;
447 response.docInfo.push_back(*docInfo);
448 }
449 } // end of not a query action
450
451
452
453 // Fill in metadata for each response.docInfo (if wanted)
454 if (request.filterResultOptions & FRmetadata) {
455 get_collectinfo (collection, info, err, logout);
456 // should check err returned here....
457
458 if (!request.fields.empty()) {
459 // currently, this is only true for NullFilter when getting the "Title"
460 // for a documentaction.
461
462 // loop on each document being returned
463 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
464 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
465 while (docs_here!=docs_end) {
466 // loop on all the metadata fields in request.fields (type text_tset)
467 text_tset::iterator fields_here=request.fields.begin();
468 text_tset::iterator fields_end=request.fields.end();
469 //text_tmap::iterator it;
470 collectionmeta_map::iterator it;
471 while (fields_here!=fields_end) {
472 it=info.collectionmeta.find(*fields_here);
473 if (it!=info.collectionmeta.end())
474 docs_here->metadata[*fields_here].values.push_back(((*it).second)[g_EmptyText]);
475 else if (*fields_here=="Title" && !request.docSet.empty()) {
476 // We only do this for a document action.
477 // (This comes through as a NullQuery).
478 // hopefully docSet is only not empty for documentaction...
479 text_t doctitle;
480 int i;
481 // check that docSet isn't empty first!!!!!!
482 i=request.docSet[0].getint();
483 text_t doctext="unneeded";
484 /* following variables aren't used, as our query result has been
485 cached in z3950proxy.cpp (but really we shouldn't know that
486 here...) But for the NullFilter, we don't get given these
487 again in the request, so for now we'll take advantage of this.*/
488
489 int querytype=0; text_t field=g_EmptyText;
490
491 // get the Query out of the filterOptions. (we need get the Title)
492 text_t query=g_EmptyText;
493 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
494 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
495 while (opthere!=opt_end) {
496 if (opthere->name=="Term") {
497 query=opthere->value;
498 } else if (opthere->name=="Index") {
499 field=opthere->value;
500 } else if (opthere->name=="QueryType") {
501 if (opthere->value=="ranked") querytype=1;
502 else if (opthere->value=="boolean") querytype=2;
503 else { /* error - shouldn't happen */
504 /* currently unhandled */
505 }
506 }
507 ++opthere;
508 }
509 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
510 // check err value!
511 docs_here->metadata["Title"].values.push_back(doctitle);
512 } else {
513 docs_here->metadata[*fields_here].values.push_back(g_EmptyText);
514 }
515 ++fields_here;
516 } // end of inner while loop
517 ++docs_here;
518 } // end of outer while loop
519 } // end of if (!request.fields.empty())
520
521 else { // request.fields empty: return all metadata for about page or query
522 // we'll only put it in the first docInfo.
523 collectionmeta_map::iterator colmeta_here=info.collectionmeta.begin();
524 collectionmeta_map::iterator colmeta_end=info.collectionmeta.end();
525 while (colmeta_here!=colmeta_end) {
526 response.docInfo[0].metadata[(*colmeta_here).first].
527 values.push_back(((*colmeta_here).second)[g_EmptyText]);
528 ++colmeta_here;
529 }
530
531 // check if "collectionextra" metadata is set. If it isn't, we should
532 // create connection to target to get it.
533 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
534 // it hasn't been set yet...
535 text_t abouttext="<B>Server Online</B><br>\n";
536 abouttext+=(*zserver)->getzAbout();
537 // add in the "About" text we read in from config file.
538 // how do we incorporate multi-lingual metadata?
539 abouttext+="<P>\n";
540 text_t tmpabout;
541
542 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
543 abouttext+=tmpabout;
544
545 (*zserver)->setMeta("collectionextra",abouttext);
546 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
547 }
548 } // end of else
549
550 // do indices' names, regardless of whether asked for or not...
551 if (!response.docInfo.empty()) {
552 response.docInfo[0].metadata[".author"].values.push_back("author fields");
553 response.docInfo[0].metadata[".title"].values.push_back("title fields");
554 response.docInfo[0].metadata[".any"].values.push_back("any fields");
555 }
556 } //end of if (... & FRmetadata) ...
557}
558
559
560
561void z3950proto::get_document (const text_t &collection,
562 const DocumentRequest_t &request,
563 DocumentResponse_t &response,
564 comerror_t &err, ostream &logout) {
565
566 err=noError;
567
568 // get relevant "collection"
569 z3950_proxy_array::iterator zserver = zservers.begin();
570 z3950_proxy_array::iterator zend = zservers.end();
571 while (zserver != zend) {
572 if((*zserver)->getName()==collection) {
573 break;
574 }
575 ++zserver;
576 }
577 // now have collection in zserver.
578
579 /* docresponse consists of
580 text_t response.doc */
581 text_t title="unneeded";
582 text_t doctext;
583 text_t query; // this should not be needed, as we have already connected to
584 // get the title....
585 int querytype = 1; //ditto...
586 text_t field; // ditto...
587 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
588 title,doctext,err);
589 // check return value of above? (false=>not connected)
590 if (err==noError)
591 response.doc=doctext;
592 else {
593 // could print out different messages based on error type....
594 response.doc="<h2>Error</h2>There was an error while connecting to the ";
595 response.doc+="z39.50 server (ie target). Most likely this was a \n";
596 response.doc+="\"Connection Refused\" error.\n";
597
598 }
599 if (0) {
600 err=protocolError;
601 logout << "Some error\n";
602 }
603}
604
605// sets issearchable to true if the given colection is searchable
606void z3950proto::is_searchable (const text_t &/*collection*/, bool &issearchable,
607 comerror_t &err, ostream &/*logout*/) {
608 issearchable = true; // assume all collections are searchable?
609 err = noError;
610}
611
Note: See TracBrowser for help on using the repository browser.