source: trunk/gsdl/src/recpt/z3950proto.cpp@ 1642

Last change on this file since 1642 was 1642, checked in by jrm21, 24 years ago

Redirect flex's (parse.fl) "FILE *yyout" to go to the error file, so
unmatched tokens go there, instead of into the HTTP headers (oops...).
The only unmatched tokens known are non-alphanum symbols - for example,
the "<<<<<<<<<<<<<<" printed out in a CVS conflict..... (thanks Gordon!)

  • Property svn:keywords set to Author Date Id Revision
File size: 19.0 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36
37// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
38// name conflicts with "other" yyins.
39extern FILE *yyin;
40extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
41
42extern "C" {
43 extern int zconfigparse();
44}
45
46
47
48
49z3950proto::z3950proto() {
50 zserver_count=0;
51}
52
53z3950proto::~z3950proto() {
54}
55
56void z3950proto::add_server (z3950_server& zserver) {
57
58 // append the new server
59 zserver_count++;
60 zservers.push_back(&zserver);
61}
62
63void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
64 struct z3950cfg *here;
65 struct z3950cfg *oldhere;
66 z3950_server *zserver;
67 ShortColInfo_t *tempinfo;
68
69 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
70
71 if ((errfile=fopen(errf.getcstr(),"a"))==NULL) {
72 // what do we do if we can't open the error file?
73 // this means that errors will go to stderr, which may stuff up
74 // any cgi headers and the page.
75 errfile=stderr;
76 }
77 yyout=errfile;
78
79 // zconfigparse() is defined in zparse.tab.c,
80 // which is the bison output of zparse.y
81
82 yyin=fopen(filename.getcstr(),"r");
83 if (yyin==NULL) {
84 cerr << "Could not open "<<filename.getcstr()<<" for reading.\n";
85 return;
86 }
87 zconfigparse();
88
89 if (errfile!=stderr)
90 fclose(errfile);
91
92 // we now have the config files in the ptr zserver_list
93 if (zserver_list==NULL)
94 return; // no valid servers found in the config file - note that
95 // the parser will have already spat out any errors.
96
97 // now create z3950servers for each structure in server_list
98 here=zserver_list;
99 while (here!=NULL) {
100 zserver=new z3950_server;
101 tempinfo=new ShortColInfo_t;
102
103 tempinfo->host.setcstr(here->hostname);
104 tempinfo->port=here->port;
105 tempinfo->name.setcstr(here->dbname);
106 zserver->setInfo(tempinfo);
107 zserver->setName(here->shortname);
108 // now collection metadata.
109 zserver->setMeta("collectionname",here->longname);
110 if (here->icon!=NULL)
111 zserver->setMeta("iconcollection",here->icon);
112 if (here->smallicon!=NULL)
113 zserver->setMeta("iconcollectionsmall",here->smallicon);
114
115 /* filterclass *filter = new filterclass ();
116 zserver->add_filter (filter);
117 browsefilterclass *browsefilter = new browsefilterclass();
118 zserver->add_filter (browsefilter);
119 queryfilterclass *queryfilter = new queryfilterclass();
120 zserver->add_filter (queryfilter);
121 */
122
123 // About list
124 if (here->about!=NULL) {
125 struct z3950aboutlist *about_here=here->about;
126 struct z3950aboutlist *oldabout;
127
128 while (about_here!=NULL) {
129 // problem with default lang (null): can't add ("",..)
130 if (about_here->lang==NULL)
131 zserver->addcfgAbout("en",about_here->text);
132 else
133 zserver->addcfgAbout(about_here->lang, about_here->text);
134 oldabout=about_here;
135 about_here=about_here->next;
136 free(oldabout->lang);
137 free(oldabout->text);
138 free(oldabout);
139 }
140 }
141
142 oldhere=here;
143 here=here->next;
144 free(oldhere->shortname); // these 4 strings should all be non-NULL...
145 free(oldhere->hostname);
146 free(oldhere->dbname);
147 free(oldhere->longname);
148 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
149 if (oldhere->smallicon) free(oldhere->smallicon);
150 free(oldhere);
151
152 add_server(*zserver);
153 } // end of while loop.
154
155}
156
157void z3950proto::configure (const text_t &/*key*/,
158 const text_tarray &/*cfgline*/) {
159 // this is called for each line in the gsdlsite.cfg file
160}
161
162
163bool z3950proto::init (ostream &/*logout*/) {
164 // set up tcp connection to server here?
165 // we might also read in the config file here (instead of librarymain.cpp)
166
167 //
168
169 // logout goes to initout.txt
170 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
171 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
172 return true;
173
174}
175
176/*text_t z3950proto::get_protocol_name () {
177 return "z3950proto";
178}
179*/
180
181void z3950proto::get_collection_list (text_tarray &collist,
182 comerror_t &/*err*/,
183 ostream &/*logout*/) {
184
185 z3950_server_array::iterator here = zservers.begin();
186 z3950_server_array::iterator end = zservers.end();
187 while (here != end) {
188 collist.push_back((*here)->getName());
189 here++;
190 }
191}
192
193void z3950proto::has_collection (const text_t &collection, bool &hascollection,
194 comerror_t &/*err*/, ostream &/*logout*/) {
195 z3950_server_array::iterator here = zservers.begin();
196 z3950_server_array::iterator end = zservers.end();
197 while (here != end) {
198 if((*here)->getName()==collection) {
199 hascollection=true;
200 return;
201 }
202 here++;
203 }
204 hascollection=false;
205}
206
207void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
208 comerror_t &/*err*/, ostream &/*logout*/) {
209 // should we just ping the server, or actually create a connection
210 // to the z39.50 server process on the machine ?
211 wassuccess = true;
212}
213
214void z3950proto::get_collectinfo (const text_t &collection,
215 ColInfoResponse_t &collectinfo,
216 comerror_t &err, ostream &logout) {
217
218 // set err to protocolError if something goes wrong...
219 err=noError;
220
221 z3950_server_array::iterator here = zservers.begin();
222 z3950_server_array::iterator end = zservers.end();
223 while (here != end) {
224 if((*here)->getName()==collection) {
225 break;
226 }
227 here++;
228 }
229
230 if (here==end) {
231 err=protocolError;
232 logout << "z39.50: couldn't find collection"
233 << collection.getcstr()
234 << endl;
235 return;
236 }
237
238 const ShortColInfo_t *colinfo=(*here)->getInfo();
239 collectinfo.shortInfo.name=colinfo->name;
240 collectinfo.shortInfo.host=colinfo->host;
241 collectinfo.shortInfo.port=colinfo->port;
242
243 collectinfo.isPublic=true;
244 // don't use beta field
245 /*collectinfo.isBeta=false;*/
246 collectinfo.buildDate=1;
247 // leave ccsCols empty (no cross-coll. searching - for now)
248 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
249 // This info is available from the config file -- johnmcp
250 /*******collectinfo.languages.push_back("en");
251 collectinfo.languages.push_back("fr");********/
252 collectinfo.numDocs=0;
253 collectinfo.numWords=0;
254 collectinfo.numBytes=0;
255 // copy the text maps over.
256 // collectinfo.collectionmeta; // text_tmap
257 collectinfo.collectionmeta=*((*here)->getMeta());
258 collectinfo.format=*((*here)->getFormat()); //text_tmap
259 /* collectinfo.building; //text_tmap */
260
261 ////collectinfo.receptionist="z3950";
262 /* for now... this is a url, relative to .../cgi-bin.
263 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
264 */
265}
266
267void z3950proto::get_filterinfo (const text_t &/*collection*/,
268 InfoFiltersResponse_t &response,
269 comerror_t &/*err*/, ostream &/*logout*/) {
270 // we'll fake it here, and say we have set up some filters
271 response.filterNames.insert("BrowseFilter");
272 response.filterNames.insert("QueryFilter");
273 response.filterNames.insert("NullFilter");
274
275}
276
277void z3950proto::get_filteroptions (const text_t &/*collection*/,
278 const InfoFilterOptionsRequest_t &/*req*/,
279 InfoFilterOptionsResponse_t &response,
280 comerror_t &err, ostream &/*logout*/) {
281 // for now, assume all servers have the same characteristics
282 /* if (request.filterName=="QueryFilter") { }
283 else if (request.filterName=="BrowseFilter") { }
284 else if (request.filterName=="NullFilter") { } */
285 response.filterOptions["Index"].type=FilterOption_t::stringt;
286 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
287 response.filterOptions["Index"].defaultValue="any";
288 response.filterOptions["Index"].validValues.push_back(".any");
289 response.filterOptions["Index"].validValues.push_back(".title");
290 response.filterOptions["Index"].validValues.push_back(".author");
291 // and maybe ["Language"] option as well?
292 err=noError;
293}
294
295void z3950proto::filter (const text_t &collection,
296 FilterRequest_t &request,
297 FilterResponse_t &response,
298 comerror_t &err, ostream &logout) {
299 // this function is called when:
300 // * creating the title page,(looking for iconcoll* & collectname metadata)
301 // * creating the about page (looking for "Title" metadata)
302 // * doing the query - (note that a request for metadata comes first, then
303 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
304
305 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
306 // For the title page, we should not create a connection to the target
307 // (target means the actual z39.50 server, origin means us), but
308 // for the about page and query pages, we need to get information from the
309 // origin. (eg for the about page, we will print out some info such as ID,
310 // name and version.
311
312 // cerr now goes to errout.txt in etc directory
313 err=noError;
314
315 // get relevant "collection"
316 z3950_server_array::iterator zserver = zservers.begin();
317 z3950_server_array::iterator zend = zservers.end();
318 while (zserver != zend) {
319 if((*zserver)->getName()==collection) {
320 break;
321 }
322 zserver++;
323 }
324 // now have collection in zserver.
325
326 ColInfoResponse_t info;
327 ResultDocInfo_t *docInfo;
328
329 // leave response.termInfo empty
330 // response.termInfo.push_back(""); ??????? (should be empty if not req.)
331
332 // See if this is for a query action
333 if (request.filterName=="QueryFilter") {
334 /* Sample OptionValue pairs
335 `StartResults'=`1'
336 `EndResults'=`20'
337 `Term'=`firstword secondword' (term is just whatever the user typed in)
338 `QueryType'=`ranked' => 'OR' (cgiarg t=1)
339 `QueryType' = `boolean' => 'AND' (cgiarg t=0)
340 `Casefold'=`true'
341 `Stem'=`false'
342 `Maxdocs'=`50'
343 */
344 // go through options
345 text_t opt_term; // the term(s) that the user entered
346 text_t opt_fields; // which fields to search on
347 int opt_start=1, opt_end=20; // default values
348 int nummatches=0, maxdocs=50; // default values
349 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
350 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
351 while (ov_here != ov_end) {
352 // cout << "OV pair: `" << ov_here->name.getcstr() << "'=`"
353 // << ov_here->value.getcstr() << "'\n";
354 if (ov_here->name=="Term")
355 {
356 opt_term=ov_here->value;
357 } else if (ov_here->name=="Index")
358 {
359 opt_fields=ov_here->value;
360 } else if (ov_here->name=="StartResults")
361 {
362 opt_start=ov_here->value.getint();
363 } else if (ov_here->name=="EndResults")
364 {
365 opt_end=ov_here->value.getint();
366 } else if (ov_here->name=="Maxdocs")
367 {
368 maxdocs=ov_here->value.getint();
369 }
370 ov_here++;
371 }
372 err=noError;
373 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
374 opt_fields,
375 opt_start, // first to get
376 opt_end-opt_start, //count
377 &nummatches,err);
378 if (err!=noError) {
379 // can we return an err msg in a response, or just use
380 // the more drastic Greenstone error mechanism?
381 docInfo=new ResultDocInfo_t;
382 response.docInfo.push_back(*docInfo);
383 docInfo->metadata["Title"].values.push_back("Error - query err?");
384 logout << "\nz3950 filter query: error connecting to server\n";
385 // for now, DON'T use GSDL protocol err.
386 err=noError;
387 return;
388 }
389 // check if (titles==NULL) - only happens on error?
390 if (nummatches>0) {
391 text_tarray::iterator titles_here=titles->begin();
392 text_tarray::iterator titles_end=titles->end();
393 int counter=1;
394 while (titles_here!=titles_end) {
395 docInfo=new ResultDocInfo_t;
396 docInfo->metadata["Title"].values.push_back(*titles_here);
397 docInfo->result_num=counter;
398 // we need to give some OID, so we'll just use counter for now...
399 // make it the number into the whole possible retrieved set.
400 docInfo->OID=counter+opt_start-1;
401 response.docInfo.push_back(*docInfo);
402 counter++;
403 titles_here++;
404 }
405 }
406
407 if (request.filterResultOptions & FRtermFreq) {
408 if (nummatches>maxdocs) {
409 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
410 response.isApprox=MoreThan;
411 } else {
412 response.numDocs=nummatches; // eg "36 documents"
413 response.isApprox=Exact; // Exact | Approximate | MoreThan
414 }
415 } // end of if (... & FRtermFreq)
416
417 } // end of if (... & FROID)
418 else {
419 // this wasn't a query action
420
421 if (request.filterOptions.size()>0 &&
422 request.filterOptions[0].name=="ParentNode") {
423 // don't want to return anything
424 return;
425 /* } else if (request.docSet.size() &&
426 request.docSet[0]!="collection") {
427 // documentaction
428 // if docSet is not empty, it is either "collection", or an array
429 // of OIDs
430 docInfo=new ResultDocInfo_t;
431 response.docInfo.push_back(*docInfo);
432 */
433 } else {
434 // in case we need to return only metadata
435 docInfo=new ResultDocInfo_t;
436 response.docInfo.push_back(*docInfo);
437 }
438 }
439
440 // Fill in metadata for each response.docInfo (if wanted)
441 if (request.filterResultOptions & FRmetadata) {
442 get_collectinfo (collection, info, err, logout);
443 // should check err returned here....
444
445 // get the Query out of the filterOptions.
446 text_t query="";
447 text_t field="";
448 OptionValue_tarray::iterator opt_here=request.filterOptions.begin();
449 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
450 while (opt_here!=opt_end) {
451 if (opt_here->name=="Query") {
452 query=opt_here->value;
453 if (field!="") break; // break from loop if we've got both
454 } else if (opt_here->name=="Index") {
455 field=opt_here->value;
456 if (query!="") break; // break from loop if we've got both
457 }
458 opt_here++;
459 }
460
461 if (!request.fields.empty()) {
462 // loop on each document being returned
463 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
464 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
465 while (docs_here!=docs_end) {
466
467 // loop on all the metadata fields in request.fields (type text_tset)
468 text_tset::iterator fields_here=request.fields.begin();
469 text_tset::iterator fields_end=request.fields.end();
470 text_tmap::iterator it;
471 while (fields_here!=fields_end) {
472 it=info.collectionmeta.find(*fields_here);
473 if (it!=info.collectionmeta.end())
474 docs_here->metadata[*fields_here].values.push_back((*it).second);
475 else if (*fields_here=="Title" && !request.docSet.empty()) {
476 // We only do this for a document action.
477 // (This comes through as a NullQuery).
478 // hopefully docSet is only not empty for documentaction...
479 text_t doctitle;
480 int i;
481 // check that docSet isn't empty first!!!!!!
482 i=request.docSet[0].getint();
483 text_t doctext="unneeded";
484 (*zserver)->getfullrecord(query, field, i, doctitle, doctext, err);
485 // check err value!
486 docs_here->metadata["Title"].values.push_back(doctitle);
487 } else {
488 docs_here->metadata[*fields_here].values.push_back("");
489 /////// cerr << " (not found)";
490 }
491 fields_here++;
492 } // end of inner while loop
493 docs_here++;
494 } // end of outer while loop
495 } // end of if (!request.fields.empty())
496
497 else { // request.fields empty: return all metadata for about page or query
498 // we'll only put it in the first docInfo.
499 text_tmap::iterator colmeta_here=info.collectionmeta.begin();
500 text_tmap::iterator colmeta_end=info.collectionmeta.end();
501 while (colmeta_here!=colmeta_end) {
502 response.docInfo[0].metadata[(*colmeta_here).first].
503 values.push_back((*colmeta_here).second);
504 colmeta_here++;
505 }
506
507 // check if "collectionextra" metadata is set. If it isn't, we should
508 // create connection to target to get it.
509 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
510 // it hasn't been set yet...
511 text_t abouttext="<B>Server Online</B><br>\n";
512 abouttext+=(*zserver)->getzAbout();
513 // add in the "About" text we read in from config file.
514 // how do we incorporate multi-lingual metadata?
515 abouttext+="<P>\n";
516 text_t tmpabout;
517
518 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
519 abouttext+=tmpabout;
520
521 (*zserver)->setMeta("collectionextra",abouttext);
522 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
523 }
524 } // end of else
525
526 // do indices' names, regardless of whether asked for or not...
527 if (!response.docInfo.empty()) {
528 response.docInfo[0].metadata[".author"].values.push_back("author fields");
529 response.docInfo[0].metadata[".title"].values.push_back("title fields");
530 response.docInfo[0].metadata[".any"].values.push_back("any fields");
531 }
532 } //end of if (... & FRmetadata) ...
533}
534
535
536
537void z3950proto::get_document (const text_t &collection,
538 const DocumentRequest_t &request,
539 DocumentResponse_t &response,
540 comerror_t &err, ostream &logout) {
541
542 err=noError;
543
544 // get relevant "collection"
545 z3950_server_array::iterator zserver = zservers.begin();
546 z3950_server_array::iterator zend = zservers.end();
547 while (zserver != zend) {
548 if((*zserver)->getName()==collection) {
549 break;
550 }
551 zserver++;
552 }
553 // now have collection in zserver.
554
555 /* cout << "get document:\n\tOID: " << request.OID.getcstr()
556 << "\n\tdocType: " << request.docType.getcstr()
557 << "\n\tdocFormat: " << request.docFormat.getcstr() <<"\n";
558 */
559
560 /* docresponse consists of
561 text_t response.doc */
562 text_t title="unneeded";
563 text_t doctext;
564 text_t query; // this should not be needed, as we have already connected to
565 // get the title....
566 text_t field; // ditto...
567 (*zserver)->getfullrecord(query,field,request.OID.getint(),
568 title,doctext,err);
569 // check return value of above? (false=>not connected)
570 if (err==noError)
571 response.doc=doctext;
572 else {
573 // could print out different messages based on error type....
574 response.doc="<h2>Error</h2>There was an error while connecting to the ";
575 response.doc+="z39.50 server (ie target). Most likely this was a \n";
576 response.doc+="\"Connection Refused\" error.\n";
577
578 }
579 if (0) {
580 err=protocolError;
581 logout << "Some error\n";
582 }
583}
Note: See TracBrowser for help on using the repository browser.