source: trunk/gsdl/src/recpt/z3950proto.cpp@ 10440

Last change on this file since 10440 was 10440, checked in by kjdon, 19 years ago

small changes so it compiles under windows

  • Property svn:keywords set to Author Date Id Revision
File size: 20.4 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34extern "C" {
35#include "z3950cfg.h" // for reading in config files -
36// defines "struct z3950cfg *zserver_list" as the head of the list.
37}
38// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
39// name conflicts with "other" yyins.
40//extern "C" {
41FILE *yyin;
42FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
43//}
44extern "C" {
45 extern int zconfigparse();
46}
47
48
49
50
51z3950proto::z3950proto() {
52 zserver_count=0;
53}
54
55z3950proto::~z3950proto() {
56}
57
58void z3950proto::add_server (z3950_proxy& zserver) {
59
60 // append the new server
61 ++zserver_count;
62 zservers.push_back(&zserver);
63}
64
65void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
66 struct z3950cfg *here;
67 struct z3950cfg *oldhere;
68 z3950_proxy *zserver;
69 ShortColInfo_t *tempinfo;
70
71 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
72
73 char *errf_str=errf.getcstr();
74 if ((errfile=fopen(errf_str,"a"))==NULL) {
75 // what do we do if we can't open the error file?
76 // this means that errors will go to stderr, which may stuff up
77 // any cgi headers and the page.
78 errfile=stderr;
79 }
80 delete []errf_str;
81 yyout=errfile;
82
83 // zconfigparse() is defined in zparse.tab.c,
84 // which is the bison output of zparse.y
85
86 char *filename_str=filename.getcstr();
87 yyin=fopen(filename_str,"r");
88 if (yyin==NULL) {
89 cerr << "Could not open "<<filename_str<<" for reading.\n";
90 delete []filename_str;
91 return;
92 }
93 delete []filename_str;
94 zconfigparse();
95
96 if (errfile!=stderr)
97 fclose(errfile);
98
99 // we now have the config files in the ptr zserver_list
100 if (zserver_list==NULL)
101 return; // no valid servers found in the config file - note that
102 // the parser will have already spat out any errors.
103
104 // now create z3950 proxies for each structure in server_list
105 here=zserver_list;
106 while (here!=NULL) {
107 zserver=new z3950_proxy;
108 tempinfo=new ShortColInfo_t;
109
110 tempinfo->host.setcstr(here->hostname);
111 tempinfo->port=here->port;
112 tempinfo->name.setcstr(here->dbname);
113 zserver->setInfo(tempinfo);
114 zserver->setName(here->shortname);
115 // now collection metadata.
116 zserver->setMeta("collectionname",here->longname);
117 if (here->icon!=NULL)
118 zserver->setMeta("iconcollection",here->icon);
119 if (here->smallicon!=NULL)
120 zserver->setMeta("iconcollectionsmall",here->smallicon);
121
122 /* filterclass *filter = new filterclass ();
123 zserver->add_filter (filter);
124 browsefilterclass *browsefilter = new browsefilterclass();
125 zserver->add_filter (browsefilter);
126 queryfilterclass *queryfilter = new queryfilterclass();
127 zserver->add_filter (queryfilter);
128 */
129
130 // About list
131 if (here->about!=NULL) {
132 struct z3950aboutlist *about_here=here->about;
133 struct z3950aboutlist *oldabout;
134
135 while (about_here!=NULL) {
136 // problem with default lang (null): can't add ("",..)
137 if (about_here->lang==NULL)
138 zserver->addcfgAbout("en",about_here->text);
139 else
140 zserver->addcfgAbout(about_here->lang, about_here->text);
141 oldabout=about_here;
142 about_here=about_here->next;
143 free(oldabout->lang);
144 free(oldabout->text);
145 free(oldabout);
146 }
147 }
148
149 oldhere=here;
150 here=here->next;
151 free(oldhere->shortname); // these 4 strings should all be non-NULL...
152 free(oldhere->hostname);
153 free(oldhere->dbname);
154 free(oldhere->longname);
155 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
156 if (oldhere->smallicon) free(oldhere->smallicon);
157 free(oldhere);
158
159 add_server(*zserver);
160 } // end of while loop.
161
162}
163
164void z3950proto::configure (const text_t &/*key*/,
165 const text_tarray &/*cfgline*/) {
166 // this is called for each line in the gsdlsite.cfg file
167}
168
169
170bool z3950proto::init (ostream &/*logout*/) {
171 // set up tcp connection to server here?
172 // we might also read in the config file here (instead of librarymain.cpp)
173
174 //
175
176 // logout goes to initout.txt
177 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
178 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
179 return true;
180
181}
182
183/*text_t z3950proto::get_protocol_name () {
184 return "z3950proto";
185}
186*/
187
188void z3950proto::get_collection_list (text_tarray &collist,
189 comerror_t &/*err*/,
190 ostream &/*logout*/) {
191
192 z3950_proxy_array::iterator here = zservers.begin();
193 z3950_proxy_array::iterator end = zservers.end();
194 while (here != end) {
195 collist.push_back((*here)->getName());
196 ++here;
197 }
198}
199
200void z3950proto::has_collection (const text_t &collection, bool &hascollection,
201 comerror_t &/*err*/, ostream &/*logout*/) {
202 z3950_proxy_array::iterator here = zservers.begin();
203 z3950_proxy_array::iterator end = zservers.end();
204 while (here != end) {
205 if((*here)->getName()==collection) {
206 hascollection=true;
207 return;
208 }
209 ++here;
210 }
211 hascollection=false;
212}
213
214void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
215 comerror_t &/*err*/, ostream &/*logout*/) {
216 // should we just ping the server, or actually create a connection
217 // to the z39.50 server process on the machine ?
218 wassuccess = true;
219}
220
221void z3950proto::get_collectinfo (const text_t &collection,
222 ColInfoResponse_t &collectinfo,
223 comerror_t &err, ostream &logout) {
224
225 // set err to protocolError if something goes wrong...
226 err=noError;
227
228 z3950_proxy_array::iterator here = zservers.begin();
229 z3950_proxy_array::iterator end = zservers.end();
230 while (here != end) {
231 if((*here)->getName()==collection) {
232 break;
233 }
234 ++here;
235 }
236
237 if (here==end) {
238 err=protocolError;
239 char *coll_str=collection.getcstr();
240 logout << "z39.50: couldn't find collection"
241 << coll_str
242 << endl;
243 delete []coll_str;
244 return;
245 }
246
247 const ShortColInfo_t *colinfo=(*here)->getInfo();
248 collectinfo.shortInfo.name=colinfo->name;
249 collectinfo.shortInfo.host=colinfo->host;
250 collectinfo.shortInfo.port=colinfo->port;
251
252 collectinfo.isPublic=true;
253 // don't use beta field
254 /*collectinfo.isBeta=false;*/
255 collectinfo.buildDate=1;
256 // leave ccsCols empty (no cross-coll. searching - for now)
257 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
258 // This info is available from the config file -- johnmcp
259 /*******collectinfo.languages.push_back("en");
260 collectinfo.languages.push_back("fr");********/
261 collectinfo.numDocs=0;
262 collectinfo.numWords=0;
263 collectinfo.numBytes=0;
264 // copy the text maps over.
265 // collectinfo.collectionmeta; // text_tmap
266 text_tmap collmeta = *((*here)->getMeta());
267 text_tmap::iterator mhere = collmeta.begin();
268 text_tmap::iterator mend = collmeta.end();
269 while (mhere != mend) {
270 collectinfo.collectionmeta[(*mhere).first][g_EmptyText] = (*mhere).second;
271 mhere ++;
272 }
273 //collectinfo.collectionmeta=*((*here)->getMeta());
274 collectinfo.format=*((*here)->getFormat()); //text_tmap
275 /* collectinfo.building; //text_tmap */
276
277 ////collectinfo.receptionist="z3950";
278 /* for now... this is a url, relative to .../cgi-bin.
279 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
280 */
281}
282
283void z3950proto::get_filterinfo (const text_t &/*collection*/,
284 InfoFiltersResponse_t &response,
285 comerror_t &/*err*/, ostream &/*logout*/) {
286 // we'll fake it here, and say we have set up some filters
287 response.filterNames.insert("BrowseFilter");
288 response.filterNames.insert("QueryFilter");
289 response.filterNames.insert("NullFilter");
290
291}
292
293void z3950proto::get_filteroptions (const text_t &/*collection*/,
294 const InfoFilterOptionsRequest_t &/*req*/,
295 InfoFilterOptionsResponse_t &response,
296 comerror_t &err, ostream &/*logout*/) {
297 // for now, assume all servers have the same characteristics
298 /* if (request.filterName=="QueryFilter") { }
299 else if (request.filterName=="BrowseFilter") { }
300 else if (request.filterName=="NullFilter") { } */
301 response.filterOptions["Index"].type=FilterOption_t::stringt;
302 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
303 response.filterOptions["Index"].defaultValue="any";
304 response.filterOptions["Index"].validValues.push_back(".any");
305 response.filterOptions["Index"].validValues.push_back(".title");
306 response.filterOptions["Index"].validValues.push_back(".author");
307 // and maybe ["Language"] option as well?
308 err=noError;
309}
310
311void z3950proto::filter (const text_t &collection,
312 FilterRequest_t &request,
313 FilterResponse_t &response,
314 comerror_t &err, ostream &logout) {
315 // this function is called when:
316 // * creating the title page,(looking for iconcoll* & collectname metadata)
317 // * creating the about page (looking for "Title" metadata)
318 // * doing the query - (note that a request for metadata comes first, then
319 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
320
321 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
322 // For the title page, we should not create a connection to the target
323 // (target means the actual z39.50 server, origin means us), but
324 // for the about page and query pages, we need to get information from the
325 // origin. (eg for the about page, we will print out some info such as ID,
326 // name and version.
327
328 // cerr now goes to errout.txt in etc directory
329 err=noError;
330
331 // get relevant "collection"
332 z3950_proxy_array::iterator zserver = zservers.begin();
333 z3950_proxy_array::iterator zend = zservers.end();
334 while (zserver != zend) {
335 if((*zserver)->getName()==collection) {
336 break;
337 }
338 ++zserver;
339 }
340 // now have collection in zserver.
341
342 ColInfoResponse_t info;
343 ResultDocInfo_t *docInfo;
344
345 // leave response.termInfo empty
346 // response.termInfo.push_back(g_EmptyText); ??????? (should be empty if not req.)
347
348 // See if this is for a query action
349 if (request.filterName=="QueryFilter") {
350 /* Sample OptionValue pairs
351 `StartResults'=`1'
352 `EndResults'=`20'
353 `Term'=`firstword secondword' (term is just whatever the user typed in)
354 `QueryType'=ranked|boolean -> OR|AND
355 //`MatchMode'=`some' => 'OR'
356 //`MatchMode' = `all' => 'AND'
357 `Casefold'=`true'
358 `Stem'=`false'
359 `Maxdocs'=`50'
360 */
361 // go through options
362 text_t opt_term; // the term(s) that the user entered
363 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
364 text_t opt_fields; // which fields to search on
365 int opt_start=1, opt_end=20; // default values
366 int nummatches=0, maxdocs=50; // default values
367 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
368 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
369 while (ov_here != ov_end) {
370 if (ov_here->name=="Term") {
371 opt_term=ov_here->value;
372 } else if (ov_here->name=="QueryType") {
373 if (ov_here->value=="ranked") opt_querytype=1;
374 else if (ov_here->value=="boolean") opt_querytype=2;
375 else { /* error - shouldn't happen */
376 /* currently unhandled */
377 }
378 } else if (ov_here->name=="Index") {
379 opt_fields=ov_here->value;
380 } else if (ov_here->name=="StartResults") {
381 opt_start=ov_here->value.getint();
382 } else if (ov_here->name=="EndResults") {
383 opt_end=ov_here->value.getint();
384 } else if (ov_here->name=="Maxdocs") {
385 maxdocs=ov_here->value.getint();
386 }
387 ++ov_here;
388 }
389 err=noError;
390 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
391 opt_querytype,
392 opt_fields,
393 opt_start, // first to get
394 opt_end-opt_start, //count
395 &nummatches,err);
396 if (err!=noError) {
397 // can we return an err msg in a response, or just use
398 // the more drastic Greenstone error mechanism?
399 docInfo=new ResultDocInfo_t;
400 response.docInfo.push_back(*docInfo);
401 docInfo->metadata["Title"].values.push_back("Error - query err?");
402 logout << "\nz3950 filter query: error connecting to server\n";
403 // for now, DON'T use GSDL protocol err.
404 err=noError;
405 return;
406 }
407 // check if (titles==NULL) - only happens on error?
408 if (nummatches>0) {
409 text_tarray::iterator titles_here=titles->begin();
410 text_tarray::iterator titles_end=titles->end();
411 int counter=1;
412 while (titles_here!=titles_end) {
413 docInfo=new ResultDocInfo_t;
414 docInfo->metadata["Title"].values.push_back(*titles_here);
415 docInfo->result_num=counter;
416 // we need to give some OID, so we'll just use counter for now...
417 // make it the number into the whole possible retrieved set.
418 docInfo->OID=counter+opt_start-1;
419 response.docInfo.push_back(*docInfo);
420 ++counter;
421 ++titles_here;
422 }
423 }
424
425 if (request.filterResultOptions & FRtermFreq) {
426 if (nummatches>maxdocs) {
427 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
428 response.isApprox=MoreThan;
429 } else {
430 response.numDocs=nummatches; // eg "36 documents"
431 response.isApprox=Exact; // Exact | Approximate | MoreThan
432 }
433 } // end of if (... & FRtermFreq)
434
435 } // end of if (... == "QueryFilter")
436 else {
437 // this wasn't a query action
438
439 if (request.filterOptions.size()>0 &&
440 request.filterOptions[0].name=="ParentNode") {
441 // don't want to return anything
442 return;
443 /* } else if (request.docSet.size() &&
444 request.docSet[0]!="collection") {
445 // documentaction
446 // if docSet is not empty, it is either "collection", or an array
447 // of OIDs
448 docInfo=new ResultDocInfo_t;
449 response.docInfo.push_back(*docInfo);
450 */
451 } else {
452 // in case we need to return only metadata
453 docInfo=new ResultDocInfo_t;
454 response.docInfo.push_back(*docInfo);
455 }
456 } // end of not a query action
457
458
459
460 // Fill in metadata for each response.docInfo (if wanted)
461 if (request.filterResultOptions & FRmetadata) {
462 get_collectinfo (collection, info, err, logout);
463 // should check err returned here....
464
465 if (!request.fields.empty()) {
466 // currently, this is only true for NullFilter when getting the "Title"
467 // for a documentaction.
468
469 // loop on each document being returned
470 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
471 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
472 while (docs_here!=docs_end) {
473 // loop on all the metadata fields in request.fields (type text_tset)
474 text_tset::iterator fields_here=request.fields.begin();
475 text_tset::iterator fields_end=request.fields.end();
476 //text_tmap::iterator it;
477 collectionmeta_map::iterator it;
478 while (fields_here!=fields_end) {
479 it=info.collectionmeta.find(*fields_here);
480 if (it!=info.collectionmeta.end())
481 docs_here->metadata[*fields_here].values.push_back(((*it).second)[g_EmptyText]);
482 else if (*fields_here=="Title" && !request.docSet.empty()) {
483 // We only do this for a document action.
484 // (This comes through as a NullQuery).
485 // hopefully docSet is only not empty for documentaction...
486 text_t doctitle;
487 int i;
488 // check that docSet isn't empty first!!!!!!
489 i=request.docSet[0].getint();
490 text_t doctext="unneeded";
491 /* following variables aren't used, as our query result has been
492 cached in z3950proxy.cpp (but really we shouldn't know that
493 here...) But for the NullFilter, we don't get given these
494 again in the request, so for now we'll take advantage of this.*/
495
496 int querytype=0; text_t field=g_EmptyText;
497
498 // get the Query out of the filterOptions. (we need get the Title)
499 text_t query=g_EmptyText;
500 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
501 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
502 while (opthere!=opt_end) {
503 if (opthere->name=="Term") {
504 query=opthere->value;
505 } else if (opthere->name=="Index") {
506 field=opthere->value;
507 } else if (opthere->name=="QueryType") {
508 if (opthere->value=="ranked") querytype=1;
509 else if (opthere->value=="boolean") querytype=2;
510 else { /* error - shouldn't happen */
511 /* currently unhandled */
512 }
513 }
514 ++opthere;
515 }
516 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
517 // check err value!
518 docs_here->metadata["Title"].values.push_back(doctitle);
519 } else {
520 docs_here->metadata[*fields_here].values.push_back(g_EmptyText);
521 }
522 ++fields_here;
523 } // end of inner while loop
524 ++docs_here;
525 } // end of outer while loop
526 } // end of if (!request.fields.empty())
527
528 else { // request.fields empty: return all metadata for about page or query
529 // we'll only put it in the first docInfo.
530 collectionmeta_map::iterator colmeta_here=info.collectionmeta.begin();
531 collectionmeta_map::iterator colmeta_end=info.collectionmeta.end();
532 while (colmeta_here!=colmeta_end) {
533 response.docInfo[0].metadata[(*colmeta_here).first].
534 values.push_back(((*colmeta_here).second)[g_EmptyText]);
535 ++colmeta_here;
536 }
537
538 // check if "collectionextra" metadata is set. If it isn't, we should
539 // create connection to target to get it.
540 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
541 // it hasn't been set yet...
542 text_t abouttext="<B>Server Online</B><br>\n";
543 abouttext+=(*zserver)->getzAbout();
544 // add in the "About" text we read in from config file.
545 // how do we incorporate multi-lingual metadata?
546 abouttext+="<P>\n";
547 text_t tmpabout;
548
549 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
550 abouttext+=tmpabout;
551
552 (*zserver)->setMeta("collectionextra",abouttext);
553 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
554 }
555 } // end of else
556
557 // do indices' names, regardless of whether asked for or not...
558 if (!response.docInfo.empty()) {
559 response.docInfo[0].metadata[".author"].values.push_back("author fields");
560 response.docInfo[0].metadata[".title"].values.push_back("title fields");
561 response.docInfo[0].metadata[".any"].values.push_back("any fields");
562 }
563 } //end of if (... & FRmetadata) ...
564}
565
566
567
568void z3950proto::get_document (const text_t &collection,
569 const DocumentRequest_t &request,
570 DocumentResponse_t &response,
571 comerror_t &err, ostream &logout) {
572
573 err=noError;
574
575 // get relevant "collection"
576 z3950_proxy_array::iterator zserver = zservers.begin();
577 z3950_proxy_array::iterator zend = zservers.end();
578 while (zserver != zend) {
579 if((*zserver)->getName()==collection) {
580 break;
581 }
582 ++zserver;
583 }
584 // now have collection in zserver.
585
586 /* docresponse consists of
587 text_t response.doc */
588 text_t title="unneeded";
589 text_t doctext;
590 text_t query; // this should not be needed, as we have already connected to
591 // get the title....
592 int querytype = 1; //ditto...
593 text_t field; // ditto...
594 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
595 title,doctext,err);
596 // check return value of above? (false=>not connected)
597 if (err==noError)
598 response.doc=doctext;
599 else {
600 // could print out different messages based on error type....
601 response.doc="<h2>Error</h2>There was an error while connecting to the ";
602 response.doc+="z39.50 server (ie target). Most likely this was a \n";
603 response.doc+="\"Connection Refused\" error.\n";
604
605 }
606 if (0) {
607 err=protocolError;
608 logout << "Some error\n";
609 }
610}
611
612// sets issearchable to true if the given colection is searchable
613void z3950proto::is_searchable (const text_t &/*collection*/, bool &issearchable,
614 comerror_t &err, ostream &/*logout*/) {
615 issearchable = true; // assume all collections are searchable?
616 err = noError;
617}
618
Note: See TracBrowser for help on using the repository browser.