source: trunk/gsdl/src/recpt/z3950proto.cpp@ 10361

Last change on this file since 10361 was 9933, checked in by kjdon, 19 years ago

made it compile with the new ColInfoResponse collectionmeta type

  • Property svn:keywords set to Author Date Id Revision
File size: 20.4 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32#include <stdio.h> // for (FILE *) type for yyin and fopen.
33// config file parsing stuff
34#include "z3950cfg.h" // for reading in config files -
35// defines "struct z3950cfg *zserver_list" as the head of the list.
36
// Streams used by the config-file scanner.
// NOTE: z3950cfg.h #defines yyin to a different name, so this declaration
// does not clash with any "other" yyin linked into the program.
extern FILE *yyin;   // scanner input
extern FILE *yyout;  // redirect for `errors' (unmatched tokens in flex)

// The config-file parser (bison output of zparse.y); it has C linkage.
extern "C" int zconfigparse();
45
46
47
48
49z3950proto::z3950proto() {
50 zserver_count=0;
51}
52
53z3950proto::~z3950proto() {
54}
55
56void z3950proto::add_server (z3950_proxy& zserver) {
57
58 // append the new server
59 ++zserver_count;
60 zservers.push_back(&zserver);
61}
62
63void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
64 struct z3950cfg *here;
65 struct z3950cfg *oldhere;
66 z3950_proxy *zserver;
67 ShortColInfo_t *tempinfo;
68
69 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
70
71 char *errf_str=errf.getcstr();
72 if ((errfile=fopen(errf_str,"a"))==NULL) {
73 // what do we do if we can't open the error file?
74 // this means that errors will go to stderr, which may stuff up
75 // any cgi headers and the page.
76 errfile=stderr;
77 }
78 delete []errf_str;
79 yyout=errfile;
80
81 // zconfigparse() is defined in zparse.tab.c,
82 // which is the bison output of zparse.y
83
84 char *filename_str=filename.getcstr();
85 yyin=fopen(filename_str,"r");
86 if (yyin==NULL) {
87 cerr << "Could not open "<<filename_str<<" for reading.\n";
88 delete []filename_str;
89 return;
90 }
91 delete []filename_str;
92 zconfigparse();
93
94 if (errfile!=stderr)
95 fclose(errfile);
96
97 // we now have the config files in the ptr zserver_list
98 if (zserver_list==NULL)
99 return; // no valid servers found in the config file - note that
100 // the parser will have already spat out any errors.
101
102 // now create z3950 proxies for each structure in server_list
103 here=zserver_list;
104 while (here!=NULL) {
105 zserver=new z3950_proxy;
106 tempinfo=new ShortColInfo_t;
107
108 tempinfo->host.setcstr(here->hostname);
109 tempinfo->port=here->port;
110 tempinfo->name.setcstr(here->dbname);
111 zserver->setInfo(tempinfo);
112 zserver->setName(here->shortname);
113 // now collection metadata.
114 zserver->setMeta("collectionname",here->longname);
115 if (here->icon!=NULL)
116 zserver->setMeta("iconcollection",here->icon);
117 if (here->smallicon!=NULL)
118 zserver->setMeta("iconcollectionsmall",here->smallicon);
119
120 /* filterclass *filter = new filterclass ();
121 zserver->add_filter (filter);
122 browsefilterclass *browsefilter = new browsefilterclass();
123 zserver->add_filter (browsefilter);
124 queryfilterclass *queryfilter = new queryfilterclass();
125 zserver->add_filter (queryfilter);
126 */
127
128 // About list
129 if (here->about!=NULL) {
130 struct z3950aboutlist *about_here=here->about;
131 struct z3950aboutlist *oldabout;
132
133 while (about_here!=NULL) {
134 // problem with default lang (null): can't add ("",..)
135 if (about_here->lang==NULL)
136 zserver->addcfgAbout("en",about_here->text);
137 else
138 zserver->addcfgAbout(about_here->lang, about_here->text);
139 oldabout=about_here;
140 about_here=about_here->next;
141 free(oldabout->lang);
142 free(oldabout->text);
143 free(oldabout);
144 }
145 }
146
147 oldhere=here;
148 here=here->next;
149 free(oldhere->shortname); // these 4 strings should all be non-NULL...
150 free(oldhere->hostname);
151 free(oldhere->dbname);
152 free(oldhere->longname);
153 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
154 if (oldhere->smallicon) free(oldhere->smallicon);
155 free(oldhere);
156
157 add_server(*zserver);
158 } // end of while loop.
159
160}
161
162void z3950proto::configure (const text_t &/*key*/,
163 const text_tarray &/*cfgline*/) {
164 // this is called for each line in the gsdlsite.cfg file
165}
166
167
168bool z3950proto::init (ostream &/*logout*/) {
169 // set up tcp connection to server here?
170 // we might also read in the config file here (instead of librarymain.cpp)
171
172 //
173
174 // logout goes to initout.txt
175 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
176 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
177 return true;
178
179}
180
181/*text_t z3950proto::get_protocol_name () {
182 return "z3950proto";
183}
184*/
185
186void z3950proto::get_collection_list (text_tarray &collist,
187 comerror_t &/*err*/,
188 ostream &/*logout*/) {
189
190 z3950_proxy_array::iterator here = zservers.begin();
191 z3950_proxy_array::iterator end = zservers.end();
192 while (here != end) {
193 collist.push_back((*here)->getName());
194 ++here;
195 }
196}
197
198void z3950proto::has_collection (const text_t &collection, bool &hascollection,
199 comerror_t &/*err*/, ostream &/*logout*/) {
200 z3950_proxy_array::iterator here = zservers.begin();
201 z3950_proxy_array::iterator end = zservers.end();
202 while (here != end) {
203 if((*here)->getName()==collection) {
204 hascollection=true;
205 return;
206 }
207 ++here;
208 }
209 hascollection=false;
210}
211
212void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
213 comerror_t &/*err*/, ostream &/*logout*/) {
214 // should we just ping the server, or actually create a connection
215 // to the z39.50 server process on the machine ?
216 wassuccess = true;
217}
218
219void z3950proto::get_collectinfo (const text_t &collection,
220 ColInfoResponse_t &collectinfo,
221 comerror_t &err, ostream &logout) {
222
223 // set err to protocolError if something goes wrong...
224 err=noError;
225
226 z3950_proxy_array::iterator here = zservers.begin();
227 z3950_proxy_array::iterator end = zservers.end();
228 while (here != end) {
229 if((*here)->getName()==collection) {
230 break;
231 }
232 ++here;
233 }
234
235 if (here==end) {
236 err=protocolError;
237 char *coll_str=collection.getcstr();
238 logout << "z39.50: couldn't find collection"
239 << coll_str
240 << endl;
241 delete []coll_str;
242 return;
243 }
244
245 const ShortColInfo_t *colinfo=(*here)->getInfo();
246 collectinfo.shortInfo.name=colinfo->name;
247 collectinfo.shortInfo.host=colinfo->host;
248 collectinfo.shortInfo.port=colinfo->port;
249
250 collectinfo.isPublic=true;
251 // don't use beta field
252 /*collectinfo.isBeta=false;*/
253 collectinfo.buildDate=1;
254 // leave ccsCols empty (no cross-coll. searching - for now)
255 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
256 // This info is available from the config file -- johnmcp
257 /*******collectinfo.languages.push_back("en");
258 collectinfo.languages.push_back("fr");********/
259 collectinfo.numDocs=0;
260 collectinfo.numWords=0;
261 collectinfo.numBytes=0;
262 // copy the text maps over.
263 // collectinfo.collectionmeta; // text_tmap
264 text_tmap collmeta = *((*here)->getMeta());
265 text_tmap::iterator mhere = collmeta.begin();
266 text_tmap::iterator mend = collmeta.end();
267 while (mhere != mend) {
268 collectinfo.collectionmeta[(*mhere).first][g_EmptyText] = (*mhere).second;
269 mhere ++;
270 }
271 //collectinfo.collectionmeta=*((*here)->getMeta());
272 collectinfo.format=*((*here)->getFormat()); //text_tmap
273 /* collectinfo.building; //text_tmap */
274
275 ////collectinfo.receptionist="z3950";
276 /* for now... this is a url, relative to .../cgi-bin.
277 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
278 */
279}
280
281void z3950proto::get_filterinfo (const text_t &/*collection*/,
282 InfoFiltersResponse_t &response,
283 comerror_t &/*err*/, ostream &/*logout*/) {
284 // we'll fake it here, and say we have set up some filters
285 response.filterNames.insert("BrowseFilter");
286 response.filterNames.insert("QueryFilter");
287 response.filterNames.insert("NullFilter");
288
289}
290
291void z3950proto::get_filteroptions (const text_t &/*collection*/,
292 const InfoFilterOptionsRequest_t &/*req*/,
293 InfoFilterOptionsResponse_t &response,
294 comerror_t &err, ostream &/*logout*/) {
295 // for now, assume all servers have the same characteristics
296 /* if (request.filterName=="QueryFilter") { }
297 else if (request.filterName=="BrowseFilter") { }
298 else if (request.filterName=="NullFilter") { } */
299 response.filterOptions["Index"].type=FilterOption_t::stringt;
300 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
301 response.filterOptions["Index"].defaultValue="any";
302 response.filterOptions["Index"].validValues.push_back(".any");
303 response.filterOptions["Index"].validValues.push_back(".title");
304 response.filterOptions["Index"].validValues.push_back(".author");
305 // and maybe ["Language"] option as well?
306 err=noError;
307}
308
309void z3950proto::filter (const text_t &collection,
310 FilterRequest_t &request,
311 FilterResponse_t &response,
312 comerror_t &err, ostream &logout) {
313 // this function is called when:
314 // * creating the title page,(looking for iconcoll* & collectname metadata)
315 // * creating the about page (looking for "Title" metadata)
316 // * doing the query - (note that a request for metadata comes first, then
317 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
318
319 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
320 // For the title page, we should not create a connection to the target
321 // (target means the actual z39.50 server, origin means us), but
322 // for the about page and query pages, we need to get information from the
323 // origin. (eg for the about page, we will print out some info such as ID,
324 // name and version.
325
326 // cerr now goes to errout.txt in etc directory
327 err=noError;
328
329 // get relevant "collection"
330 z3950_proxy_array::iterator zserver = zservers.begin();
331 z3950_proxy_array::iterator zend = zservers.end();
332 while (zserver != zend) {
333 if((*zserver)->getName()==collection) {
334 break;
335 }
336 ++zserver;
337 }
338 // now have collection in zserver.
339
340 ColInfoResponse_t info;
341 ResultDocInfo_t *docInfo;
342
343 // leave response.termInfo empty
344 // response.termInfo.push_back(g_EmptyText); ??????? (should be empty if not req.)
345
346 // See if this is for a query action
347 if (request.filterName=="QueryFilter") {
348 /* Sample OptionValue pairs
349 `StartResults'=`1'
350 `EndResults'=`20'
351 `Term'=`firstword secondword' (term is just whatever the user typed in)
352 `QueryType'=ranked|boolean -> OR|AND
353 //`MatchMode'=`some' => 'OR'
354 //`MatchMode' = `all' => 'AND'
355 `Casefold'=`true'
356 `Stem'=`false'
357 `Maxdocs'=`50'
358 */
359 // go through options
360 text_t opt_term; // the term(s) that the user entered
361 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
362 text_t opt_fields; // which fields to search on
363 int opt_start=1, opt_end=20; // default values
364 int nummatches=0, maxdocs=50; // default values
365 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
366 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
367 while (ov_here != ov_end) {
368 if (ov_here->name=="Term") {
369 opt_term=ov_here->value;
370 } else if (ov_here->name=="QueryType") {
371 if (ov_here->value=="ranked") opt_querytype=1;
372 else if (ov_here->value=="boolean") opt_querytype=2;
373 else { /* error - shouldn't happen */
374 /* currently unhandled */
375 }
376 } else if (ov_here->name=="Index") {
377 opt_fields=ov_here->value;
378 } else if (ov_here->name=="StartResults") {
379 opt_start=ov_here->value.getint();
380 } else if (ov_here->name=="EndResults") {
381 opt_end=ov_here->value.getint();
382 } else if (ov_here->name=="Maxdocs") {
383 maxdocs=ov_here->value.getint();
384 }
385 ++ov_here;
386 }
387 err=noError;
388 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
389 opt_querytype,
390 opt_fields,
391 opt_start, // first to get
392 opt_end-opt_start, //count
393 &nummatches,err);
394 if (err!=noError) {
395 // can we return an err msg in a response, or just use
396 // the more drastic Greenstone error mechanism?
397 docInfo=new ResultDocInfo_t;
398 response.docInfo.push_back(*docInfo);
399 docInfo->metadata["Title"].values.push_back("Error - query err?");
400 logout << "\nz3950 filter query: error connecting to server\n";
401 // for now, DON'T use GSDL protocol err.
402 err=noError;
403 return;
404 }
405 // check if (titles==NULL) - only happens on error?
406 if (nummatches>0) {
407 text_tarray::iterator titles_here=titles->begin();
408 text_tarray::iterator titles_end=titles->end();
409 int counter=1;
410 while (titles_here!=titles_end) {
411 docInfo=new ResultDocInfo_t;
412 docInfo->metadata["Title"].values.push_back(*titles_here);
413 docInfo->result_num=counter;
414 // we need to give some OID, so we'll just use counter for now...
415 // make it the number into the whole possible retrieved set.
416 docInfo->OID=counter+opt_start-1;
417 response.docInfo.push_back(*docInfo);
418 ++counter;
419 ++titles_here;
420 }
421 }
422
423 if (request.filterResultOptions & FRtermFreq) {
424 if (nummatches>maxdocs) {
425 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
426 response.isApprox=MoreThan;
427 } else {
428 response.numDocs=nummatches; // eg "36 documents"
429 response.isApprox=Exact; // Exact | Approximate | MoreThan
430 }
431 } // end of if (... & FRtermFreq)
432
433 } // end of if (... == "QueryFilter")
434 else {
435 // this wasn't a query action
436
437 if (request.filterOptions.size()>0 &&
438 request.filterOptions[0].name=="ParentNode") {
439 // don't want to return anything
440 return;
441 /* } else if (request.docSet.size() &&
442 request.docSet[0]!="collection") {
443 // documentaction
444 // if docSet is not empty, it is either "collection", or an array
445 // of OIDs
446 docInfo=new ResultDocInfo_t;
447 response.docInfo.push_back(*docInfo);
448 */
449 } else {
450 // in case we need to return only metadata
451 docInfo=new ResultDocInfo_t;
452 response.docInfo.push_back(*docInfo);
453 }
454 } // end of not a query action
455
456
457
458 // Fill in metadata for each response.docInfo (if wanted)
459 if (request.filterResultOptions & FRmetadata) {
460 get_collectinfo (collection, info, err, logout);
461 // should check err returned here....
462
463 if (!request.fields.empty()) {
464 // currently, this is only true for NullFilter when getting the "Title"
465 // for a documentaction.
466
467 // loop on each document being returned
468 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
469 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
470 while (docs_here!=docs_end) {
471 // loop on all the metadata fields in request.fields (type text_tset)
472 text_tset::iterator fields_here=request.fields.begin();
473 text_tset::iterator fields_end=request.fields.end();
474 //text_tmap::iterator it;
475 collectionmeta_map::iterator it;
476 while (fields_here!=fields_end) {
477 it=info.collectionmeta.find(*fields_here);
478 if (it!=info.collectionmeta.end())
479 docs_here->metadata[*fields_here].values.push_back(((*it).second)[g_EmptyText]);
480 else if (*fields_here=="Title" && !request.docSet.empty()) {
481 // We only do this for a document action.
482 // (This comes through as a NullQuery).
483 // hopefully docSet is only not empty for documentaction...
484 text_t doctitle;
485 int i;
486 // check that docSet isn't empty first!!!!!!
487 i=request.docSet[0].getint();
488 text_t doctext="unneeded";
489 /* following variables aren't used, as our query result has been
490 cached in z3950proxy.cpp (but really we shouldn't know that
491 here...) But for the NullFilter, we don't get given these
492 again in the request, so for now we'll take advantage of this.*/
493
494 int querytype=0; text_t field=g_EmptyText;
495
496 // get the Query out of the filterOptions. (we need get the Title)
497 text_t query=g_EmptyText;
498 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
499 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
500 while (opthere!=opt_end) {
501 if (opthere->name=="Term") {
502 query=opthere->value;
503 } else if (opthere->name=="Index") {
504 field=opthere->value;
505 } else if (opthere->name=="QueryType") {
506 if (opthere->value=="ranked") querytype=1;
507 else if (opthere->value=="boolean") querytype=2;
508 else { /* error - shouldn't happen */
509 /* currently unhandled */
510 }
511 }
512 ++opthere;
513 }
514 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
515 // check err value!
516 docs_here->metadata["Title"].values.push_back(doctitle);
517 } else {
518 docs_here->metadata[*fields_here].values.push_back(g_EmptyText);
519 }
520 ++fields_here;
521 } // end of inner while loop
522 ++docs_here;
523 } // end of outer while loop
524 } // end of if (!request.fields.empty())
525
526 else { // request.fields empty: return all metadata for about page or query
527 // we'll only put it in the first docInfo.
528 collectionmeta_map::iterator colmeta_here=info.collectionmeta.begin();
529 collectionmeta_map::iterator colmeta_end=info.collectionmeta.end();
530 while (colmeta_here!=colmeta_end) {
531 response.docInfo[0].metadata[(*colmeta_here).first].
532 values.push_back(((*colmeta_here).second)[g_EmptyText]);
533 ++colmeta_here;
534 }
535
536 // check if "collectionextra" metadata is set. If it isn't, we should
537 // create connection to target to get it.
538 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
539 // it hasn't been set yet...
540 text_t abouttext="<B>Server Online</B><br>\n";
541 abouttext+=(*zserver)->getzAbout();
542 // add in the "About" text we read in from config file.
543 // how do we incorporate multi-lingual metadata?
544 abouttext+="<P>\n";
545 text_t tmpabout;
546
547 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
548 abouttext+=tmpabout;
549
550 (*zserver)->setMeta("collectionextra",abouttext);
551 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
552 }
553 } // end of else
554
555 // do indices' names, regardless of whether asked for or not...
556 if (!response.docInfo.empty()) {
557 response.docInfo[0].metadata[".author"].values.push_back("author fields");
558 response.docInfo[0].metadata[".title"].values.push_back("title fields");
559 response.docInfo[0].metadata[".any"].values.push_back("any fields");
560 }
561 } //end of if (... & FRmetadata) ...
562}
563
564
565
566void z3950proto::get_document (const text_t &collection,
567 const DocumentRequest_t &request,
568 DocumentResponse_t &response,
569 comerror_t &err, ostream &logout) {
570
571 err=noError;
572
573 // get relevant "collection"
574 z3950_proxy_array::iterator zserver = zservers.begin();
575 z3950_proxy_array::iterator zend = zservers.end();
576 while (zserver != zend) {
577 if((*zserver)->getName()==collection) {
578 break;
579 }
580 ++zserver;
581 }
582 // now have collection in zserver.
583
584 /* docresponse consists of
585 text_t response.doc */
586 text_t title="unneeded";
587 text_t doctext;
588 text_t query; // this should not be needed, as we have already connected to
589 // get the title....
590 int querytype; //ditto...
591 text_t field; // ditto...
592 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
593 title,doctext,err);
594 // check return value of above? (false=>not connected)
595 if (err==noError)
596 response.doc=doctext;
597 else {
598 // could print out different messages based on error type....
599 response.doc="<h2>Error</h2>There was an error while connecting to the ";
600 response.doc+="z39.50 server (ie target). Most likely this was a \n";
601 response.doc+="\"Connection Refused\" error.\n";
602
603 }
604 if (0) {
605 err=protocolError;
606 logout << "Some error\n";
607 }
608}
609
610// sets issearchable to true if the given colection is searchable
611void z3950proto::is_searchable (const text_t &/*collection*/, bool &issearchable,
612 comerror_t &err, ostream &/*logout*/) {
613 issearchable = true; // assume all collections are searchable?
614 err = noError;
615}
616
Note: See TracBrowser for help on using the repository browser.