source: gsdl/trunk/runtime-src/src/z3950/z3950proto.cpp@ 17863

Last change on this file since 17863 was 17863, checked in by anna, 15 years ago

change the name tidy_html switch in HTMLPlug into use_realistic_book in HTMLPlugin

File size: 20.3 KB
Line 
1/**********************************************************************
2 *
3 * z3950proto.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "z3950proto.h"
27#include "comtypes.h"
28#include "browsefilter.h"
29#include "queryfilter.h"
30#include "filter.h"
31// z39.50 yaz stuff
32
33#include "z3950cfg.h" // for reading in config files -
34// defines "struct z3950cfg *zserver_list" as the head of the list.
35
36// note! yyin is hash-defined in z3950cfg.h to something else, to avoid
37// name conflicts with "other" yyins.
38extern FILE *yyin;
39extern FILE *yyout; // redirect for `errors' (unmatched tokens in flex)
40
41
42
43z3950proto::z3950proto() {
44 zserver_count=0;
45}
46
47z3950proto::~z3950proto() {
48}
49
50void z3950proto::add_server (z3950_proxy& zserver) {
51
52 // append the new server
53 ++zserver_count;
54 zservers.push_back(&zserver);
55}
56
57void z3950proto::read_config_file(const text_t &filename, const text_t &errf) {
58 struct z3950cfg *here;
59 struct z3950cfg *oldhere;
60 z3950_proxy *zserver;
61 ShortColInfo_t *tempinfo;
62
63 // FILE *errfile declared in z3950cfg.h, defined in zparse.y
64
65 char *errf_str=errf.getcstr();
66 if ((errfile=fopen(errf_str,"a"))==NULL) {
67 // what do we do if we can't open the error file?
68 // this means that errors will go to stderr, which may stuff up
69 // any cgi headers and the page.
70 errfile=stderr;
71 }
72 delete []errf_str;
73 yyout=errfile;
74
75
76 char *filename_str=filename.getcstr();
77 yyin=fopen(filename_str,"r");
78 if (yyin==NULL) {
79 cerr << "Could not open "<<filename_str<<" for reading.\n";
80 delete []filename_str;
81 return;
82 }
83 delete []filename_str;
84
85 // this is in zparse.tab.c,
86 // which is the bison output of zparse.y
87 yyparse();
88
89 if (errfile!=stderr)
90 fclose(errfile);
91
92 // we now have the config files in the ptr zserver_list
93 if (zserver_list==NULL)
94 return; // no valid servers found in the config file - note that
95 // the parser will have already spat out any errors.
96
97 // now create z3950 proxies for each structure in server_list
98 here=zserver_list;
99 while (here!=NULL) {
100 zserver=new z3950_proxy;
101 tempinfo=new ShortColInfo_t;
102
103 tempinfo->host.setcstr(here->hostname);
104 tempinfo->port=here->port;
105 tempinfo->name.setcstr(here->dbname);
106 zserver->setInfo(tempinfo);
107 zserver->setName(here->shortname);
108 // now collection metadata.
109 zserver->setMeta("collectionname",here->longname);
110 if (here->icon!=NULL)
111 zserver->setMeta("iconcollection",here->icon);
112 if (here->smallicon!=NULL)
113 zserver->setMeta("iconcollectionsmall",here->smallicon);
114
115 /* filterclass *filter = new filterclass ();
116 zserver->add_filter (filter);
117 browsefilterclass *browsefilter = new browsefilterclass();
118 zserver->add_filter (browsefilter);
119 queryfilterclass *queryfilter = new queryfilterclass();
120 zserver->add_filter (queryfilter);
121 */
122
123 // About list
124 if (here->about!=NULL) {
125 struct z3950aboutlist *about_here=here->about;
126 struct z3950aboutlist *oldabout;
127
128 while (about_here!=NULL) {
129 // problem with default lang (null): can't add ("",..)
130 if (about_here->lang==NULL)
131 zserver->addcfgAbout("en",about_here->text);
132 else
133 zserver->addcfgAbout(about_here->lang, about_here->text);
134 oldabout=about_here;
135 about_here=about_here->next;
136 free(oldabout->lang);
137 free(oldabout->text);
138 free(oldabout);
139 }
140 }
141
142 oldhere=here;
143 here=here->next;
144 free(oldhere->shortname); // these 4 strings should all be non-NULL...
145 free(oldhere->hostname);
146 free(oldhere->dbname);
147 free(oldhere->longname);
148 if (oldhere->icon) free(oldhere->icon); // these 2 may be NULL
149 if (oldhere->smallicon) free(oldhere->smallicon);
150 free(oldhere);
151
152 add_server(*zserver);
153 } // end of while loop.
154
155}
156
157void z3950proto::configure (const text_t &/*key*/,
158 const text_tarray &/*cfgline*/) {
159 // this is called for each line in the gsdlsite.cfg file
160}
161
162
163bool z3950proto::init (ostream &/*logout*/) {
164 // set up tcp connection to server here?
165 // we might also read in the config file here (instead of librarymain.cpp)
166
167 //
168
169 // logout goes to initout.txt
170 // logout <<"zdebug:init:Number of z3950 servers: "<< zserver_count << "\n";
171 //logout << "\t1st server name: " << zservers[0].getName().getcstr() << "\n";
172 return true;
173
174}
175
176/*text_t z3950proto::get_protocol_name () {
177 return "z3950proto";
178}
179*/
180
181void z3950proto::get_collection_list (text_tarray &collist,
182 comerror_t &/*err*/,
183 ostream &/*logout*/) {
184
185 z3950_proxy_array::iterator here = zservers.begin();
186 z3950_proxy_array::iterator end = zservers.end();
187 while (here != end) {
188 collist.push_back((*here)->getName());
189 ++here;
190 }
191}
192
193void z3950proto::has_collection (const text_t &collection, bool &hascollection,
194 comerror_t &/*err*/, ostream &/*logout*/) {
195 z3950_proxy_array::iterator here = zservers.begin();
196 z3950_proxy_array::iterator end = zservers.end();
197 while (here != end) {
198 if((*here)->getName()==collection) {
199 hascollection=true;
200 return;
201 }
202 ++here;
203 }
204 hascollection=false;
205}
206
207void z3950proto::ping (const text_t &/*collection*/, bool &wassuccess,
208 comerror_t &/*err*/, ostream &/*logout*/) {
209 // should we just ping the server, or actually create a connection
210 // to the z39.50 server process on the machine ?
211 wassuccess = true;
212}
213
214void z3950proto::get_collectinfo (const text_t &collection,
215 ColInfoResponse_t &collectinfo,
216 comerror_t &err, ostream &logout) {
217
218 // set err to protocolError if something goes wrong...
219 err=noError;
220
221 z3950_proxy_array::iterator here = zservers.begin();
222 z3950_proxy_array::iterator end = zservers.end();
223 while (here != end) {
224 if((*here)->getName()==collection) {
225 break;
226 }
227 ++here;
228 }
229
230 if (here==end) {
231 err=protocolError;
232 char *coll_str=collection.getcstr();
233 logout << "z39.50: couldn't find collection"
234 << coll_str
235 << endl;
236 delete []coll_str;
237 return;
238 }
239
240 const ShortColInfo_t *colinfo=(*here)->getInfo();
241 collectinfo.shortInfo.name=colinfo->name;
242 collectinfo.shortInfo.host=colinfo->host;
243 collectinfo.shortInfo.port=colinfo->port;
244
245 collectinfo.isPublic=true;
246 // don't use beta field
247 /*collectinfo.isBeta=false;*/
248 collectinfo.useBook=false;
249 collectinfo.buildDate=1;
250 // leave ccsCols empty (no cross-coll. searching - for now)
251 /*collectinfo.ccsCols=(text_tarray);*/ //not like this!!!
252 // This info is available from the config file -- johnmcp
253 /*******collectinfo.languages.push_back("en");
254 collectinfo.languages.push_back("fr");********/
255 collectinfo.numDocs=0;
256 collectinfo.numWords=0;
257 collectinfo.numBytes=0;
258 // copy the text maps over.
259 // collectinfo.collectionmeta; // text_tmap
260 text_tmap collmeta = *((*here)->getMeta());
261 text_tmap::iterator mhere = collmeta.begin();
262 text_tmap::iterator mend = collmeta.end();
263 while (mhere != mend) {
264 collectinfo.collectionmeta[(*mhere).first][g_EmptyText] = (*mhere).second;
265 mhere ++;
266 }
267 //collectinfo.collectionmeta=*((*here)->getMeta());
268 collectinfo.format=*((*here)->getFormat()); //text_tmap
269 /* collectinfo.building; //text_tmap */
270
271 ////collectinfo.receptionist="z3950";
272 /* for now... this is a url, relative to .../cgi-bin.
273 NOTE: if this is empty, it defaults to _gwcgi_?a=p&p=about&c=<colname>
274 */
275}
276
277void z3950proto::get_filterinfo (const text_t &/*collection*/,
278 InfoFiltersResponse_t &response,
279 comerror_t &/*err*/, ostream &/*logout*/) {
280 // we'll fake it here, and say we have set up some filters
281 response.filterNames.insert("BrowseFilter");
282 response.filterNames.insert("QueryFilter");
283 response.filterNames.insert("NullFilter");
284
285}
286
287void z3950proto::get_filteroptions (const text_t &/*collection*/,
288 const InfoFilterOptionsRequest_t &/*req*/,
289 InfoFilterOptionsResponse_t &response,
290 comerror_t &err, ostream &/*logout*/) {
291 // for now, assume all servers have the same characteristics
292 /* if (request.filterName=="QueryFilter") { }
293 else if (request.filterName=="BrowseFilter") { }
294 else if (request.filterName=="NullFilter") { } */
295 response.filterOptions["Index"].type=FilterOption_t::stringt;
296 response.filterOptions["Index"].repeatable=FilterOption_t::onePerQuery;
297 response.filterOptions["Index"].defaultValue="any";
298 response.filterOptions["Index"].validValues.push_back(".any");
299 response.filterOptions["Index"].validValues.push_back(".title");
300 response.filterOptions["Index"].validValues.push_back(".author");
301 // and maybe ["Language"] option as well?
302 err=noError;
303}
304
305void z3950proto::filter (const text_t &collection,
306 FilterRequest_t &request,
307 FilterResponse_t &response,
308 comerror_t &err, ostream &logout) {
309 // this function is called when:
310 // * creating the title page,(looking for iconcoll* & collectname metadata)
311 // * creating the about page (looking for "Title" metadata)
312 // * doing the query - (note that a request for metadata comes first, then
313 // filterOptions = FRmetadata | FROID | FRtermFreq (64+4+1)
314
315 // metadata-only requests have filterName="NullFilter", else "QueryFilter".
316 // For the title page, we should not create a connection to the target
317 // (target means the actual z39.50 server, origin means us), but
318 // for the about page and query pages, we need to get information from the
319 // origin. (eg for the about page, we will print out some info such as ID,
320 // name and version.
321
322 // cerr now goes to errout.txt in etc directory
323 err=noError;
324
325 // get relevant "collection"
326 z3950_proxy_array::iterator zserver = zservers.begin();
327 z3950_proxy_array::iterator zend = zservers.end();
328 while (zserver != zend) {
329 if((*zserver)->getName()==collection) {
330 break;
331 }
332 ++zserver;
333 }
334 // now have collection in zserver.
335
336 ColInfoResponse_t info;
337 ResultDocInfo_t *docInfo;
338
339 // leave response.termInfo empty
340 // response.termInfo.push_back(g_EmptyText); ??????? (should be empty if not req.)
341
342 // See if this is for a query action
343 if (request.filterName=="QueryFilter") {
344 /* Sample OptionValue pairs
345 `StartResults'=`1'
346 `EndResults'=`20'
347 `Term'=`firstword secondword' (term is just whatever the user typed in)
348 `QueryType'=ranked|boolean -> OR|AND
349 //`MatchMode'=`some' => 'OR'
350 //`MatchMode' = `all' => 'AND'
351 `Casefold'=`true'
352 `Stem'=`false'
353 `Maxdocs'=`50'
354 */
355 // go through options
356 text_t opt_term; // the term(s) that the user entered
357 int opt_querytype=0; // 1=>ranked (or), 2=>boolean (and)
358 text_t opt_fields; // which fields to search on
359 int opt_start=1, opt_end=20; // default values
360 int nummatches=0, maxdocs=50; // default values
361 OptionValue_tarray::iterator ov_here=request.filterOptions.begin();
362 OptionValue_tarray::iterator ov_end=request.filterOptions.end();
363 while (ov_here != ov_end) {
364 if (ov_here->name=="Term") {
365 opt_term=ov_here->value;
366 } else if (ov_here->name=="QueryType") {
367 if (ov_here->value=="ranked") opt_querytype=1;
368 else if (ov_here->value=="boolean") opt_querytype=2;
369 else { /* error - shouldn't happen */
370 /* currently unhandled */
371 }
372 } else if (ov_here->name=="Index") {
373 opt_fields=ov_here->value;
374 } else if (ov_here->name=="StartResults") {
375 opt_start=ov_here->value.getint();
376 } else if (ov_here->name=="EndResults") {
377 opt_end=ov_here->value.getint();
378 } else if (ov_here->name=="Maxdocs") {
379 maxdocs=ov_here->value.getint();
380 }
381 ++ov_here;
382 }
383 err=noError;
384 text_tarray *titles=(*zserver)->getrecordTitles(opt_term,
385 opt_querytype,
386 opt_fields,
387 opt_start, // first to get
388 opt_end-opt_start, //count
389 &nummatches,err);
390 if (err!=noError) {
391 // can we return an err msg in a response, or just use
392 // the more drastic Greenstone error mechanism?
393 docInfo=new ResultDocInfo_t;
394 response.docInfo.push_back(*docInfo);
395 docInfo->metadata["Title"].values.push_back("Error - query err?");
396 logout << "\nz3950 filter query: error connecting to server\n";
397 // for now, DON'T use GSDL protocol err.
398 err=noError;
399 return;
400 }
401 // check if (titles==NULL) - only happens on error?
402 if (nummatches>0) {
403 text_tarray::iterator titles_here=titles->begin();
404 text_tarray::iterator titles_end=titles->end();
405 int counter=1;
406 while (titles_here!=titles_end) {
407 docInfo=new ResultDocInfo_t;
408 docInfo->metadata["Title"].values.push_back(*titles_here);
409 docInfo->result_num=counter;
410 // we need to give some OID, so we'll just use counter for now...
411 // make it the number into the whole possible retrieved set.
412 docInfo->OID=counter+opt_start-1;
413 response.docInfo.push_back(*docInfo);
414 ++counter;
415 ++titles_here;
416 }
417 }
418
419 if (request.filterResultOptions & FRtermFreq) {
420 if (nummatches>maxdocs) {
421 response.numDocs=maxdocs; // eg "more than 50" (if Maxdocs==50)
422 response.isApprox=MoreThan;
423 } else {
424 response.numDocs=nummatches; // eg "36 documents"
425 response.isApprox=Exact; // Exact | Approximate | MoreThan
426 }
427 } // end of if (... & FRtermFreq)
428
429 } // end of if (... == "QueryFilter")
430 else {
431 // this wasn't a query action
432
433 if (request.filterOptions.size()>0 &&
434 request.filterOptions[0].name=="ParentNode") {
435 // don't want to return anything
436 return;
437 /* } else if (request.docSet.size() &&
438 request.docSet[0]!="collection") {
439 // documentaction
440 // if docSet is not empty, it is either "collection", or an array
441 // of OIDs
442 docInfo=new ResultDocInfo_t;
443 response.docInfo.push_back(*docInfo);
444 */
445 } else {
446 // in case we need to return only metadata
447 docInfo=new ResultDocInfo_t;
448 response.docInfo.push_back(*docInfo);
449 }
450 } // end of not a query action
451
452
453
454 // Fill in metadata for each response.docInfo (if wanted)
455 if (request.filterResultOptions & FRmetadata) {
456 get_collectinfo (collection, info, err, logout);
457 // should check err returned here....
458
459 if (!request.fields.empty()) {
460 // currently, this is only true for NullFilter when getting the "Title"
461 // for a documentaction.
462
463 // loop on each document being returned
464 ResultDocInfo_tarray::iterator docs_here=response.docInfo.begin();
465 ResultDocInfo_tarray::iterator docs_end=response.docInfo.end();
466 while (docs_here!=docs_end) {
467 // loop on all the metadata fields in request.fields (type text_tset)
468 text_tset::iterator fields_here=request.fields.begin();
469 text_tset::iterator fields_end=request.fields.end();
470 //text_tmap::iterator it;
471 collectionmeta_map::iterator it;
472 while (fields_here!=fields_end) {
473 it=info.collectionmeta.find(*fields_here);
474 if (it!=info.collectionmeta.end())
475 docs_here->metadata[*fields_here].values.push_back(((*it).second)[g_EmptyText]);
476 else if (*fields_here=="Title" && !request.docSet.empty()) {
477 // We only do this for a document action.
478 // (This comes through as a NullQuery).
479 // hopefully docSet is only not empty for documentaction...
480 text_t doctitle;
481 int i;
482 // check that docSet isn't empty first!!!!!!
483 i=request.docSet[0].getint();
484 text_t doctext="unneeded";
485 /* following variables aren't used, as our query result has been
486 cached in z3950proxy.cpp (but really we shouldn't know that
487 here...) But for the NullFilter, we don't get given these
488 again in the request, so for now we'll take advantage of this.*/
489
490 int querytype=0; text_t field=g_EmptyText;
491
492 // get the Query out of the filterOptions. (we need get the Title)
493 text_t query=g_EmptyText;
494 OptionValue_tarray::iterator opthere=request.filterOptions.begin();
495 OptionValue_tarray::iterator opt_end=request.filterOptions.end();
496 while (opthere!=opt_end) {
497 if (opthere->name=="Term") {
498 query=opthere->value;
499 } else if (opthere->name=="Index") {
500 field=opthere->value;
501 } else if (opthere->name=="QueryType") {
502 if (opthere->value=="ranked") querytype=1;
503 else if (opthere->value=="boolean") querytype=2;
504 else { /* error - shouldn't happen */
505 /* currently unhandled */
506 }
507 }
508 ++opthere;
509 }
510 (*zserver)->getfullrecord(query, querytype, field, i, doctitle, doctext, err);
511 // check err value!
512 docs_here->metadata["Title"].values.push_back(doctitle);
513 } else {
514 docs_here->metadata[*fields_here].values.push_back(g_EmptyText);
515 }
516 ++fields_here;
517 } // end of inner while loop
518 ++docs_here;
519 } // end of outer while loop
520 } // end of if (!request.fields.empty())
521
522 else { // request.fields empty: return all metadata for about page or query
523 // we'll only put it in the first docInfo.
524 collectionmeta_map::iterator colmeta_here=info.collectionmeta.begin();
525 collectionmeta_map::iterator colmeta_end=info.collectionmeta.end();
526 while (colmeta_here!=colmeta_end) {
527 response.docInfo[0].metadata[(*colmeta_here).first].
528 values.push_back(((*colmeta_here).second)[g_EmptyText]);
529 ++colmeta_here;
530 }
531
532 // check if "collectionextra" metadata is set. If it isn't, we should
533 // create connection to target to get it.
534 if (info.collectionmeta.find("collectionextra")==colmeta_end) {
535 // it hasn't been set yet...
536 text_t abouttext="<B>Server Online</B><br>\n";
537 abouttext+=(*zserver)->getzAbout();
538 // add in the "About" text we read in from config file.
539 // how do we incorporate multi-lingual metadata?
540 abouttext+="<P>\n";
541 text_t tmpabout;
542
543 if ((*zserver)->getcfgAbout("en", tmpabout)==true)
544 abouttext+=tmpabout;
545
546 (*zserver)->setMeta("collectionextra",abouttext);
547 response.docInfo[0].metadata["collectionextra"].values.push_back(abouttext);
548 }
549 } // end of else
550
551 // do indices' names, regardless of whether asked for or not...
552 if (!response.docInfo.empty()) {
553 response.docInfo[0].metadata[".author"].values.push_back("author fields");
554 response.docInfo[0].metadata[".title"].values.push_back("title fields");
555 response.docInfo[0].metadata[".any"].values.push_back("any fields");
556 }
557 } //end of if (... & FRmetadata) ...
558}
559
560
561
562void z3950proto::get_document (const text_t &collection,
563 const DocumentRequest_t &request,
564 DocumentResponse_t &response,
565 comerror_t &err, ostream &logout) {
566
567 err=noError;
568
569 // get relevant "collection"
570 z3950_proxy_array::iterator zserver = zservers.begin();
571 z3950_proxy_array::iterator zend = zservers.end();
572 while (zserver != zend) {
573 if((*zserver)->getName()==collection) {
574 break;
575 }
576 ++zserver;
577 }
578 // now have collection in zserver.
579
580 /* docresponse consists of
581 text_t response.doc */
582 text_t title="unneeded";
583 text_t doctext;
584 text_t query; // this should not be needed, as we have already connected to
585 // get the title....
586 int querytype = 1; //ditto...
587 text_t field; // ditto...
588 (*zserver)->getfullrecord(query,querytype,field,request.OID.getint(),
589 title,doctext,err);
590 // check return value of above? (false=>not connected)
591 if (err==noError)
592 response.doc=doctext;
593 else {
594 // could print out different messages based on error type....
595 response.doc="<h2>Error</h2>There was an error while connecting to the ";
596 response.doc+="z39.50 server (ie target). Most likely this was a \n";
597 response.doc+="\"Connection Refused\" error.\n";
598
599 }
600 if (0) {
601 err=protocolError;
602 logout << "Some error\n";
603 }
604}
605
606// sets issearchable to true if the given colection is searchable
607void z3950proto::is_searchable (const text_t &/*collection*/, bool &issearchable,
608 comerror_t &err, ostream &/*logout*/) {
609 issearchable = true; // assume all collections are searchable?
610 err = noError;
611}
612
Note: See TracBrowser for help on using the repository browser.