source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 24114

Last change on this file since 24114 was 24114, checked in by ak19, 13 years ago

Now GS2 works out the earliestDatestamp of the repository in the manner GS3 does it (read each OAI-enabled collection's build.cfg to get the collection's earliestDatestamp field and choose the oldest such date among the OAI collections). Previously GS2 used to always set the earliestDatestamp to the unix epoch of 1970, which, while it would validate, wasn't the right thing to do as it wouldn't help with resumptiontokens and other date based things. Checked that the GS server still validates.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.1 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
39
40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
86
87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
95collectserver::collectserver ()
96 : collectinfo()
97{
98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
109 ++source_here;
110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
119 ++filter_here;
120 }
121 filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128 if (cfgline.size() >= 1) {
129 const text_t &value = cfgline[0];
130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
135 if (name == "HTMLPlugin")
136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
141 if (plugOption == "-use_realistic_book")
142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
150 else if (key == "gdbmhome") configinfo.dbhome = value;
151 else if (key == "collecthome") configinfo.collecthome = value;
152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
180 else if (key == "builddate") collectinfo.buildDate = value.getint();
181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
183 else if (key == "numsections") collectinfo.numSections = value.getint();
184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187 else if (key == "collectionmeta") {
188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
205 if (indexmap.from2to(name, new_name)) {
206 meta_name = new_name;
207 }
208 } else {
209 // add them to collectionmeta
210 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
211 if (cfgline.size() == 2) {
212 lang_map[g_EmptyText] = cfgline[1];
213 } else if (cfgline.size() == 3 ) {
214 // get the lang out of params
215 paramhashtype params_hash;
216 splitparams(params, params_hash);
217
218 text_t lang = params_hash["l"];
219 lang_map[lang] = cfgline[2];
220 if (lang_map[g_EmptyText].empty()) {
221 // want the first one as the default if no default specified
222 lang_map[g_EmptyText] = cfgline[2];
223 }
224 }
225 collectinfo.collectionmeta[cfgline[0]] = lang_map;
226
227 }
228
229 // add all collectionmeta to macro list
230 text_tmap params_map = collectinfo.collection_macros[meta_name];
231
232 if (cfgline.size() == 2) {// no params for this macro
233 params_map[g_EmptyText] = cfgline[1];
234 }
235 else if (cfgline.size() == 3) {// has params
236 params_map[params] = cfgline[2];
237 if (params_map[g_EmptyText].empty()) {
238 params_map[g_EmptyText] = cfgline[2];
239 }
240 }
241 collectinfo.collection_macros[meta_name] = params_map;
242 }
243 else if (key == "collectionmacro") {
244 text_t nobrackets;
245 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
246 // add all to macro list
247 if (cfgline.size() == 2) { // no params for this macro
248 params_map[g_EmptyText] = cfgline[1];
249 }
250 else if (cfgline.size() == 3) {// has params
251 // strip [ ] brackets from params
252 text_t::const_iterator first=cfgline[1].begin()+1;
253 text_t::const_iterator last=cfgline[1].end()-1;
254 nobrackets=substr(first, last);
255 params_map[nobrackets] = cfgline[2];
256 }
257 collectinfo.collection_macros[cfgline[0]] = params_map;
258
259 } else if (key == "format" && cfgline.size() == 2)
260 collectinfo.format[cfgline[0]] = cfgline[1];
261 else if (key == "building" && cfgline.size() == 2)
262 collectinfo.building[cfgline[0]] = cfgline[1];
263 else if (key == "httpdomain") collectinfo.httpdomain = value;
264 else if (key == "httpprefix") collectinfo.httpprefix = value;
265 else if (key == "receptionist") collectinfo.receptionist = value;
266 else if (key == "buildtype") {
267 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
268 collectinfo.buildType = value;
269 }
270 // backwards compatibility - searchytpes is now a format statement
271 else if (key == "searchtype") { // means buildtype is mgpp
272 if (collectinfo.buildType.empty()) {
273 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
274 collectinfo.buildType = "mgpp";
275 }
276 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
277 //collectinfo.searchTypes = cfgline;
278 }
279 else if (key == "infodbtype") {
280 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
281 collectinfo.infodbType = value;
282 }
283 else if (key == "separate_cjk") {
284 if (value == "true") collectinfo.isSegmented = true;
285 else collectinfo.isSegmented = false;
286 }
287 // What have we set in our collect.cfg file : document or collection ?
288 else if (key == "authenticate") collectinfo.authenticate = value;
289
290 // What have we set for our group list
291 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
292
293 // build.cfg, earliestDatestamp of this collection needed for
294 // OAIServer to work out earliestDatestamp of this repository
295 else if (key == "earliestdatestamp") {
296 collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
297 }
298
299 // store all the mappings for use when collection meta is read later
300 // (build.cfg read before collect.cfg)
301 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
302 indexmap.importmap (cfgline, true);
303
304 }
305 // In the map the key-value pair contain the same
306 // data i.e key == data, if key is 2 then data is 2
307
308 // What have we set for our public_documents ACL
309 else if (key == "public_documents")
310 {
311 text_tarray::const_iterator begin = cfgline.begin();
312 text_tarray::const_iterator end = cfgline.end();
313 while(begin != end)
314 {
315 // key = data i.e if key is 2 then data is 2
316 // collectinfo.public_documents[*begin] is the key
317 // *begin is the data value
318
319 collectinfo.public_documents[*begin] = *begin;
320 ++begin;
321 }
322 }
323
324 // What have we set for our private_documents ACL
325 else if (key == "private_documents")
326 {
327 text_tarray::const_iterator begin = cfgline.begin();
328 text_tarray::const_iterator end = cfgline.end();
329 while(begin != end)
330 {
331 // key = data i.e if key is 2 then data is 2
332 // collectinfo.public_documents[*begin] is the key
333 // *begin is the data value
334
335 collectinfo.private_documents[*begin] = *begin;
336 ++begin;
337 }
338 }
339
340 // dynamic_classifier <UniqueID> "<Options>"
341 else if (key == "dynamic_classifier")
342 {
343 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
344 }
345 }
346
347 // configure the filters
348 filtermapclass::iterator filter_here = filters.begin();
349 filtermapclass::iterator filter_end = filters.end();
350 while (filter_here != filter_end) {
351 assert ((*filter_here).second.f != NULL);
352 if ((*filter_here).second.f != NULL)
353 (*filter_here).second.f->configure(key, cfgline);
354
355 ++filter_here;
356 }
357
358 // configure the sources
359 sourcelistclass::iterator source_here = sources.begin();
360 sourcelistclass::iterator source_end = sources.end();
361 while (source_here != source_end) {
362 assert ((*source_here).s != NULL);
363 if ((*source_here).s != NULL)
364 (*source_here).s->configure(key, cfgline);
365
366 ++source_here;
367 }
368}
369
370
371void collectserver::configure (const text_t &key, const text_t &value) {
372 text_tarray cfgline;
373 cfgline.push_back (value);
374 configure(key, cfgline);
375}
376
377void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
378 // if we've not been properly configured, then it is a foregone
379 // conclusion that we cannot be active
380 if (this->configinfo.collection == "null")
381 {
382 wasSuccess = false;
383 }
384 // if no build date exists, then the collection was probably not built;
385 // ditto if the number of documents is zero, then something is pretty
386 // wrong
387 else if (this->collectinfo.buildDate == 0 ||
388 this->collectinfo.numDocs == 0)
389 {
390 wasSuccess = false;
391 }
392 // it is probably okay
393 else
394 wasSuccess = true;
395}
396
397
398bool collectserver::init (ostream &logout) {
399 // delete the indexmap
400 indexmap.clear();
401
402 // init the filters
403 filtermapclass::iterator filter_here = filters.begin();
404 filtermapclass::iterator filter_end = filters.end();
405 while (filter_here != filter_end) {
406 assert ((*filter_here).second.f != NULL);
407 if (((*filter_here).second.f != NULL) &&
408 !(*filter_here).second.f->init(logout)) return false;
409
410 ++filter_here;
411 }
412
413 // init the sources
414 sourcelistclass::iterator source_here = sources.begin();
415 sourcelistclass::iterator source_end = sources.end();
416 while (source_here != source_end) {
417 assert ((*source_here).s != NULL);
418 if (((*source_here).s != NULL) &&
419 !(*source_here).s->init(logout)) return false;
420
421 ++source_here;
422 }
423
424 return true;
425}
426
427
428void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
429 comerror_t &err, ostream &/*logout*/) {
430 reponse = collectinfo;
431 err = noError;
432}
433
434void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
435 comerror_t &err, ostream &/*logout*/) {
436 response.clear ();
437
438 // get a list of filter names
439 filtermapclass::iterator filter_here = filters.begin();
440 filtermapclass::iterator filter_end = filters.end();
441 while (filter_here != filter_end) {
442 response.filterNames.insert ((*filter_here).first);
443 ++filter_here;
444 }
445
446 err = noError;
447}
448
449void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
450 InfoFilterOptionsResponse_t &response,
451 comerror_t &err, ostream &logout) {
452 outconvertclass text_t2ascii;
453
454 filterclass *thisfilter = filters.getfilter(request.filterName);
455 if (thisfilter != NULL) {
456 thisfilter->get_filteroptions (response, err, logout);
457 } else {
458 response.clear ();
459 err = protocolError;
460 text_t& infodbtype = collectinfo.infodbType;
461
462 // Don't print out the warning if were's asking about SQLQueryFilter
463 // when we know the infodbtype is something other than .*sql.*
464
465 if ((request.filterName != "SQLQueryFilter")
466 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
467 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
468 << "filter \"" << request.filterName << "\".\n\n";
469 }
470 }
471}
472
473void collectserver::filter (FilterRequest_t &request,
474 FilterResponse_t &response,
475 comerror_t &err, ostream &logout) {
476 outconvertclass text_t2ascii;
477
478 // translate any ".fc", ".pr" etc. stuff in the docSet
479 text_t translatedOID;
480 text_tarray translatedOIDs;
481 text_tarray::iterator doc_here = request.docSet.begin();
482 text_tarray::iterator doc_end = request.docSet.end();
483 while (doc_here != doc_end) {
484 if (needs_translating (*doc_here)) {
485 sourcelistclass::iterator source_here = sources.begin();
486 sourcelistclass::iterator source_end = sources.end();
487 while (source_here != source_end) {
488 assert ((*source_here).s != NULL);
489 if (((*source_here).s != NULL) &&
490 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
491 if (err != noError) return;
492 break;
493 }
494 ++source_here;
495 }
496 translatedOIDs.push_back (translatedOID);
497 } else {
498 translatedOIDs.push_back (*doc_here);
499 }
500 ++doc_here;
501 }
502 request.docSet = translatedOIDs;
503
504 response.clear();
505
506 filterclass *thisfilter = filters.getfilter(request.filterName);
507 if (thisfilter != NULL) {
508 // filter the data
509 thisfilter->filter (request, response, err, logout);
510 if (err != noError) return;
511 // fill in the metadata for each of the OIDs (if it is requested)
512 if (request.filterResultOptions & FRmetadata) {
513 bool processed = false;
514 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
515 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
516 while (resultdoc_here != resultdoc_end) {
517 // try each of the sources in turn
518 sourcelistclass::iterator source_here = sources.begin();
519 sourcelistclass::iterator source_end = sources.end();
520 while (source_here != source_end) {
521 assert ((*source_here).s != NULL);
522 if (((*source_here).s != NULL) &&
523 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
524 request.getParents, request.fields,
525 (*resultdoc_here).OID, (*resultdoc_here).metadata,
526 err, logout))) {
527 if (err != noError) return;
528 processed = true;
529 break;
530 }
531 ++source_here;
532 }
533 if (!processed) {
534
535 logout << text_t2ascii << "Protocol Error: nothing processed for "
536 << "filter \"" << request.filterName << "\".\n\n";
537
538 err = protocolError;
539 return;
540 }
541 ++resultdoc_here;
542 }
543 }
544
545 err = noError;
546 }
547 else
548 {
549 response.clear ();
550 err = protocolError;
551 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
552 << "filter \"" << request.filterName << "\".\n\n";
553 }
554}
555
556void collectserver::get_document (const DocumentRequest_t &request,
557 DocumentResponse_t &response,
558 comerror_t &err, ostream &logout) {
559
560 sourcelistclass::iterator source_here = sources.begin();
561 sourcelistclass::iterator source_end = sources.end();
562 while (source_here != source_end) {
563 assert ((*source_here).s != NULL);
564 if (((*source_here).s != NULL) &&
565 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
566 if (err != noError) return;
567 break;
568 }
569 ++source_here;
570 }
571}
572
573void collectserver::is_searchable (bool &issearchable, comerror_t &err,
574 ostream &logout) {
575
576 sourcelistclass::iterator source_here = sources.begin();
577 sourcelistclass::iterator source_end = sources.end();
578 while (source_here != source_end) {
579 assert ((*source_here).s != NULL);
580 if (((*source_here).s != NULL) &&
581 ((*source_here).s->is_searchable (issearchable, err, logout))) {
582 if (err != noError) return;
583 break;
584 }
585 ++source_here;
586 }
587}
588
589
590bool operator==(const collectserverptr &x, const collectserverptr &y) {
591 return (x.c == y.c);
592}
593
594bool operator<(const collectserverptr &x, const collectserverptr &y) {
595 return (x.c < y.c);
596}
597
598
599// thecollectserver remains the property of the calling code but
600// should not be deleted until it is removed from this list.
601void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
602 // can't add a null collection server
603 assert (thecollectserver != NULL);
604 if (thecollectserver == NULL) return;
605
606 // can't add an collection server with no collection name
607 assert (!(thecollectserver->get_collection_name()).empty());
608 if ((thecollectserver->get_collection_name()).empty()) return;
609
610 collectserverptr cptr;
611 cptr.c = thecollectserver;
612 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
613}
614
615// getcollectserver will return NULL if the collectserver could not be found
616collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
617 // can't find a collection with no name
618 if (collection.empty()) return NULL;
619
620 iterator here = collectserverptrs.find (collection);
621 if (here == collectserverptrs.end()) return NULL;
622
623 return (*here).second.c;
624}
Note: See TracBrowser for help on using the repository browser.