source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 23492

Last change on this file since 23492 was 23492, checked in by davidb, 13 years ago

Additional check added before printing out warning about a filter not being available/recognized.

  • Property svn:keywords set to Author Date Id Revision
File size: 19.8 KB
RevLine 
[17863]1
[166]2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
[534]7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
[166]10 *
[534]11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
[166]25 *********************************************************************/
26
27#include "collectserver.h"
[249]28#include "OIDtools.h"
[166]29#include <assert.h>
[9929]30#include "display.h"
[166]31
[21324]32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
[166]39
[21324]40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
[22050]59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
[21324]86
[22050]87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
[20799]95collectserver::collectserver ()
96 : collectinfo()
97{
[166]98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
[1459]102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
[9620]109 ++source_here;
[1459]110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
[9620]119 ++filter_here;
[1459]120 }
121 filters.clear();
[166]122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
[17863]126// it contains. The configuration should take place just before initialisation
[166]127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
[226]128 if (cfgline.size() >= 1) {
[166]129 const text_t &value = cfgline[0];
[13982]130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
[17863]135 if (name == "HTMLPlugin")
[13982]136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
[17863]141 if (plugOption == "-use_realistic_book")
[13982]142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
[15587]150 else if (key == "gdbmhome") configinfo.dbhome = value;
[16310]151 else if (key == "collecthome") configinfo.collecthome = value;
[166]152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
[16310]155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
[166]157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
[15002]165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
[2712]168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
[10789]169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
[830]180 else if (key == "builddate") collectinfo.buildDate = value.getint();
[166]181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
[1253]183 else if (key == "numsections") collectinfo.numSections = value.getint();
[166]184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
[12867]186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
[8453]187 else if (key == "collectionmeta") {
[11964]188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
[9929]190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
205 if (indexmap.from2to(name, new_name)) {
206 meta_name = new_name;
207 }
208 } else {
209 // add them to collectionmeta
210 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
211 if (cfgline.size() == 2) {
212 lang_map[g_EmptyText] = cfgline[1];
213 } else if (cfgline.size() == 3 ) {
214 // get the lang out of params
215 paramhashtype params_hash;
216 splitparams(params, params_hash);
217
218 text_t lang = params_hash["l"];
219 lang_map[lang] = cfgline[2];
220 if (lang_map[g_EmptyText].empty()) {
221 // want the first one as the default if no default specified
222 lang_map[g_EmptyText] = cfgline[2];
223 }
224 }
225 collectinfo.collectionmeta[cfgline[0]] = lang_map;
226
227 }
228
229 // add all collectionmeta to macro list
[11964]230 text_tmap params_map = collectinfo.collection_macros[meta_name];
231
[17863]232 if (cfgline.size() == 2) {// no params for this macro
[11964]233 params_map[g_EmptyText] = cfgline[1];
234 }
[9929]235 else if (cfgline.size() == 3) {// has params
[11964]236 params_map[params] = cfgline[2];
237 if (params_map[g_EmptyText].empty()) {
238 params_map[g_EmptyText] = cfgline[2];
239 }
[9929]240 }
[11964]241 collectinfo.collection_macros[meta_name] = params_map;
[9929]242 }
243 else if (key == "collectionmacro") {
244 text_t nobrackets;
[11964]245 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
[9929]246 // add all to macro list
[11964]247 if (cfgline.size() == 2) { // no params for this macro
248 params_map[g_EmptyText] = cfgline[1];
249 }
[8453]250 else if (cfgline.size() == 3) {// has params
251 // strip [ ] brackets from params
252 text_t::const_iterator first=cfgline[1].begin()+1;
253 text_t::const_iterator last=cfgline[1].end()-1;
[9929]254 nobrackets=substr(first, last);
[11964]255 params_map[nobrackets] = cfgline[2];
[8453]256 }
[11964]257 collectinfo.collection_macros[cfgline[0]] = params_map;
258
[8453]259 } else if (key == "format" && cfgline.size() == 2)
[352]260 collectinfo.format[cfgline[0]] = cfgline[1];
[722]261 else if (key == "building" && cfgline.size() == 2)
262 collectinfo.building[cfgline[0]] = cfgline[1];
[1860]263 else if (key == "httpdomain") collectinfo.httpdomain = value;
264 else if (key == "httpprefix") collectinfo.httpprefix = value;
[432]265 else if (key == "receptionist") collectinfo.receptionist = value;
[21324]266 else if (key == "buildtype") {
267 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
268 collectinfo.buildType = value;
269 }
[11984]270 // backwards compatibility - searchytpes is now a format statement
[4751]271 else if (key == "searchtype") { // means buildtype is mgpp
[9904]272 if (collectinfo.buildType.empty()) {
[21324]273 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
[9904]274 collectinfo.buildType = "mgpp";
275 }
[11984]276 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
277 //collectinfo.searchTypes = cfgline;
[4751]278 }
[22050]279 else if (key == "infodbtype") {
280 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
281 collectinfo.infodbType = value;
282 }
[6584]283 else if (key == "separate_cjk") {
284 if (value == "true") collectinfo.isSegmented = true;
285 else collectinfo.isSegmented = false;
286 }
[4974]287 // What have we set in our collect.cfg file : document or collection ?
[5024]288 else if (key == "authenticate") collectinfo.authenticate = value;
[4974]289
290 // What have we set for our group list
[20799]291 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
[4974]292
[9929]293 // store all the mappings for use when collection meta is read later
294 // (build.cfg read before collect.cfg)
[9934]295 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
[9929]296 indexmap.importmap (cfgline, true);
297
298 }
[4974]299 // In the map the key-value pair contain the same
[5024]300 // data i.e key == data, if key is 2 then data is 2
[4974]301
[5024]302 // What have we set for our public_documents ACL
303 else if (key == "public_documents")
[4974]304 {
305 text_tarray::const_iterator begin = cfgline.begin();
306 text_tarray::const_iterator end = cfgline.end();
307 while(begin != end)
308 {
[5024]309 // key = data i.e if key is 2 then data is 2
310 // collectinfo.public_documents[*begin] is the key
311 // *begin is the data value
312
313 collectinfo.public_documents[*begin] = *begin;
[9620]314 ++begin;
[4974]315 }
316 }
317
[5024]318 // What have we set for our private_documents ACL
319 else if (key == "private_documents")
[4974]320 {
321 text_tarray::const_iterator begin = cfgline.begin();
322 text_tarray::const_iterator end = cfgline.end();
323 while(begin != end)
324 {
[5024]325 // key = data i.e if key is 2 then data is 2
326 // collectinfo.public_documents[*begin] is the key
327 // *begin is the data value
328
329 collectinfo.private_documents[*begin] = *begin;
[9620]330 ++begin;
[4974]331 }
332 }
[15771]333
334 // dynamic_classifier <UniqueID> "<Options>"
335 else if (key == "dynamic_classifier")
336 {
337 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
338 }
[226]339 }
[9929]340
[226]341 // configure the filters
342 filtermapclass::iterator filter_here = filters.begin();
343 filtermapclass::iterator filter_end = filters.end();
344 while (filter_here != filter_end) {
345 assert ((*filter_here).second.f != NULL);
346 if ((*filter_here).second.f != NULL)
347 (*filter_here).second.f->configure(key, cfgline);
[191]348
[9620]349 ++filter_here;
[166]350 }
[226]351
352 // configure the sources
353 sourcelistclass::iterator source_here = sources.begin();
354 sourcelistclass::iterator source_end = sources.end();
355 while (source_here != source_end) {
356 assert ((*source_here).s != NULL);
357 if ((*source_here).s != NULL)
358 (*source_here).s->configure(key, cfgline);
359
[9620]360 ++source_here;
[226]361 }
[166]362}
363
[9929]364
[166]365void collectserver::configure (const text_t &key, const text_t &value) {
366 text_tarray cfgline;
367 cfgline.push_back (value);
368 configure(key, cfgline);
369}
370
[2173]371void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
372 // if we've not been properly configured, then it is a foregone
373 // conclusion that we cannot be active
374 if (this->configinfo.collection == "null")
375 {
376 wasSuccess = false;
377 }
378 // if no build date exists, then the collection was probably not built;
379 // ditto if the number of documents is zero, then something is pretty
380 // wrong
381 else if (this->collectinfo.buildDate == 0 ||
382 this->collectinfo.numDocs == 0)
383 {
384 wasSuccess = false;
385 }
386 // it is probably okay
387 else
388 wasSuccess = true;
389}
[166]390
[2173]391
[226]392bool collectserver::init (ostream &logout) {
[9929]393 // delete the indexmap
394 indexmap.clear();
395
[226]396 // init the filters
397 filtermapclass::iterator filter_here = filters.begin();
398 filtermapclass::iterator filter_end = filters.end();
399 while (filter_here != filter_end) {
400 assert ((*filter_here).second.f != NULL);
401 if (((*filter_here).second.f != NULL) &&
402 !(*filter_here).second.f->init(logout)) return false;
403
[9620]404 ++filter_here;
[226]405 }
406
407 // init the sources
408 sourcelistclass::iterator source_here = sources.begin();
409 sourcelistclass::iterator source_end = sources.end();
410 while (source_here != source_end) {
411 assert ((*source_here).s != NULL);
412 if (((*source_here).s != NULL) &&
413 !(*source_here).s->init(logout)) return false;
414
[9620]415 ++source_here;
[226]416 }
417
[166]418 return true;
419}
420
421
[186]422void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
423 comerror_t &err, ostream &/*logout*/) {
424 reponse = collectinfo;
425 err = noError;
426}
[166]427
[226]428void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
[220]429 comerror_t &err, ostream &/*logout*/) {
[226]430 response.clear ();
431
432 // get a list of filter names
433 filtermapclass::iterator filter_here = filters.begin();
434 filtermapclass::iterator filter_end = filters.end();
435 while (filter_here != filter_end) {
436 response.filterNames.insert ((*filter_here).first);
[9745]437 ++filter_here;
[226]438 }
439
440 err = noError;
[220]441}
442
[226]443void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
[220]444 InfoFilterOptionsResponse_t &response,
[226]445 comerror_t &err, ostream &logout) {
446 outconvertclass text_t2ascii;
447
448 filterclass *thisfilter = filters.getfilter(request.filterName);
449 if (thisfilter != NULL) {
450 thisfilter->get_filteroptions (response, err, logout);
451 } else {
452 response.clear ();
453 err = protocolError;
[23492]454 text_t& infodbtype = collectinfo.infodbType;
455
456 // Don't print out the warning if were's asking about SQLQueryFilter
457 // when we know the infodbtype is something other than .*sql.*
458
459 if ((request.filterName != "SQLQueryFilter")
460 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
461 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
462 << "filter \"" << request.filterName << "\".\n\n";
463 }
[226]464 }
[186]465}
466
[249]467void collectserver::filter (FilterRequest_t &request,
[186]468 FilterResponse_t &response,
[226]469 comerror_t &err, ostream &logout) {
470 outconvertclass text_t2ascii;
471
[249]472 // translate any ".fc", ".pr" etc. stuff in the docSet
473 text_t translatedOID;
[472]474 text_tarray translatedOIDs;
475 text_tarray::iterator doc_here = request.docSet.begin();
476 text_tarray::iterator doc_end = request.docSet.end();
[249]477 while (doc_here != doc_end) {
478 if (needs_translating (*doc_here)) {
479 sourcelistclass::iterator source_here = sources.begin();
480 sourcelistclass::iterator source_end = sources.end();
481 while (source_here != source_end) {
482 assert ((*source_here).s != NULL);
483 if (((*source_here).s != NULL) &&
484 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
485 if (err != noError) return;
486 break;
487 }
[9620]488 ++source_here;
[249]489 }
[472]490 translatedOIDs.push_back (translatedOID);
[249]491 } else {
[472]492 translatedOIDs.push_back (*doc_here);
[249]493 }
[9620]494 ++doc_here;
[249]495 }
496 request.docSet = translatedOIDs;
497
[196]498 response.clear();
[650]499
[226]500 filterclass *thisfilter = filters.getfilter(request.filterName);
501 if (thisfilter != NULL) {
502 // filter the data
503 thisfilter->filter (request, response, err, logout);
[4216]504 if (err != noError) return;
[226]505 // fill in the metadata for each of the OIDs (if it is requested)
506 if (request.filterResultOptions & FRmetadata) {
[466]507 bool processed = false;
[226]508 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
509 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
510 while (resultdoc_here != resultdoc_end) {
511 // try each of the sources in turn
512 sourcelistclass::iterator source_here = sources.begin();
513 sourcelistclass::iterator source_end = sources.end();
514 while (source_here != source_end) {
515 assert ((*source_here).s != NULL);
516 if (((*source_here).s != NULL) &&
517 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
[271]518 request.getParents, request.fields,
519 (*resultdoc_here).OID, (*resultdoc_here).metadata,
520 err, logout))) {
[226]521 if (err != noError) return;
[466]522 processed = true;
[226]523 break;
524 }
[9620]525 ++source_here;
[226]526 }
[466]527 if (!processed) {
[12018]528
529 logout << text_t2ascii << "Protocol Error: nothing processed for "
530 << "filter \"" << request.filterName << "\".\n\n";
531
[466]532 err = protocolError;
533 return;
534 }
[9620]535 ++resultdoc_here;
[226]536 }
[16027]537 }
538
539 err = noError;
540 }
541 else
542 {
[226]543 response.clear ();
[196]544 err = protocolError;
[226]545 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
546 << "filter \"" << request.filterName << "\".\n\n";
[191]547 }
[186]548}
549
[259]550void collectserver::get_document (const DocumentRequest_t &request,
551 DocumentResponse_t &response,
552 comerror_t &err, ostream &logout) {
[196]553
[259]554 sourcelistclass::iterator source_here = sources.begin();
555 sourcelistclass::iterator source_end = sources.end();
556 while (source_here != source_end) {
557 assert ((*source_here).s != NULL);
558 if (((*source_here).s != NULL) &&
559 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
560 if (err != noError) return;
561 break;
562 }
[9620]563 ++source_here;
[259]564 }
565}
[186]566
[4774]567void collectserver::is_searchable (bool &issearchable, comerror_t &err,
568 ostream &logout) {
[492]569
[4774]570 sourcelistclass::iterator source_here = sources.begin();
571 sourcelistclass::iterator source_end = sources.end();
572 while (source_here != source_end) {
573 assert ((*source_here).s != NULL);
574 if (((*source_here).s != NULL) &&
575 ((*source_here).s->is_searchable (issearchable, err, logout))) {
576 if (err != noError) return;
577 break;
578 }
[9620]579 ++source_here;
[4774]580 }
581}
582
583
[492]584bool operator==(const collectserverptr &x, const collectserverptr &y) {
585 return (x.c == y.c);
586}
587
588bool operator<(const collectserverptr &x, const collectserverptr &y) {
589 return (x.c < y.c);
590}
591
592
[166]593// thecollectserver remains the property of the calling code but
594// should not be deleted until it is removed from this list.
595void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
596 // can't add a null collection server
597 assert (thecollectserver != NULL);
598 if (thecollectserver == NULL) return;
599
600 // can't add an collection server with no collection name
601 assert (!(thecollectserver->get_collection_name()).empty());
602 if ((thecollectserver->get_collection_name()).empty()) return;
603
604 collectserverptr cptr;
605 cptr.c = thecollectserver;
606 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
607}
608
609// getcollectserver will return NULL if the collectserver could not be found
610collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
611 // can't find a collection with no name
612 if (collection.empty()) return NULL;
613
614 iterator here = collectserverptrs.find (collection);
615 if (here == collectserverptrs.end()) return NULL;
616
617 return (*here).second.c;
618}
Note: See TracBrowser for help on using the repository browser.