source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 24305

Last change on this file since 24305 was 24305, checked in by ak19, 13 years ago

The collectionmeta field for titles in modelcol's collect.cfg now contains ex.dc.Title. So that its entry in the indexes drop-down of the search page maps to the title's display name, the lookup performed by runtime-src's collectserver needed to be adjusted.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.2 KB
RevLine 
[17863]1
[166]2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
[534]7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
[166]10 *
[534]11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
[166]25 *********************************************************************/
26
27#include "collectserver.h"
[249]28#include "OIDtools.h"
[166]29#include <assert.h>
[9929]30#include "display.h"
[166]31
[21324]32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
[166]39
[21324]40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
[22050]59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
[21324]86
[22050]87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
[20799]95collectserver::collectserver ()
96 : collectinfo()
97{
[166]98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
[1459]102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
[9620]109 ++source_here;
[1459]110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
[9620]119 ++filter_here;
[1459]120 }
121 filters.clear();
[166]122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
[17863]126// it contains. The configuration should take place just before initialisation
[166]127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
[226]128 if (cfgline.size() >= 1) {
[166]129 const text_t &value = cfgline[0];
[13982]130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
[17863]135 if (name == "HTMLPlugin")
[13982]136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
[17863]141 if (plugOption == "-use_realistic_book")
[13982]142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
[15587]150 else if (key == "gdbmhome") configinfo.dbhome = value;
[16310]151 else if (key == "collecthome") configinfo.collecthome = value;
[166]152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
[16310]155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
[166]157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
[15002]165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
[2712]168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
[10789]169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
[830]180 else if (key == "builddate") collectinfo.buildDate = value.getint();
[166]181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
[1253]183 else if (key == "numsections") collectinfo.numSections = value.getint();
[166]184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
[12867]186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
[8453]187 else if (key == "collectionmeta") {
[11964]188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
[9929]190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
[24305]205
206 // make sure that any ex.XXX metadata in the string is turned into XXX for matching
207 name.replace("ex.","");
208
[9929]209 if (indexmap.from2to(name, new_name)) {
210 meta_name = new_name;
211 }
212 } else {
213 // add them to collectionmeta
214 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
215 if (cfgline.size() == 2) {
216 lang_map[g_EmptyText] = cfgline[1];
217 } else if (cfgline.size() == 3 ) {
218 // get the lang out of params
219 paramhashtype params_hash;
220 splitparams(params, params_hash);
221
222 text_t lang = params_hash["l"];
223 lang_map[lang] = cfgline[2];
224 if (lang_map[g_EmptyText].empty()) {
225 // want the first one as the default if no default specified
226 lang_map[g_EmptyText] = cfgline[2];
227 }
228 }
229 collectinfo.collectionmeta[cfgline[0]] = lang_map;
230
231 }
232
233 // add all collectionmeta to macro list
[11964]234 text_tmap params_map = collectinfo.collection_macros[meta_name];
235
[17863]236 if (cfgline.size() == 2) {// no params for this macro
[11964]237 params_map[g_EmptyText] = cfgline[1];
238 }
[9929]239 else if (cfgline.size() == 3) {// has params
[11964]240 params_map[params] = cfgline[2];
241 if (params_map[g_EmptyText].empty()) {
242 params_map[g_EmptyText] = cfgline[2];
243 }
[9929]244 }
[11964]245 collectinfo.collection_macros[meta_name] = params_map;
[9929]246 }
247 else if (key == "collectionmacro") {
248 text_t nobrackets;
[11964]249 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
[9929]250 // add all to macro list
[11964]251 if (cfgline.size() == 2) { // no params for this macro
252 params_map[g_EmptyText] = cfgline[1];
253 }
[8453]254 else if (cfgline.size() == 3) {// has params
255 // strip [ ] brackets from params
256 text_t::const_iterator first=cfgline[1].begin()+1;
257 text_t::const_iterator last=cfgline[1].end()-1;
[9929]258 nobrackets=substr(first, last);
[11964]259 params_map[nobrackets] = cfgline[2];
[8453]260 }
[11964]261 collectinfo.collection_macros[cfgline[0]] = params_map;
262
[8453]263 } else if (key == "format" && cfgline.size() == 2)
[352]264 collectinfo.format[cfgline[0]] = cfgline[1];
[722]265 else if (key == "building" && cfgline.size() == 2)
266 collectinfo.building[cfgline[0]] = cfgline[1];
[1860]267 else if (key == "httpdomain") collectinfo.httpdomain = value;
268 else if (key == "httpprefix") collectinfo.httpprefix = value;
[432]269 else if (key == "receptionist") collectinfo.receptionist = value;
[21324]270 else if (key == "buildtype") {
271 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
272 collectinfo.buildType = value;
273 }
[11984]274 // backwards compatibility - searchytpes is now a format statement
[4751]275 else if (key == "searchtype") { // means buildtype is mgpp
[9904]276 if (collectinfo.buildType.empty()) {
[21324]277 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
[9904]278 collectinfo.buildType = "mgpp";
279 }
[11984]280 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
281 //collectinfo.searchTypes = cfgline;
[4751]282 }
[22050]283 else if (key == "infodbtype") {
284 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
285 collectinfo.infodbType = value;
286 }
[6584]287 else if (key == "separate_cjk") {
288 if (value == "true") collectinfo.isSegmented = true;
289 else collectinfo.isSegmented = false;
290 }
[4974]291 // What have we set in our collect.cfg file : document or collection ?
[5024]292 else if (key == "authenticate") collectinfo.authenticate = value;
[4974]293
294 // What have we set for our group list
[20799]295 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
[4974]296
[24114]297 // build.cfg, earliestDatestamp of this collection needed for
298 // OAIServer to work out earliestDatestamp of this repository
299 else if (key == "earliestdatestamp") {
300 collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
301 }
302
[9929]303 // store all the mappings for use when collection meta is read later
304 // (build.cfg read before collect.cfg)
[9934]305 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
[9929]306 indexmap.importmap (cfgline, true);
307
308 }
[4974]309 // In the map the key-value pair contain the same
[5024]310 // data i.e key == data, if key is 2 then data is 2
[4974]311
[5024]312 // What have we set for our public_documents ACL
313 else if (key == "public_documents")
[4974]314 {
315 text_tarray::const_iterator begin = cfgline.begin();
316 text_tarray::const_iterator end = cfgline.end();
317 while(begin != end)
318 {
[5024]319 // key = data i.e if key is 2 then data is 2
320 // collectinfo.public_documents[*begin] is the key
321 // *begin is the data value
322
323 collectinfo.public_documents[*begin] = *begin;
[9620]324 ++begin;
[4974]325 }
326 }
327
[5024]328 // What have we set for our private_documents ACL
329 else if (key == "private_documents")
[4974]330 {
331 text_tarray::const_iterator begin = cfgline.begin();
332 text_tarray::const_iterator end = cfgline.end();
333 while(begin != end)
334 {
[5024]335 // key = data i.e if key is 2 then data is 2
336 // collectinfo.public_documents[*begin] is the key
337 // *begin is the data value
338
339 collectinfo.private_documents[*begin] = *begin;
[9620]340 ++begin;
[4974]341 }
342 }
[15771]343
344 // dynamic_classifier <UniqueID> "<Options>"
345 else if (key == "dynamic_classifier")
346 {
347 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
348 }
[226]349 }
[9929]350
[226]351 // configure the filters
352 filtermapclass::iterator filter_here = filters.begin();
353 filtermapclass::iterator filter_end = filters.end();
354 while (filter_here != filter_end) {
355 assert ((*filter_here).second.f != NULL);
356 if ((*filter_here).second.f != NULL)
357 (*filter_here).second.f->configure(key, cfgline);
[191]358
[9620]359 ++filter_here;
[166]360 }
[226]361
362 // configure the sources
363 sourcelistclass::iterator source_here = sources.begin();
364 sourcelistclass::iterator source_end = sources.end();
365 while (source_here != source_end) {
366 assert ((*source_here).s != NULL);
367 if ((*source_here).s != NULL)
368 (*source_here).s->configure(key, cfgline);
369
[9620]370 ++source_here;
[226]371 }
[166]372}
373
[9929]374
[166]375void collectserver::configure (const text_t &key, const text_t &value) {
376 text_tarray cfgline;
377 cfgline.push_back (value);
378 configure(key, cfgline);
379}
380
[2173]381void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
382 // if we've not been properly configured, then it is a foregone
383 // conclusion that we cannot be active
384 if (this->configinfo.collection == "null")
385 {
386 wasSuccess = false;
387 }
388 // if no build date exists, then the collection was probably not built;
389 // ditto if the number of documents is zero, then something is pretty
390 // wrong
391 else if (this->collectinfo.buildDate == 0 ||
392 this->collectinfo.numDocs == 0)
393 {
394 wasSuccess = false;
395 }
396 // it is probably okay
397 else
398 wasSuccess = true;
399}
[166]400
[2173]401
[226]402bool collectserver::init (ostream &logout) {
[9929]403 // delete the indexmap
404 indexmap.clear();
405
[226]406 // init the filters
407 filtermapclass::iterator filter_here = filters.begin();
408 filtermapclass::iterator filter_end = filters.end();
409 while (filter_here != filter_end) {
410 assert ((*filter_here).second.f != NULL);
411 if (((*filter_here).second.f != NULL) &&
412 !(*filter_here).second.f->init(logout)) return false;
413
[9620]414 ++filter_here;
[226]415 }
416
417 // init the sources
418 sourcelistclass::iterator source_here = sources.begin();
419 sourcelistclass::iterator source_end = sources.end();
420 while (source_here != source_end) {
421 assert ((*source_here).s != NULL);
422 if (((*source_here).s != NULL) &&
423 !(*source_here).s->init(logout)) return false;
424
[9620]425 ++source_here;
[226]426 }
427
[166]428 return true;
429}
430
431
[186]432void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
433 comerror_t &err, ostream &/*logout*/) {
434 reponse = collectinfo;
435 err = noError;
436}
[166]437
[226]438void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
[220]439 comerror_t &err, ostream &/*logout*/) {
[226]440 response.clear ();
441
442 // get a list of filter names
443 filtermapclass::iterator filter_here = filters.begin();
444 filtermapclass::iterator filter_end = filters.end();
445 while (filter_here != filter_end) {
446 response.filterNames.insert ((*filter_here).first);
[9745]447 ++filter_here;
[226]448 }
449
450 err = noError;
[220]451}
452
[226]453void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
[220]454 InfoFilterOptionsResponse_t &response,
[226]455 comerror_t &err, ostream &logout) {
456 outconvertclass text_t2ascii;
457
458 filterclass *thisfilter = filters.getfilter(request.filterName);
459 if (thisfilter != NULL) {
460 thisfilter->get_filteroptions (response, err, logout);
461 } else {
462 response.clear ();
463 err = protocolError;
[23492]464 text_t& infodbtype = collectinfo.infodbType;
465
466 // Don't print out the warning if were's asking about SQLQueryFilter
467 // when we know the infodbtype is something other than .*sql.*
468
469 if ((request.filterName != "SQLQueryFilter")
470 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
471 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
472 << "filter \"" << request.filterName << "\".\n\n";
473 }
[226]474 }
[186]475}
476
[249]477void collectserver::filter (FilterRequest_t &request,
[186]478 FilterResponse_t &response,
[226]479 comerror_t &err, ostream &logout) {
480 outconvertclass text_t2ascii;
481
[249]482 // translate any ".fc", ".pr" etc. stuff in the docSet
483 text_t translatedOID;
[472]484 text_tarray translatedOIDs;
485 text_tarray::iterator doc_here = request.docSet.begin();
486 text_tarray::iterator doc_end = request.docSet.end();
[249]487 while (doc_here != doc_end) {
488 if (needs_translating (*doc_here)) {
489 sourcelistclass::iterator source_here = sources.begin();
490 sourcelistclass::iterator source_end = sources.end();
491 while (source_here != source_end) {
492 assert ((*source_here).s != NULL);
493 if (((*source_here).s != NULL) &&
494 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
495 if (err != noError) return;
496 break;
497 }
[9620]498 ++source_here;
[249]499 }
[472]500 translatedOIDs.push_back (translatedOID);
[249]501 } else {
[472]502 translatedOIDs.push_back (*doc_here);
[249]503 }
[9620]504 ++doc_here;
[249]505 }
506 request.docSet = translatedOIDs;
507
[196]508 response.clear();
[650]509
[226]510 filterclass *thisfilter = filters.getfilter(request.filterName);
511 if (thisfilter != NULL) {
512 // filter the data
513 thisfilter->filter (request, response, err, logout);
[4216]514 if (err != noError) return;
[226]515 // fill in the metadata for each of the OIDs (if it is requested)
516 if (request.filterResultOptions & FRmetadata) {
[466]517 bool processed = false;
[226]518 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
519 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
520 while (resultdoc_here != resultdoc_end) {
521 // try each of the sources in turn
522 sourcelistclass::iterator source_here = sources.begin();
523 sourcelistclass::iterator source_end = sources.end();
524 while (source_here != source_end) {
525 assert ((*source_here).s != NULL);
526 if (((*source_here).s != NULL) &&
527 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
[271]528 request.getParents, request.fields,
529 (*resultdoc_here).OID, (*resultdoc_here).metadata,
530 err, logout))) {
[226]531 if (err != noError) return;
[466]532 processed = true;
[226]533 break;
534 }
[9620]535 ++source_here;
[226]536 }
[466]537 if (!processed) {
[12018]538
539 logout << text_t2ascii << "Protocol Error: nothing processed for "
540 << "filter \"" << request.filterName << "\".\n\n";
541
[466]542 err = protocolError;
543 return;
544 }
[9620]545 ++resultdoc_here;
[226]546 }
[16027]547 }
548
549 err = noError;
550 }
551 else
552 {
[226]553 response.clear ();
[196]554 err = protocolError;
[226]555 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
556 << "filter \"" << request.filterName << "\".\n\n";
[191]557 }
[186]558}
559
[259]560void collectserver::get_document (const DocumentRequest_t &request,
561 DocumentResponse_t &response,
562 comerror_t &err, ostream &logout) {
[196]563
[259]564 sourcelistclass::iterator source_here = sources.begin();
565 sourcelistclass::iterator source_end = sources.end();
566 while (source_here != source_end) {
567 assert ((*source_here).s != NULL);
568 if (((*source_here).s != NULL) &&
569 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
570 if (err != noError) return;
571 break;
572 }
[9620]573 ++source_here;
[259]574 }
575}
[186]576
[4774]577void collectserver::is_searchable (bool &issearchable, comerror_t &err,
578 ostream &logout) {
[492]579
[4774]580 sourcelistclass::iterator source_here = sources.begin();
581 sourcelistclass::iterator source_end = sources.end();
582 while (source_here != source_end) {
583 assert ((*source_here).s != NULL);
584 if (((*source_here).s != NULL) &&
585 ((*source_here).s->is_searchable (issearchable, err, logout))) {
586 if (err != noError) return;
587 break;
588 }
[9620]589 ++source_here;
[4774]590 }
591}
592
593
[492]594bool operator==(const collectserverptr &x, const collectserverptr &y) {
595 return (x.c == y.c);
596}
597
598bool operator<(const collectserverptr &x, const collectserverptr &y) {
599 return (x.c < y.c);
600}
601
602
[166]603// thecollectserver remains the property of the calling code but
604// should not be deleted until it is removed from this list.
605void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
606 // can't add a null collection server
607 assert (thecollectserver != NULL);
608 if (thecollectserver == NULL) return;
609
610 // can't add an collection server with no collection name
611 assert (!(thecollectserver->get_collection_name()).empty());
612 if ((thecollectserver->get_collection_name()).empty()) return;
613
614 collectserverptr cptr;
615 cptr.c = thecollectserver;
616 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
617}
618
619// getcollectserver will return NULL if the collectserver could not be found
620collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
621 // can't find a collection with no name
622 if (collection.empty()) return NULL;
623
624 iterator here = collectserverptrs.find (collection);
625 if (here == collectserverptrs.end()) return NULL;
626
627 return (*here).second.c;
628}
Note: See TracBrowser for help on using the repository browser.