source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 21326

Last change on this file since 21326 was 21324, checked in by ak19, 14 years ago

Changes to makefiles, configure files, and source code to work with the new configure flags that allow indexers to be individually compiled up by setting each indexer to be enabled or disabled (enable-mg, enable-mgpp, enable-lucene)

  • Property svn:keywords set to Author Date Id Revision
File size: 18.3 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
39
40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
59
60collectserver::collectserver ()
61 : collectinfo()
62{
63 configinfo.collection = "null";
64}
65
66collectserver::~collectserver () {
67
68 // clean up the sources
69 sourcelistclass::iterator source_here = sources.begin();
70 sourcelistclass::iterator source_end = sources.end();
71 while (source_here != source_end) {
72 if ((*source_here).s != NULL)
73 delete (*source_here).s;
74 ++source_here;
75 }
76 sources.clear();
77
78 // clean up the filters
79 filtermapclass::iterator filter_here = filters.begin();
80 filtermapclass::iterator filter_end = filters.end();
81 while (filter_here != filter_end) {
82 if ((*filter_here).second.f != NULL)
83 delete (*filter_here).second.f;
84 ++filter_here;
85 }
86 filters.clear();
87}
88
89// configure should be called for each line in the
90// configuration files to configure the collection server and everything
91// it contains. The configuration should take place just before initialisation
92void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
93 if (cfgline.size() >= 1) {
94 const text_t &value = cfgline[0];
95 if (key == "plugin")
96 {
97 //get the plugin name
98 const text_t &name = cfgline[0];
99
100 if (name == "HTMLPlugin")
101 {
102 for (int hI = 1; hI < cfgline.size(); hI++)
103 {
104 const text_t &plugOption = cfgline[hI];
105
106 if (plugOption == "-use_realistic_book")
107 {
108 collectinfo.useBook = true;
109 break;
110 }
111 }
112 }
113 }
114 else if (key == "gsdlhome") configinfo.gsdlhome = value;
115 else if (key == "gdbmhome") configinfo.dbhome = value;
116 else if (key == "collecthome") configinfo.collecthome = value;
117 else if (key == "collection") {
118 configinfo.collection = value;
119 collectinfo.shortInfo.name = value;
120 }
121 else if (key == "collectdir") configinfo.collectdir = value;
122 else if (key == "host") collectinfo.shortInfo.host = value;
123 else if (key == "port") collectinfo.shortInfo.port = value.getint();
124 else if (key == "public") {
125 if (value == "true") collectinfo.isPublic = true;
126 else collectinfo.isPublic = false;
127 } else if (key == "beta") {
128 if (value == "true") collectinfo.isBeta = true;
129 else collectinfo.isBeta = false;
130 } else if (key == "collectgroup") {
131 if (value == "true") collectinfo.isCollectGroup = true;
132 else collectinfo.isCollectGroup = false;
133 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
134 else if (key == "supercollectionoptions") {
135 text_tarray::const_iterator begin = cfgline.begin();
136 text_tarray::const_iterator end = cfgline.end();
137 while(begin != end) {
138
139 if (*begin == "uniform_search_results_formatting") {
140 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
141 }
142 begin++;
143 }
144 }
145 else if (key == "builddate") collectinfo.buildDate = value.getint();
146 else if (key == "languages") collectinfo.languages = cfgline;
147 else if (key == "numdocs") collectinfo.numDocs = value.getint();
148 else if (key == "numsections") collectinfo.numSections = value.getint();
149 else if (key == "numwords") collectinfo.numWords = value.getint();
150 else if (key == "numbytes") collectinfo.numBytes = value.getint();
151 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
152 else if (key == "collectionmeta") {
153 // genuine collmeta get added as collectionmeta and collection_macros
154 // .collmeta just get added as collection_macros
155 text_t params;
156 if (cfgline.size() == 3) {
157 // get the params for later
158 text_t::const_iterator first=cfgline[1].begin()+1;
159 text_t::const_iterator last=cfgline[1].end()-1;
160 params=substr(first, last);
161 }
162
163 text_t meta_name = cfgline[0];
164 if (*(meta_name.begin())=='.') {
165 // a .xxx collectionmeta. strip off the . and
166 // look it up in the indexmap to get the actual value
167
168 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
169 text_t new_name;
170 if (indexmap.from2to(name, new_name)) {
171 meta_name = new_name;
172 }
173 } else {
174 // add them to collectionmeta
175 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
176 if (cfgline.size() == 2) {
177 lang_map[g_EmptyText] = cfgline[1];
178 } else if (cfgline.size() == 3 ) {
179 // get the lang out of params
180 paramhashtype params_hash;
181 splitparams(params, params_hash);
182
183 text_t lang = params_hash["l"];
184 lang_map[lang] = cfgline[2];
185 if (lang_map[g_EmptyText].empty()) {
186 // want the first one as the default if no default specified
187 lang_map[g_EmptyText] = cfgline[2];
188 }
189 }
190 collectinfo.collectionmeta[cfgline[0]] = lang_map;
191
192 }
193
194 // add all collectionmeta to macro list
195 text_tmap params_map = collectinfo.collection_macros[meta_name];
196
197 if (cfgline.size() == 2) {// no params for this macro
198 params_map[g_EmptyText] = cfgline[1];
199 }
200 else if (cfgline.size() == 3) {// has params
201 params_map[params] = cfgline[2];
202 if (params_map[g_EmptyText].empty()) {
203 params_map[g_EmptyText] = cfgline[2];
204 }
205 }
206 collectinfo.collection_macros[meta_name] = params_map;
207 }
208 else if (key == "collectionmacro") {
209 text_t nobrackets;
210 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
211 // add all to macro list
212 if (cfgline.size() == 2) { // no params for this macro
213 params_map[g_EmptyText] = cfgline[1];
214 }
215 else if (cfgline.size() == 3) {// has params
216 // strip [ ] brackets from params
217 text_t::const_iterator first=cfgline[1].begin()+1;
218 text_t::const_iterator last=cfgline[1].end()-1;
219 nobrackets=substr(first, last);
220 params_map[nobrackets] = cfgline[2];
221 }
222 collectinfo.collection_macros[cfgline[0]] = params_map;
223
224 } else if (key == "format" && cfgline.size() == 2)
225 collectinfo.format[cfgline[0]] = cfgline[1];
226 else if (key == "building" && cfgline.size() == 2)
227 collectinfo.building[cfgline[0]] = cfgline[1];
228 else if (key == "httpdomain") collectinfo.httpdomain = value;
229 else if (key == "httpprefix") collectinfo.httpprefix = value;
230 else if (key == "receptionist") collectinfo.receptionist = value;
231 else if (key == "buildtype") {
232 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
233 collectinfo.buildType = value;
234 }
235 // backwards compatibility - searchytpes is now a format statement
236 else if (key == "searchtype") { // means buildtype is mgpp
237 if (collectinfo.buildType.empty()) {
238 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
239 collectinfo.buildType = "mgpp";
240 }
241 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
242 //collectinfo.searchTypes = cfgline;
243 }
244 else if (key == "separate_cjk") {
245 if (value == "true") collectinfo.isSegmented = true;
246 else collectinfo.isSegmented = false;
247 }
248 // What have we set in our collect.cfg file : document or collection ?
249 else if (key == "authenticate") collectinfo.authenticate = value;
250
251 // What have we set for our group list
252 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
253
254 // store all the mappings for use when collection meta is read later
255 // (build.cfg read before collect.cfg)
256 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
257 indexmap.importmap (cfgline, true);
258
259 }
260 // In the map the key-value pair contain the same
261 // data i.e key == data, if key is 2 then data is 2
262
263 // What have we set for our public_documents ACL
264 else if (key == "public_documents")
265 {
266 text_tarray::const_iterator begin = cfgline.begin();
267 text_tarray::const_iterator end = cfgline.end();
268 while(begin != end)
269 {
270 // key = data i.e if key is 2 then data is 2
271 // collectinfo.public_documents[*begin] is the key
272 // *begin is the data value
273
274 collectinfo.public_documents[*begin] = *begin;
275 ++begin;
276 }
277 }
278
279 // What have we set for our private_documents ACL
280 else if (key == "private_documents")
281 {
282 text_tarray::const_iterator begin = cfgline.begin();
283 text_tarray::const_iterator end = cfgline.end();
284 while(begin != end)
285 {
286 // key = data i.e if key is 2 then data is 2
287 // collectinfo.public_documents[*begin] is the key
288 // *begin is the data value
289
290 collectinfo.private_documents[*begin] = *begin;
291 ++begin;
292 }
293 }
294
295 // dynamic_classifier <UniqueID> "<Options>"
296 else if (key == "dynamic_classifier")
297 {
298 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
299 }
300 }
301
302 // configure the filters
303 filtermapclass::iterator filter_here = filters.begin();
304 filtermapclass::iterator filter_end = filters.end();
305 while (filter_here != filter_end) {
306 assert ((*filter_here).second.f != NULL);
307 if ((*filter_here).second.f != NULL)
308 (*filter_here).second.f->configure(key, cfgline);
309
310 ++filter_here;
311 }
312
313 // configure the sources
314 sourcelistclass::iterator source_here = sources.begin();
315 sourcelistclass::iterator source_end = sources.end();
316 while (source_here != source_end) {
317 assert ((*source_here).s != NULL);
318 if ((*source_here).s != NULL)
319 (*source_here).s->configure(key, cfgline);
320
321 ++source_here;
322 }
323}
324
325
326void collectserver::configure (const text_t &key, const text_t &value) {
327 text_tarray cfgline;
328 cfgline.push_back (value);
329 configure(key, cfgline);
330}
331
332void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
333 // if we've not been properly configured, then it is a foregone
334 // conclusion that we cannot be active
335 if (this->configinfo.collection == "null")
336 {
337 wasSuccess = false;
338 }
339 // if no build date exists, then the collection was probably not built;
340 // ditto if the number of documents is zero, then something is pretty
341 // wrong
342 else if (this->collectinfo.buildDate == 0 ||
343 this->collectinfo.numDocs == 0)
344 {
345 wasSuccess = false;
346 }
347 // it is probably okay
348 else
349 wasSuccess = true;
350}
351
352
353bool collectserver::init (ostream &logout) {
354 // delete the indexmap
355 indexmap.clear();
356
357 // init the filters
358 filtermapclass::iterator filter_here = filters.begin();
359 filtermapclass::iterator filter_end = filters.end();
360 while (filter_here != filter_end) {
361 assert ((*filter_here).second.f != NULL);
362 if (((*filter_here).second.f != NULL) &&
363 !(*filter_here).second.f->init(logout)) return false;
364
365 ++filter_here;
366 }
367
368 // init the sources
369 sourcelistclass::iterator source_here = sources.begin();
370 sourcelistclass::iterator source_end = sources.end();
371 while (source_here != source_end) {
372 assert ((*source_here).s != NULL);
373 if (((*source_here).s != NULL) &&
374 !(*source_here).s->init(logout)) return false;
375
376 ++source_here;
377 }
378
379 return true;
380}
381
382
383void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
384 comerror_t &err, ostream &/*logout*/) {
385 reponse = collectinfo;
386 err = noError;
387}
388
389void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
390 comerror_t &err, ostream &/*logout*/) {
391 response.clear ();
392
393 // get a list of filter names
394 filtermapclass::iterator filter_here = filters.begin();
395 filtermapclass::iterator filter_end = filters.end();
396 while (filter_here != filter_end) {
397 response.filterNames.insert ((*filter_here).first);
398 ++filter_here;
399 }
400
401 err = noError;
402}
403
404void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
405 InfoFilterOptionsResponse_t &response,
406 comerror_t &err, ostream &logout) {
407 outconvertclass text_t2ascii;
408
409 filterclass *thisfilter = filters.getfilter(request.filterName);
410 if (thisfilter != NULL) {
411 thisfilter->get_filteroptions (response, err, logout);
412 } else {
413 response.clear ();
414 err = protocolError;
415 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
416 << "filter \"" << request.filterName << "\".\n\n";
417 }
418}
419
420void collectserver::filter (FilterRequest_t &request,
421 FilterResponse_t &response,
422 comerror_t &err, ostream &logout) {
423 outconvertclass text_t2ascii;
424
425 // translate any ".fc", ".pr" etc. stuff in the docSet
426 text_t translatedOID;
427 text_tarray translatedOIDs;
428 text_tarray::iterator doc_here = request.docSet.begin();
429 text_tarray::iterator doc_end = request.docSet.end();
430 while (doc_here != doc_end) {
431 if (needs_translating (*doc_here)) {
432 sourcelistclass::iterator source_here = sources.begin();
433 sourcelistclass::iterator source_end = sources.end();
434 while (source_here != source_end) {
435 assert ((*source_here).s != NULL);
436 if (((*source_here).s != NULL) &&
437 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
438 if (err != noError) return;
439 break;
440 }
441 ++source_here;
442 }
443 translatedOIDs.push_back (translatedOID);
444 } else {
445 translatedOIDs.push_back (*doc_here);
446 }
447 ++doc_here;
448 }
449 request.docSet = translatedOIDs;
450
451 response.clear();
452
453 filterclass *thisfilter = filters.getfilter(request.filterName);
454 if (thisfilter != NULL) {
455 // filter the data
456 thisfilter->filter (request, response, err, logout);
457 if (err != noError) return;
458 // fill in the metadata for each of the OIDs (if it is requested)
459 if (request.filterResultOptions & FRmetadata) {
460 bool processed = false;
461 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
462 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
463 while (resultdoc_here != resultdoc_end) {
464 // try each of the sources in turn
465 sourcelistclass::iterator source_here = sources.begin();
466 sourcelistclass::iterator source_end = sources.end();
467 while (source_here != source_end) {
468 assert ((*source_here).s != NULL);
469 if (((*source_here).s != NULL) &&
470 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
471 request.getParents, request.fields,
472 (*resultdoc_here).OID, (*resultdoc_here).metadata,
473 err, logout))) {
474 if (err != noError) return;
475 processed = true;
476 break;
477 }
478 ++source_here;
479 }
480 if (!processed) {
481
482 logout << text_t2ascii << "Protocol Error: nothing processed for "
483 << "filter \"" << request.filterName << "\".\n\n";
484
485 err = protocolError;
486 return;
487 }
488 ++resultdoc_here;
489 }
490 }
491
492 err = noError;
493 }
494 else
495 {
496 response.clear ();
497 err = protocolError;
498 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
499 << "filter \"" << request.filterName << "\".\n\n";
500 }
501}
502
503void collectserver::get_document (const DocumentRequest_t &request,
504 DocumentResponse_t &response,
505 comerror_t &err, ostream &logout) {
506
507 sourcelistclass::iterator source_here = sources.begin();
508 sourcelistclass::iterator source_end = sources.end();
509 while (source_here != source_end) {
510 assert ((*source_here).s != NULL);
511 if (((*source_here).s != NULL) &&
512 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
513 if (err != noError) return;
514 break;
515 }
516 ++source_here;
517 }
518}
519
520void collectserver::is_searchable (bool &issearchable, comerror_t &err,
521 ostream &logout) {
522
523 sourcelistclass::iterator source_here = sources.begin();
524 sourcelistclass::iterator source_end = sources.end();
525 while (source_here != source_end) {
526 assert ((*source_here).s != NULL);
527 if (((*source_here).s != NULL) &&
528 ((*source_here).s->is_searchable (issearchable, err, logout))) {
529 if (err != noError) return;
530 break;
531 }
532 ++source_here;
533 }
534}
535
536
537bool operator==(const collectserverptr &x, const collectserverptr &y) {
538 return (x.c == y.c);
539}
540
541bool operator<(const collectserverptr &x, const collectserverptr &y) {
542 return (x.c < y.c);
543}
544
545
546// thecollectserver remains the property of the calling code but
547// should not be deleted until it is removed from this list.
548void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
549 // can't add a null collection server
550 assert (thecollectserver != NULL);
551 if (thecollectserver == NULL) return;
552
553 // can't add an collection server with no collection name
554 assert (!(thecollectserver->get_collection_name()).empty());
555 if ((thecollectserver->get_collection_name()).empty()) return;
556
557 collectserverptr cptr;
558 cptr.c = thecollectserver;
559 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
560}
561
562// getcollectserver will return NULL if the collectserver could not be found
563collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
564 // can't find a collection with no name
565 if (collection.empty()) return NULL;
566
567 iterator here = collectserverptrs.find (collection);
568 if (here == collectserverptrs.end()) return NULL;
569
570 return (*here).second.c;
571}
Note: See TracBrowser for help on using the repository browser.