source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 29100

Last change on this file since 29100 was 29100, checked in by kjdon, 10 years ago

Add PDFPlugin to the line checking for the -use_realistic_book option, as I have now added that option to PDFPlugin.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.5 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
39
40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
86
87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
95collectserver::collectserver ()
96 : collectinfo()
97{
98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
109 ++source_here;
110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
119 ++filter_here;
120 }
121 filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128 if (cfgline.size() >= 1) {
129 const text_t &value = cfgline[0];
130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
135 if (name == "HTMLPlugin" || name== "PDFPlugin")
136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
141 if (plugOption == "-use_realistic_book")
142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
150 else if (key == "gdbmhome") configinfo.dbhome = value;
151 else if (key == "collecthome") configinfo.collecthome = value;
152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
180 else if (key == "builddate") collectinfo.buildDate = value.getint();
181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
183 else if (key == "numsections") collectinfo.numSections = value.getint();
184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187 else if (key == "collectionmeta") {
188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
205
206 // Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
207 // Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here.
208 // So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
209 // contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.
210
211 if (indexmap.from2to(name, new_name)) {
212 meta_name = new_name;
213 }
214 } else {
215 // add them to collectionmeta
216 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
217 if (cfgline.size() == 2) {
218 lang_map[g_EmptyText] = cfgline[1];
219 } else if (cfgline.size() == 3 ) {
220 // get the lang out of params
221 paramhashtype params_hash;
222 splitparams(params, params_hash);
223
224 text_t lang = params_hash["l"];
225 lang_map[lang] = cfgline[2];
226 if (lang_map[g_EmptyText].empty()) {
227 // want the first one as the default if no default specified
228 lang_map[g_EmptyText] = cfgline[2];
229 }
230 }
231 collectinfo.collectionmeta[cfgline[0]] = lang_map;
232
233 }
234
235 // add all collectionmeta to macro list
236 text_tmap params_map = collectinfo.collection_macros[meta_name];
237
238 if (cfgline.size() == 2) {// no params for this macro
239 params_map[g_EmptyText] = cfgline[1];
240 }
241 else if (cfgline.size() == 3) {// has params
242 params_map[params] = cfgline[2];
243 if (params_map[g_EmptyText].empty()) {
244 params_map[g_EmptyText] = cfgline[2];
245 }
246 }
247 collectinfo.collection_macros[meta_name] = params_map;
248 }
249 else if (key == "collectionmacro") {
250 text_t nobrackets;
251 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
252 // add all to macro list
253 if (cfgline.size() == 2) { // no params for this macro
254 params_map[g_EmptyText] = cfgline[1];
255 }
256 else if (cfgline.size() == 3) {// has params
257 // strip [ ] brackets from params
258 text_t::const_iterator first=cfgline[1].begin()+1;
259 text_t::const_iterator last=cfgline[1].end()-1;
260 nobrackets=substr(first, last);
261 params_map[nobrackets] = cfgline[2];
262 }
263 collectinfo.collection_macros[cfgline[0]] = params_map;
264
265 } else if (key == "format" && cfgline.size() == 2)
266 collectinfo.format[cfgline[0]] = cfgline[1];
267 else if (key == "building" && cfgline.size() == 2)
268 collectinfo.building[cfgline[0]] = cfgline[1];
269 else if (key == "httpdomain") collectinfo.httpdomain = value;
270 else if (key == "httpprefix") collectinfo.httpprefix = value;
271 else if (key == "receptionist") collectinfo.receptionist = value;
272 else if (key == "buildtype") {
273 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
274 collectinfo.buildType = value;
275 }
276 // backwards compatibility - searchytpes is now a format statement
277 else if (key == "searchtype") { // means buildtype is mgpp
278 if (collectinfo.buildType.empty()) {
279 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
280 collectinfo.buildType = "mgpp";
281 }
282 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
283 //collectinfo.searchTypes = cfgline;
284 }
285 else if (key == "infodbtype") {
286 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
287 collectinfo.infodbType = value;
288 }
289 else if (key == "separate_cjk") {
290 if (value == "true") collectinfo.isSegmented = true;
291 else collectinfo.isSegmented = false;
292 }
293 // What have we set in our collect.cfg file : document or collection ?
294 else if (key == "authenticate") collectinfo.authenticate = value;
295
296 // What have we set for our group list
297 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
298
299 // build.cfg, earliestDatestamp of this collection needed for
300 // OAIServer to work out earliestDatestamp of this repository
301 else if (key == "earliestdatestamp") {
302 collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
303 }
304
305 // store all the mappings for use when collection meta is read later
306 // (build.cfg read before collect.cfg)
307 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
308 indexmap.importmap (cfgline, true);
309
310 }
311 // In the map the key-value pair contain the same
312 // data i.e key == data, if key is 2 then data is 2
313
314 // What have we set for our public_documents ACL
315 else if (key == "public_documents")
316 {
317 text_tarray::const_iterator begin = cfgline.begin();
318 text_tarray::const_iterator end = cfgline.end();
319 while(begin != end)
320 {
321 // key = data i.e if key is 2 then data is 2
322 // collectinfo.public_documents[*begin] is the key
323 // *begin is the data value
324
325 collectinfo.public_documents[*begin] = *begin;
326 ++begin;
327 }
328 }
329
330 // What have we set for our private_documents ACL
331 else if (key == "private_documents")
332 {
333 text_tarray::const_iterator begin = cfgline.begin();
334 text_tarray::const_iterator end = cfgline.end();
335 while(begin != end)
336 {
337 // key = data i.e if key is 2 then data is 2
338 // collectinfo.public_documents[*begin] is the key
339 // *begin is the data value
340
341 collectinfo.private_documents[*begin] = *begin;
342 ++begin;
343 }
344 }
345
346 // dynamic_classifier <UniqueID> "<Options>"
347 else if (key == "dynamic_classifier")
348 {
349 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
350 }
351 }
352
353 // configure the filters
354 filtermapclass::iterator filter_here = filters.begin();
355 filtermapclass::iterator filter_end = filters.end();
356 while (filter_here != filter_end) {
357 assert ((*filter_here).second.f != NULL);
358 if ((*filter_here).second.f != NULL)
359 (*filter_here).second.f->configure(key, cfgline);
360
361 ++filter_here;
362 }
363
364 // configure the sources
365 sourcelistclass::iterator source_here = sources.begin();
366 sourcelistclass::iterator source_end = sources.end();
367 while (source_here != source_end) {
368 assert ((*source_here).s != NULL);
369 if ((*source_here).s != NULL)
370 (*source_here).s->configure(key, cfgline);
371
372 ++source_here;
373 }
374}
375
376
377void collectserver::configure (const text_t &key, const text_t &value) {
378 text_tarray cfgline;
379 cfgline.push_back (value);
380 configure(key, cfgline);
381}
382
383void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
384 // if we've not been properly configured, then it is a foregone
385 // conclusion that we cannot be active
386 if (this->configinfo.collection == "null")
387 {
388 wasSuccess = false;
389 }
390 // if no build date exists, then the collection was probably not built;
391 // ditto if the number of documents is zero, then something is pretty
392 // wrong
393 else if (this->collectinfo.buildDate == 0 ||
394 this->collectinfo.numDocs == 0)
395 {
396 wasSuccess = false;
397 }
398 // it is probably okay
399 else
400 wasSuccess = true;
401}
402
403
404bool collectserver::init (ostream &logout) {
405 // delete the indexmap
406 indexmap.clear();
407
408 // init the filters
409 filtermapclass::iterator filter_here = filters.begin();
410 filtermapclass::iterator filter_end = filters.end();
411 while (filter_here != filter_end) {
412 assert ((*filter_here).second.f != NULL);
413 if (((*filter_here).second.f != NULL) &&
414 !(*filter_here).second.f->init(logout)) return false;
415
416 ++filter_here;
417 }
418
419 // init the sources
420 sourcelistclass::iterator source_here = sources.begin();
421 sourcelistclass::iterator source_end = sources.end();
422 while (source_here != source_end) {
423 assert ((*source_here).s != NULL);
424 if (((*source_here).s != NULL) &&
425 !(*source_here).s->init(logout)) return false;
426
427 ++source_here;
428 }
429
430 return true;
431}
432
433
434void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
435 comerror_t &err, ostream &/*logout*/) {
436 reponse = collectinfo;
437 err = noError;
438}
439
440void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
441 comerror_t &err, ostream &/*logout*/) {
442 response.clear ();
443
444 // get a list of filter names
445 filtermapclass::iterator filter_here = filters.begin();
446 filtermapclass::iterator filter_end = filters.end();
447 while (filter_here != filter_end) {
448 response.filterNames.insert ((*filter_here).first);
449 ++filter_here;
450 }
451
452 err = noError;
453}
454
455void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
456 InfoFilterOptionsResponse_t &response,
457 comerror_t &err, ostream &logout) {
458 outconvertclass text_t2ascii;
459
460 filterclass *thisfilter = filters.getfilter(request.filterName);
461 if (thisfilter != NULL) {
462 thisfilter->get_filteroptions (response, err, logout);
463 } else {
464 response.clear ();
465 err = protocolError;
466 text_t& infodbtype = collectinfo.infodbType;
467
468 // Don't print out the warning if were's asking about SQLQueryFilter
469 // when we know the infodbtype is something other than .*sql.*
470
471 if ((request.filterName != "SQLQueryFilter")
472 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
473 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
474 << "filter \"" << request.filterName << "\".\n\n";
475 }
476 }
477}
478
479void collectserver::filter (FilterRequest_t &request,
480 FilterResponse_t &response,
481 comerror_t &err, ostream &logout) {
482 outconvertclass text_t2ascii;
483
484 // translate any ".fc", ".pr" etc. stuff in the docSet
485 text_t translatedOID;
486 text_tarray translatedOIDs;
487 text_tarray::iterator doc_here = request.docSet.begin();
488 text_tarray::iterator doc_end = request.docSet.end();
489 while (doc_here != doc_end) {
490 if (needs_translating (*doc_here)) {
491 sourcelistclass::iterator source_here = sources.begin();
492 sourcelistclass::iterator source_end = sources.end();
493 while (source_here != source_end) {
494 assert ((*source_here).s != NULL);
495 if (((*source_here).s != NULL) &&
496 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
497 if (err != noError) return;
498 break;
499 }
500 ++source_here;
501 }
502 translatedOIDs.push_back (translatedOID);
503 } else {
504 translatedOIDs.push_back (*doc_here);
505 }
506 ++doc_here;
507 }
508 request.docSet = translatedOIDs;
509
510 response.clear();
511
512 filterclass *thisfilter = filters.getfilter(request.filterName);
513 if (thisfilter != NULL) {
514 // filter the data
515 thisfilter->filter (request, response, err, logout);
516 if (err != noError) return;
517 // fill in the metadata for each of the OIDs (if it is requested)
518 if (request.filterResultOptions & FRmetadata) {
519 bool processed = false;
520 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
521 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
522 while (resultdoc_here != resultdoc_end) {
523 // try each of the sources in turn
524 sourcelistclass::iterator source_here = sources.begin();
525 sourcelistclass::iterator source_end = sources.end();
526 while (source_here != source_end) {
527 assert ((*source_here).s != NULL);
528 if (((*source_here).s != NULL) &&
529 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
530 request.getParents, request.fields,
531 (*resultdoc_here).OID, (*resultdoc_here).metadata,
532 err, logout))) {
533 if (err != noError) return;
534 processed = true;
535 break;
536 }
537 ++source_here;
538 }
539 if (!processed) {
540
541 logout << text_t2ascii << "Protocol Error: nothing processed for "
542 << "filter \"" << request.filterName << "\".\n\n";
543
544 err = protocolError;
545 return;
546 }
547 ++resultdoc_here;
548 }
549 }
550
551 err = noError;
552 }
553 else
554 {
555 response.clear ();
556 err = protocolError;
557 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
558 << "filter \"" << request.filterName << "\".\n\n";
559 }
560}
561
562void collectserver::get_document (const DocumentRequest_t &request,
563 DocumentResponse_t &response,
564 comerror_t &err, ostream &logout) {
565
566 sourcelistclass::iterator source_here = sources.begin();
567 sourcelistclass::iterator source_end = sources.end();
568 while (source_here != source_end) {
569 assert ((*source_here).s != NULL);
570 if (((*source_here).s != NULL) &&
571 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
572 if (err != noError) return;
573 break;
574 }
575 ++source_here;
576 }
577}
578
579void collectserver::is_searchable (bool &issearchable, comerror_t &err,
580 ostream &logout) {
581
582 sourcelistclass::iterator source_here = sources.begin();
583 sourcelistclass::iterator source_end = sources.end();
584 while (source_here != source_end) {
585 assert ((*source_here).s != NULL);
586 if (((*source_here).s != NULL) &&
587 ((*source_here).s->is_searchable (issearchable, err, logout))) {
588 if (err != noError) return;
589 break;
590 }
591 ++source_here;
592 }
593}
594
595
596bool operator==(const collectserverptr &x, const collectserverptr &y) {
597 return (x.c == y.c);
598}
599
600bool operator<(const collectserverptr &x, const collectserverptr &y) {
601 return (x.c < y.c);
602}
603
604
605// thecollectserver remains the property of the calling code but
606// should not be deleted until it is removed from this list.
607void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
608 // can't add a null collection server
609 assert (thecollectserver != NULL);
610 if (thecollectserver == NULL) return;
611
612 // can't add an collection server with no collection name
613 assert (!(thecollectserver->get_collection_name()).empty());
614 if ((thecollectserver->get_collection_name()).empty()) return;
615
616 collectserverptr cptr;
617 cptr.c = thecollectserver;
618 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
619}
620
621// getcollectserver will return NULL if the collectserver could not be found
622collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
623 // can't find a collection with no name
624 if (collection.empty()) return NULL;
625
626 iterator here = collectserverptrs.find (collection);
627 if (here == collectserverptrs.end()) return NULL;
628
629 return (*here).second.c;
630}
Note: See TracBrowser for help on using the repository browser.