source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 24411

Last change on this file since 24411 was 24411, checked in by ak19, 10 years ago

Katherine's commit for modelcol's collect.cfg explained that its collectionmeta section should not contain ex.* prefixes for GS extracted metadata. (It can and does contain ex. prefixes for extracted embedded metadata, since otherwise we may have multiple occurrences of dc.Title in there, when one of them is meant to refer to ex.dc.Title.) This resulted in correcting collect.cfg to not refer to ex.Title anymore in its collectionmeta section. And GLI is processing all ex. prefixes now, so undoing the previous commit in runtime-source where all ex. prefixes were removed from the collectionmeta section: this is no longer applicable (since there will be no GS extracted meta like ex.Title in the collectionmeta section) AND we don't want ex. prefixes removed from embedded ex.* metadata (like ex.dc.*).

  • Property svn:keywords set to Author Date Id Revision
File size: 20.5 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
39
40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
86
87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
95collectserver::collectserver ()
96 : collectinfo()
97{
98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
109 ++source_here;
110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
119 ++filter_here;
120 }
121 filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128 if (cfgline.size() >= 1) {
129 const text_t &value = cfgline[0];
130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
135 if (name == "HTMLPlugin")
136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
141 if (plugOption == "-use_realistic_book")
142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
150 else if (key == "gdbmhome") configinfo.dbhome = value;
151 else if (key == "collecthome") configinfo.collecthome = value;
152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
180 else if (key == "builddate") collectinfo.buildDate = value.getint();
181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
183 else if (key == "numsections") collectinfo.numSections = value.getint();
184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187 else if (key == "collectionmeta") {
188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
205
206 // Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
207 // Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here.
208 // So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
209 // contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.
210
211 if (indexmap.from2to(name, new_name)) {
212 meta_name = new_name;
213 }
214 } else {
215 // add them to collectionmeta
216 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
217 if (cfgline.size() == 2) {
218 lang_map[g_EmptyText] = cfgline[1];
219 } else if (cfgline.size() == 3 ) {
220 // get the lang out of params
221 paramhashtype params_hash;
222 splitparams(params, params_hash);
223
224 text_t lang = params_hash["l"];
225 lang_map[lang] = cfgline[2];
226 if (lang_map[g_EmptyText].empty()) {
227 // want the first one as the default if no default specified
228 lang_map[g_EmptyText] = cfgline[2];
229 }
230 }
231 collectinfo.collectionmeta[cfgline[0]] = lang_map;
232
233 }
234
235 // add all collectionmeta to macro list
236 text_tmap params_map = collectinfo.collection_macros[meta_name];
237
238 if (cfgline.size() == 2) {// no params for this macro
239 params_map[g_EmptyText] = cfgline[1];
240 }
241 else if (cfgline.size() == 3) {// has params
242 params_map[params] = cfgline[2];
243 if (params_map[g_EmptyText].empty()) {
244 params_map[g_EmptyText] = cfgline[2];
245 }
246 }
247 collectinfo.collection_macros[meta_name] = params_map;
248 }
249 else if (key == "collectionmacro") {
250 text_t nobrackets;
251 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
252 // add all to macro list
253 if (cfgline.size() == 2) { // no params for this macro
254 params_map[g_EmptyText] = cfgline[1];
255 }
256 else if (cfgline.size() == 3) {// has params
257 // strip [ ] brackets from params
258 text_t::const_iterator first=cfgline[1].begin()+1;
259 text_t::const_iterator last=cfgline[1].end()-1;
260 nobrackets=substr(first, last);
261 params_map[nobrackets] = cfgline[2];
262 }
263 collectinfo.collection_macros[cfgline[0]] = params_map;
264
265 } else if (key == "format" && cfgline.size() == 2)
266 collectinfo.format[cfgline[0]] = cfgline[1];
267 else if (key == "building" && cfgline.size() == 2)
268 collectinfo.building[cfgline[0]] = cfgline[1];
269 else if (key == "httpdomain") collectinfo.httpdomain = value;
270 else if (key == "httpprefix") collectinfo.httpprefix = value;
271 else if (key == "receptionist") collectinfo.receptionist = value;
272 else if (key == "buildtype") {
273 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
274 collectinfo.buildType = value;
275 }
276 // backwards compatibility - searchytpes is now a format statement
277 else if (key == "searchtype") { // means buildtype is mgpp
278 if (collectinfo.buildType.empty()) {
279 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
280 collectinfo.buildType = "mgpp";
281 }
282 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
283 //collectinfo.searchTypes = cfgline;
284 }
285 else if (key == "infodbtype") {
286 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
287 collectinfo.infodbType = value;
288 }
289 else if (key == "separate_cjk") {
290 if (value == "true") collectinfo.isSegmented = true;
291 else collectinfo.isSegmented = false;
292 }
293 // What have we set in our collect.cfg file : document or collection ?
294 else if (key == "authenticate") collectinfo.authenticate = value;
295
296 // What have we set for our group list
297 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
298
299 // build.cfg, earliestDatestamp of this collection needed for
300 // OAIServer to work out earliestDatestamp of this repository
301 else if (key == "earliestdatestamp") {
302 collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
303 }
304
305 // store all the mappings for use when collection meta is read later
306 // (build.cfg read before collect.cfg)
307 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
308 indexmap.importmap (cfgline, true);
309
310 }
311 // In the map the key-value pair contain the same
312 // data i.e key == data, if key is 2 then data is 2
313
314 // What have we set for our public_documents ACL
315 else if (key == "public_documents")
316 {
317 text_tarray::const_iterator begin = cfgline.begin();
318 text_tarray::const_iterator end = cfgline.end();
319 while(begin != end)
320 {
321 // key = data i.e if key is 2 then data is 2
322 // collectinfo.public_documents[*begin] is the key
323 // *begin is the data value
324
325 collectinfo.public_documents[*begin] = *begin;
326 ++begin;
327 }
328 }
329
330 // What have we set for our private_documents ACL
331 else if (key == "private_documents")
332 {
333 text_tarray::const_iterator begin = cfgline.begin();
334 text_tarray::const_iterator end = cfgline.end();
335 while(begin != end)
336 {
337 // key = data i.e if key is 2 then data is 2
338 // collectinfo.public_documents[*begin] is the key
339 // *begin is the data value
340
341 collectinfo.private_documents[*begin] = *begin;
342 ++begin;
343 }
344 }
345
346 // dynamic_classifier <UniqueID> "<Options>"
347 else if (key == "dynamic_classifier")
348 {
349 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
350 }
351 }
352
353 // configure the filters
354 filtermapclass::iterator filter_here = filters.begin();
355 filtermapclass::iterator filter_end = filters.end();
356 while (filter_here != filter_end) {
357 assert ((*filter_here).second.f != NULL);
358 if ((*filter_here).second.f != NULL)
359 (*filter_here).second.f->configure(key, cfgline);
360
361 ++filter_here;
362 }
363
364 // configure the sources
365 sourcelistclass::iterator source_here = sources.begin();
366 sourcelistclass::iterator source_end = sources.end();
367 while (source_here != source_end) {
368 assert ((*source_here).s != NULL);
369 if ((*source_here).s != NULL)
370 (*source_here).s->configure(key, cfgline);
371
372 ++source_here;
373 }
374}
375
376
377void collectserver::configure (const text_t &key, const text_t &value) {
378 text_tarray cfgline;
379 cfgline.push_back (value);
380 configure(key, cfgline);
381}
382
383void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
384 // if we've not been properly configured, then it is a foregone
385 // conclusion that we cannot be active
386 if (this->configinfo.collection == "null")
387 {
388 wasSuccess = false;
389 }
390 // if no build date exists, then the collection was probably not built;
391 // ditto if the number of documents is zero, then something is pretty
392 // wrong
393 else if (this->collectinfo.buildDate == 0 ||
394 this->collectinfo.numDocs == 0)
395 {
396 wasSuccess = false;
397 }
398 // it is probably okay
399 else
400 wasSuccess = true;
401}
402
403
404bool collectserver::init (ostream &logout) {
405 // delete the indexmap
406 indexmap.clear();
407
408 // init the filters
409 filtermapclass::iterator filter_here = filters.begin();
410 filtermapclass::iterator filter_end = filters.end();
411 while (filter_here != filter_end) {
412 assert ((*filter_here).second.f != NULL);
413 if (((*filter_here).second.f != NULL) &&
414 !(*filter_here).second.f->init(logout)) return false;
415
416 ++filter_here;
417 }
418
419 // init the sources
420 sourcelistclass::iterator source_here = sources.begin();
421 sourcelistclass::iterator source_end = sources.end();
422 while (source_here != source_end) {
423 assert ((*source_here).s != NULL);
424 if (((*source_here).s != NULL) &&
425 !(*source_here).s->init(logout)) return false;
426
427 ++source_here;
428 }
429
430 return true;
431}
432
433
434void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
435 comerror_t &err, ostream &/*logout*/) {
436 reponse = collectinfo;
437 err = noError;
438}
439
440void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
441 comerror_t &err, ostream &/*logout*/) {
442 response.clear ();
443
444 // get a list of filter names
445 filtermapclass::iterator filter_here = filters.begin();
446 filtermapclass::iterator filter_end = filters.end();
447 while (filter_here != filter_end) {
448 response.filterNames.insert ((*filter_here).first);
449 ++filter_here;
450 }
451
452 err = noError;
453}
454
455void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
456 InfoFilterOptionsResponse_t &response,
457 comerror_t &err, ostream &logout) {
458 outconvertclass text_t2ascii;
459
460 filterclass *thisfilter = filters.getfilter(request.filterName);
461 if (thisfilter != NULL) {
462 thisfilter->get_filteroptions (response, err, logout);
463 } else {
464 response.clear ();
465 err = protocolError;
466 text_t& infodbtype = collectinfo.infodbType;
467
468 // Don't print out the warning if were's asking about SQLQueryFilter
469 // when we know the infodbtype is something other than .*sql.*
470
471 if ((request.filterName != "SQLQueryFilter")
472 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
473 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
474 << "filter \"" << request.filterName << "\".\n\n";
475 }
476 }
477}
478
479void collectserver::filter (FilterRequest_t &request,
480 FilterResponse_t &response,
481 comerror_t &err, ostream &logout) {
482 outconvertclass text_t2ascii;
483
484 // translate any ".fc", ".pr" etc. stuff in the docSet
485 text_t translatedOID;
486 text_tarray translatedOIDs;
487 text_tarray::iterator doc_here = request.docSet.begin();
488 text_tarray::iterator doc_end = request.docSet.end();
489 while (doc_here != doc_end) {
490 if (needs_translating (*doc_here)) {
491 sourcelistclass::iterator source_here = sources.begin();
492 sourcelistclass::iterator source_end = sources.end();
493 while (source_here != source_end) {
494 assert ((*source_here).s != NULL);
495 if (((*source_here).s != NULL) &&
496 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
497 if (err != noError) return;
498 break;
499 }
500 ++source_here;
501 }
502 translatedOIDs.push_back (translatedOID);
503 } else {
504 translatedOIDs.push_back (*doc_here);
505 }
506 ++doc_here;
507 }
508 request.docSet = translatedOIDs;
509
510 response.clear();
511
512 filterclass *thisfilter = filters.getfilter(request.filterName);
513 if (thisfilter != NULL) {
514 // filter the data
515 thisfilter->filter (request, response, err, logout);
516 if (err != noError) return;
517 // fill in the metadata for each of the OIDs (if it is requested)
518 if (request.filterResultOptions & FRmetadata) {
519 bool processed = false;
520 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
521 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
522 while (resultdoc_here != resultdoc_end) {
523 // try each of the sources in turn
524 sourcelistclass::iterator source_here = sources.begin();
525 sourcelistclass::iterator source_end = sources.end();
526 while (source_here != source_end) {
527 assert ((*source_here).s != NULL);
528 if (((*source_here).s != NULL) &&
529 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
530 request.getParents, request.fields,
531 (*resultdoc_here).OID, (*resultdoc_here).metadata,
532 err, logout))) {
533 if (err != noError) return;
534 processed = true;
535 break;
536 }
537 ++source_here;
538 }
539 if (!processed) {
540
541 logout << text_t2ascii << "Protocol Error: nothing processed for "
542 << "filter \"" << request.filterName << "\".\n\n";
543
544 err = protocolError;
545 return;
546 }
547 ++resultdoc_here;
548 }
549 }
550
551 err = noError;
552 }
553 else
554 {
555 response.clear ();
556 err = protocolError;
557 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
558 << "filter \"" << request.filterName << "\".\n\n";
559 }
560}
561
562void collectserver::get_document (const DocumentRequest_t &request,
563 DocumentResponse_t &response,
564 comerror_t &err, ostream &logout) {
565
566 sourcelistclass::iterator source_here = sources.begin();
567 sourcelistclass::iterator source_end = sources.end();
568 while (source_here != source_end) {
569 assert ((*source_here).s != NULL);
570 if (((*source_here).s != NULL) &&
571 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
572 if (err != noError) return;
573 break;
574 }
575 ++source_here;
576 }
577}
578
579void collectserver::is_searchable (bool &issearchable, comerror_t &err,
580 ostream &logout) {
581
582 sourcelistclass::iterator source_here = sources.begin();
583 sourcelistclass::iterator source_end = sources.end();
584 while (source_here != source_end) {
585 assert ((*source_here).s != NULL);
586 if (((*source_here).s != NULL) &&
587 ((*source_here).s->is_searchable (issearchable, err, logout))) {
588 if (err != noError) return;
589 break;
590 }
591 ++source_here;
592 }
593}
594
595
596bool operator==(const collectserverptr &x, const collectserverptr &y) {
597 return (x.c == y.c);
598}
599
600bool operator<(const collectserverptr &x, const collectserverptr &y) {
601 return (x.c < y.c);
602}
603
604
605// thecollectserver remains the property of the calling code but
606// should not be deleted until it is removed from this list.
607void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
608 // can't add a null collection server
609 assert (thecollectserver != NULL);
610 if (thecollectserver == NULL) return;
611
612 // can't add an collection server with no collection name
613 assert (!(thecollectserver->get_collection_name()).empty());
614 if ((thecollectserver->get_collection_name()).empty()) return;
615
616 collectserverptr cptr;
617 cptr.c = thecollectserver;
618 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
619}
620
621// getcollectserver will return NULL if the collectserver could not be found
622collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
623 // can't find a collection with no name
624 if (collection.empty()) return NULL;
625
626 iterator here = collectserverptrs.find (collection);
627 if (here == collectserverptrs.end()) return NULL;
628
629 return (*here).second.c;
630}
Note: See TracBrowser for help on using the repository browser.