source: main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34 if (buildtype=="mg") {
35#ifndef ENABLE_MG
36 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38 }
39
40 else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44 }
45
46 else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48 cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50 }
51
52 else {
53 cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54 }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61 if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65 }
66 else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70 }
71 else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75 }
76 else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80 }
81 else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83 cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85 }
86
87 else {
88 cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89 }
90
91}
92
93
94
95collectserver::collectserver ()
96 : collectinfo()
97{
98 configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103 // clean up the sources
104 sourcelistclass::iterator source_here = sources.begin();
105 sourcelistclass::iterator source_end = sources.end();
106 while (source_here != source_end) {
107 if ((*source_here).s != NULL)
108 delete (*source_here).s;
109 ++source_here;
110 }
111 sources.clear();
112
113 // clean up the filters
114 filtermapclass::iterator filter_here = filters.begin();
115 filtermapclass::iterator filter_end = filters.end();
116 while (filter_here != filter_end) {
117 if ((*filter_here).second.f != NULL)
118 delete (*filter_here).second.f;
119 ++filter_here;
120 }
121 filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128 if (cfgline.size() >= 1) {
129 const text_t &value = cfgline[0];
130 if (key == "plugin")
131 {
132 //get the plugin name
133 const text_t &name = cfgline[0];
134
135 if (name == "HTMLPlugin" || name== "PDFPlugin")
136 {
137 for (int hI = 1; hI < cfgline.size(); hI++)
138 {
139 const text_t &plugOption = cfgline[hI];
140
141 if (plugOption == "-use_realistic_book")
142 {
143 collectinfo.useBook = true;
144 break;
145 }
146 }
147 }
148 }
149 else if (key == "gsdlhome") configinfo.gsdlhome = value;
150 else if (key == "gdbmhome") configinfo.dbhome = value;
151 else if (key == "collecthome") configinfo.collecthome = value;
152 else if (key == "collection") {
153 configinfo.collection = value;
154 collectinfo.shortInfo.name = value;
155 }
156 else if (key == "collectdir") configinfo.collectdir = value;
157 else if (key == "host") collectinfo.shortInfo.host = value;
158 else if (key == "port") collectinfo.shortInfo.port = value.getint();
159 else if (key == "public") {
160 if (value == "true") collectinfo.isPublic = true;
161 else collectinfo.isPublic = false;
162 } else if (key == "beta") {
163 if (value == "true") collectinfo.isBeta = true;
164 else collectinfo.isBeta = false;
165 } else if (key == "collectgroup") {
166 if (value == "true") collectinfo.isCollectGroup = true;
167 else collectinfo.isCollectGroup = false;
168 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169 else if (key == "supercollectionoptions") {
170 text_tarray::const_iterator begin = cfgline.begin();
171 text_tarray::const_iterator end = cfgline.end();
172 while(begin != end) {
173
174 if (*begin == "uniform_search_results_formatting") {
175 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176 }
177 begin++;
178 }
179 }
180 else if (key == "builddate") collectinfo.buildDate = value.getint();
181 else if (key == "languages") collectinfo.languages = cfgline;
182 else if (key == "numdocs") collectinfo.numDocs = value.getint();
183 else if (key == "numsections") collectinfo.numSections = value.getint();
184 else if (key == "numwords") collectinfo.numWords = value.getint();
185 else if (key == "numbytes") collectinfo.numBytes = value.getint();
186 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187 else if (key == "collectionmeta") {
188 // genuine collmeta get added as collectionmeta and collection_macros
189 // .collmeta just get added as collection_macros
190 text_t params;
191 if (cfgline.size() == 3) {
192 // get the params for later
193 text_t::const_iterator first=cfgline[1].begin()+1;
194 text_t::const_iterator last=cfgline[1].end()-1;
195 params=substr(first, last);
196 }
197
198 text_t meta_name = cfgline[0];
199 if (*(meta_name.begin())=='.') {
200 // a .xxx collectionmeta. strip off the . and
201 // look it up in the indexmap to get the actual value
202
203 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204 text_t new_name;
205
206 // Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
207 // Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here.
208 // So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
209 // contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.
210
211 if (indexmap.from2to(name, new_name)) {
212 meta_name = new_name;
213 }
214 } else {
215 // add them to collectionmeta
216 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
217 if (cfgline.size() == 2) {
218 lang_map[g_EmptyText] = cfgline[1];
219 } else if (cfgline.size() == 3 ) {
220 // get the lang out of params
221 paramhashtype params_hash;
222 splitparams(params, params_hash);
223
224 text_t lang = params_hash["l"];
225 lang_map[lang] = cfgline[2];
226 if (lang_map[g_EmptyText].empty()) {
227 // want the first one as the default if no default specified
228 lang_map[g_EmptyText] = cfgline[2];
229 }
230 }
231 collectinfo.collectionmeta[cfgline[0]] = lang_map;
232
233 }
234
235 // add all collectionmeta to macro list
236 text_tmap params_map = collectinfo.collection_macros[meta_name];
237
238 if (cfgline.size() == 2) {// no params for this macro
239 params_map[g_EmptyText] = cfgline[1];
240 }
241 else if (cfgline.size() == 3) {// has params
242 params_map[params] = cfgline[2];
243 if (params_map[g_EmptyText].empty()) {
244 params_map[g_EmptyText] = cfgline[2];
245 }
246 }
247 collectinfo.collection_macros[meta_name] = params_map;
248 }
249 else if (key == "collectionmacro") {
250 text_t nobrackets;
251 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
252 // add all to macro list
253 if (cfgline.size() == 2) { // no params for this macro
254 params_map[g_EmptyText] = cfgline[1];
255 }
256 else if (cfgline.size() == 3) {// has params
257 // strip [ ] brackets from params
258 text_t::const_iterator first=cfgline[1].begin()+1;
259 text_t::const_iterator last=cfgline[1].end()-1;
260 nobrackets=substr(first, last);
261 params_map[nobrackets] = cfgline[2];
262 }
263 collectinfo.collection_macros[cfgline[0]] = params_map;
264
265 } else if (key == "format" && cfgline.size() == 2)
266 collectinfo.format[cfgline[0]] = cfgline[1];
267 else if (key == "building" && cfgline.size() == 2)
268 collectinfo.building[cfgline[0]] = cfgline[1];
269 else if (key == "httpdomain") collectinfo.httpdomain = value;
270 else if (key == "httpprefix") collectinfo.httpprefix = value;
271 else if (key == "receptionist") collectinfo.receptionist = value;
272 else if (key == "buildtype") {
273 check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
274 collectinfo.buildType = value;
275 }
276 // backwards compatibility - searchytpes is now a format statement
277 else if (key == "searchtype") { // means buildtype is mgpp
278 if (collectinfo.buildType.empty()) {
279 check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
280 collectinfo.buildType = "mgpp";
281 }
282 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
283 //collectinfo.searchTypes = cfgline;
284 }
285 else if (key == "infodbtype") {
286 check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
287 collectinfo.infodbType = value;
288 }
289 else if (key == "separate_cjk") {
290 if (value == "true") collectinfo.isSegmented = true;
291 else collectinfo.isSegmented = false;
292 }
293 // What have we set in our collect.cfg file : document or collection ?
294 else if (key == "authenticate") collectinfo.authenticate = value;
295
296 // What have we set for our group list
297 else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
298
299 // build.cfg, earliestDatestamp of this collection needed for
300 // OAIServer to work out earliestDatestamp of this repository
301 else if (key == "earliestdatestamp") {
302 collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
303 }
304
305 // store all the mappings for use when collection meta is read later
306 // (build.cfg read before collect.cfg)
307 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
308 indexmap.importmap (cfgline, true);
309
310 }
311 // In the map the key-value pair contain the same
312 // data i.e key == data, if key is 2 then data is 2
313
314 // What have we set for our public_documents ACL
315 else if (key == "public_documents")
316 {
317 text_tarray::const_iterator begin = cfgline.begin();
318 text_tarray::const_iterator end = cfgline.end();
319 while(begin != end)
320 {
321 // key = data i.e if key is 2 then data is 2
322 // collectinfo.public_documents[*begin] is the key
323 // *begin is the data value
324
325 collectinfo.public_documents[*begin] = *begin;
326 ++begin;
327 }
328 }
329
330 // What have we set for our private_documents ACL
331 else if (key == "private_documents")
332 {
333 text_tarray::const_iterator begin = cfgline.begin();
334 text_tarray::const_iterator end = cfgline.end();
335 while(begin != end)
336 {
337 // key = data i.e if key is 2 then data is 2
338 // collectinfo.public_documents[*begin] is the key
339 // *begin is the data value
340
341 collectinfo.private_documents[*begin] = *begin;
342 ++begin;
343 }
344 }
345
346 // dynamic_classifier <UniqueID> "<Options>"
347 else if (key == "dynamic_classifier")
348 {
349 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
350 }
351 }
352
353 // configure the filters
354 filtermapclass::iterator filter_here = filters.begin();
355 filtermapclass::iterator filter_end = filters.end();
356 while (filter_here != filter_end) {
357 assert ((*filter_here).second.f != NULL);
358 if ((*filter_here).second.f != NULL)
359 (*filter_here).second.f->configure(key, cfgline);
360
361 ++filter_here;
362 }
363
364 // configure the sources
365 sourcelistclass::iterator source_here = sources.begin();
366 sourcelistclass::iterator source_end = sources.end();
367 while (source_here != source_end) {
368 assert ((*source_here).s != NULL);
369 if ((*source_here).s != NULL)
370 (*source_here).s->configure(key, cfgline);
371
372 ++source_here;
373 }
374}
375
376
377void collectserver::configure (const text_t &key, const text_t &value) {
378 text_tarray cfgline;
379 cfgline.push_back (value);
380 configure(key, cfgline);
381}
382
383void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
384 // if we've not been properly configured, then it is a foregone
385 // conclusion that we cannot be active
386 if (this->configinfo.collection == "null")
387 {
388 wasSuccess = false;
389 }
390 // if no build date exists, then the collection was probably not built;
391 // ditto if the number of documents is zero, then something is pretty
392 // wrong
393 else if (this->collectinfo.buildDate == 0 ||
394 this->collectinfo.numDocs == 0)
395 {
396 wasSuccess = false;
397 }
398 // it is probably okay
399 else
400 wasSuccess = true;
401}
402
403
404bool collectserver::init (ostream &logout) {
405 // delete the indexmap
406 indexmap.clear();
407
408 // init the filters
409 filtermapclass::iterator filter_here = filters.begin();
410 filtermapclass::iterator filter_end = filters.end();
411 while (filter_here != filter_end) {
412 assert ((*filter_here).second.f != NULL);
413 if (((*filter_here).second.f != NULL) &&
414 !(*filter_here).second.f->init(logout)) return false;
415
416 ++filter_here;
417 }
418
419 // init the sources
420 sourcelistclass::iterator source_here = sources.begin();
421 sourcelistclass::iterator source_end = sources.end();
422 while (source_here != source_end) {
423 assert ((*source_here).s != NULL);
424 if (((*source_here).s != NULL) &&
425 !(*source_here).s->init(logout)) return false;
426
427 ++source_here;
428 }
429
430 return true;
431}
432
433
434void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
435 comerror_t &err, ostream &/*logout*/) {
436 reponse = collectinfo;
437 err = noError;
438}
439
440void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
441 comerror_t &err, ostream &/*logout*/) {
442 response.clear ();
443
444 // get a list of filter names
445 filtermapclass::iterator filter_here = filters.begin();
446 filtermapclass::iterator filter_end = filters.end();
447 while (filter_here != filter_end) {
448 response.filterNames.insert ((*filter_here).first);
449 ++filter_here;
450 }
451
452 err = noError;
453}
454
455void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
456 InfoFilterOptionsResponse_t &response,
457 comerror_t &err, ostream &logout) {
458 outconvertclass text_t2ascii;
459
460 filterclass *thisfilter = filters.getfilter(request.filterName);
461 if (thisfilter != NULL) {
462 thisfilter->get_filteroptions (response, err, logout);
463 } else {
464 response.clear ();
465 err = protocolError;
466 text_t& infodbtype = collectinfo.infodbType;
467
468 // Don't print out the warning if were's asking about SQLQueryFilter
469 // when we know the infodbtype is something other than .*sql.*
470
471 if ((request.filterName != "SQLQueryFilter")
472 || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
473 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
474 << "filter \"" << request.filterName << "\".\n\n";
475 }
476 }
477}
478
479void collectserver::filter (FilterRequest_t &request,
480 FilterResponse_t &response,
481 comerror_t &err, ostream &logout) {
482 outconvertclass text_t2ascii;
483
484 // translate any ".fc", ".pr" etc. stuff in the docSet
485 text_t translatedOID;
486 text_tarray translatedOIDs;
487 text_tarray::iterator doc_here = request.docSet.begin();
488 text_tarray::iterator doc_end = request.docSet.end();
489 while (doc_here != doc_end) {
490 if (needs_translating (*doc_here)) {
491 sourcelistclass::iterator source_here = sources.begin();
492 sourcelistclass::iterator source_end = sources.end();
493 while (source_here != source_end) {
494 assert ((*source_here).s != NULL);
495 if (((*source_here).s != NULL) &&
496 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
497 if (err != noError) return;
498 break;
499 }
500 ++source_here;
501 }
502 translatedOIDs.push_back (translatedOID);
503 } else {
504 translatedOIDs.push_back (*doc_here);
505 }
506 ++doc_here;
507 }
508 request.docSet = translatedOIDs;
509
510 response.clear();
511
512 filterclass *thisfilter = filters.getfilter(request.filterName);
513 if (thisfilter != NULL) {
514 // filter the data
515 thisfilter->filter (request, response, err, logout);
516 if (err != noError) return;
517
518 // fill in the metadata for each of the OIDs (if it is requested)
519 if (request.filterResultOptions & FRmetadata) {
520
521 bool processed = false;
522 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
523 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
524 while (resultdoc_here != resultdoc_end) {
525
526 text_t deleted_status = "";
527 bool append_metadata = (request.filterResultOptions & FROAI) ? true : false;
528
529 // try each of the sources in turn
530 sourcelistclass::iterator source_here = sources.begin();
531 sourcelistclass::iterator source_end = sources.end();
532 while (source_here != source_end) {
533 assert ((*source_here).s != NULL);
534
535 // first check for oai metadata from the oai_db, if asked for it (if FROAI is set)
536 if(((*source_here).s != NULL) &&
537 request.filterResultOptions & FROAI &&
538 ((*source_here).s->get_oai_metadata(request.requestParams, request.refParams,
539 request.getParents, request.fields,
540 (*resultdoc_here).OID, deleted_status, (*resultdoc_here).metadata,
541 err, logout))) {
542
543 if (err != noError) return;
544
545 processed = true;
546 }
547
548 // We may or may not have got oai_meta (depends on if FROAI was set).
549 // If we didn't get oai_meta, then deleted_status would still be "".
550 // If we did get oai_meta, and if the deleted_status for the OID was D for deleted entry,
551 // don't bother getting any other metadata, as there will be no entry for that OID in index db.
552
553 // Note that if we did get oai_meta and OID marked as existing, we're in append_mode:
554 // don't let get_metadata() clear the metadata list, as there's already stuff in there
555 //if(deleted_status == "E") append_metadata = true;
556
557 if (((*source_here).s != NULL) &&
558 deleted_status != "D" &&
559 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
560 request.getParents, request.fields,
561 (*resultdoc_here).OID, (*resultdoc_here).metadata,
562 err, logout, append_metadata))) {
563 if (err != noError) return; // check for errors again
564
565 processed = processed || true; // processed would not have been set yet if not doing FROAI. Set now.
566 // OR-ing isn't necessary, but indicates some consideration of both get oai meta & get meta success
567 }
568
569 if(processed) break;
570
571 ++source_here;
572 }
573 if (!processed) {
574
575 logout << text_t2ascii << "Protocol Error: nothing processed for "
576 << "filter \"" << request.filterName << "\".\n\n";
577
578 err = protocolError;
579 return;
580 }
581 ++resultdoc_here;
582 }
583 }
584
585 err = noError;
586 }
587 else
588 {
589 response.clear ();
590 err = protocolError;
591 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
592 << "filter \"" << request.filterName << "\".\n\n";
593 }
594}
595
596void collectserver::get_document (const DocumentRequest_t &request,
597 DocumentResponse_t &response,
598 comerror_t &err, ostream &logout) {
599
600 sourcelistclass::iterator source_here = sources.begin();
601 sourcelistclass::iterator source_end = sources.end();
602 while (source_here != source_end) {
603 assert ((*source_here).s != NULL);
604 if (((*source_here).s != NULL) &&
605 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
606 if (err != noError) return;
607 break;
608 }
609 ++source_here;
610 }
611}
612
613void collectserver::is_searchable (bool &issearchable, comerror_t &err,
614 ostream &logout) {
615
616 sourcelistclass::iterator source_here = sources.begin();
617 sourcelistclass::iterator source_end = sources.end();
618 while (source_here != source_end) {
619 assert ((*source_here).s != NULL);
620 if (((*source_here).s != NULL) &&
621 ((*source_here).s->is_searchable (issearchable, err, logout))) {
622 if (err != noError) return;
623 break;
624 }
625 ++source_here;
626 }
627}
628
629
630bool operator==(const collectserverptr &x, const collectserverptr &y) {
631 return (x.c == y.c);
632}
633
634bool operator<(const collectserverptr &x, const collectserverptr &y) {
635 return (x.c < y.c);
636}
637
638
639// thecollectserver remains the property of the calling code but
640// should not be deleted until it is removed from this list.
641void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
642 // can't add a null collection server
643 assert (thecollectserver != NULL);
644 if (thecollectserver == NULL) return;
645
646 // can't add an collection server with no collection name
647 assert (!(thecollectserver->get_collection_name()).empty());
648 if ((thecollectserver->get_collection_name()).empty()) return;
649
650 collectserverptr cptr;
651 cptr.c = thecollectserver;
652 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
653}
654
655// getcollectserver will return NULL if the collectserver could not be found
656collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
657 // can't find a collection with no name
658 if (collection.empty()) return NULL;
659
660 iterator here = collectserverptrs.find (collection);
661 if (here == collectserverptrs.end()) return NULL;
662
663 return (*here).second.c;
664}
Note: See TracBrowser for help on using the repository browser.