source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp

Last change on this file was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:keywords set to Author Date Id Revision
File size: 20.1 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "sqlqueryfilter.h"
37#include "queryfilter.h"
38
39#ifdef ENABLE_MG
40#include "mgqueryfilter.h"
41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
44#include "mgppqueryfilter.h"
45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
48#include "lucenequeryfilter.h"
49#include "lucenesource.h"
50#endif
51
52#include <assert.h>
53
54#ifdef USE_GDBM
55#include "gdbmclass.h"
56#endif
57
58#ifdef USE_JDBM
59#include "jdbmnaiveclass.h"
60#endif
61
62#ifdef USE_SQLITE
63#include "sqlitedbclass.h"
64#endif
65
66#ifdef USE_MSSQL
67#include "mssqldbclass.h"
68#endif
69
70// @EXTENSION HEADERS@
71
72collectset::collectset (text_t& gsdlhome, text_t& collecthome)
73{
74 // gsdlhome and collecthome will be set as a result of calling this function
75 // collecthome will default to "<gsdlhome>/collect" if not explicitly
76 // specified in config file
77
78 text_tarray collections;
79
80#ifdef ENABLE_MG
81 mgsearch = NULL;
82#endif
83#ifdef ENABLE_MGPP
84 mgppsearch = NULL;
85#endif
86#ifdef ENABLE_LUCENE
87 lucenesearch = NULL;
88#endif
89
90 // get gsdlhome (if we fail the error will be picked up later -- in
91 // cgiwrapper)
92
93 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
94 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
95 if (read_dir (collecthome, collections)) {
96
97 text_tarray::const_iterator thiscol = collections.begin();
98 text_tarray::const_iterator endcol = collections.end();
99
100 while (thiscol != endcol) {
101 // ignore the modelcol
102 if (*thiscol == "modelcol") {
103 ++thiscol;
104 continue;
105 }
106
107 this->add_collection (*thiscol, gsdlhome, collecthome);
108
109 ++thiscol;
110 }
111
112 this->add_all_collection_groups(gsdlhome, collecthome);
113 }
114 }
115 }
116
117 set_gsdl_env_vars(gsdlhome);
118}
119
120
121collectset::collectset (text_t& httpprefix_arg)
122{
123 httpprefix = httpprefix_arg;
124
125#ifdef ENABLE_MG
126 mgsearch = NULL;
127#endif
128#ifdef ENABLE_MGPP
129 mgppsearch = NULL;
130#endif
131#ifdef ENABLE_LUCENE
132 lucenesearch = NULL;
133#endif
134
135}
136
137collectset::collectset ()
138{
139#ifdef ENABLE_MG
140 mgsearch = NULL;
141#endif
142#ifdef ENABLE_MGPP
143 mgppsearch = NULL;
144#endif
145#ifdef ENABLE_LUCENE
146 lucenesearch = NULL;
147#endif
148}
149
150collectset::~collectset () {
151 collectservermapclass::iterator here = cservers.begin();
152 collectservermapclass::iterator end = cservers.end();
153
154 while (here != end) {
155 if ((*here).second.c != NULL) {
156 delete (*here).second.c;
157 }
158 ++here;
159 }
160 cservers.clear();
161}
162
163bool collectset::init (ostream &logout) {
164 collectservermapclass::iterator here = cservers.begin();
165 collectservermapclass::iterator end = cservers.end();
166
167 while (here != end) {
168 assert ((*here).second.c != NULL);
169 if ((*here).second.c != NULL) {
170 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
171
172 // configure this collection server
173
174 // note that we read build.cfg before collect.cfg so that the indexmaps
175 // are available to decode defaultindex, defaultsubcollection, and
176 // defaultlanguage
177
178 bool failed_build_cfg = false;
179 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
180 configinfo.collecthome, configinfo.collection)) {
181 failed_build_cfg = true;
182
183 outconvertclass text_t2ascii;
184 logout << text_t2ascii
185 << "Warning: couldn't read build.cfg file for collection \""
186 << configinfo.collection << "\""
187 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
188 << " collecthome=\"" << configinfo.collecthome << "\"\n";
189 }
190
191 bool failed_collect_cfg = false;
192 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
193 configinfo.collecthome, configinfo.collection)) {
194 failed_collect_cfg = true;
195 outconvertclass text_t2ascii;
196 logout << text_t2ascii
197 << "Warning: couldn't read collect.cfg file for collection \""
198 << configinfo.collection << "\""
199 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
200 << " collecthome=\"" << configinfo.collecthome << "\"\n";
201 }
202
203
204 bool is_colgroup = (*here).second.c->is_collection_group();
205
206 if (failed_collect_cfg) {
207 ++here;
208 continue;
209 }
210
211 if (failed_build_cfg && (!is_colgroup)) {
212 ++here;
213 continue;
214 }
215 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
216
217 if (!(*here).second.c->init (logout)) return false;
218
219 (*here).second.c->configure("httpdomain",httpdomain);
220 (*here).second.c->configure("httpprefix",httpprefix);
221 }
222 ++here;
223 }
224
225 return true;
226}
227
228collectservermapclass collectset::servers()
229{ return cservers;
230}
231
232
233void collectset::add_all_collections(const text_t &gsdlhome,
234 const text_t& collecthome)
235{
236 text_tarray collections;
237
238 if (read_dir(collecthome, collections)) {
239
240 text_tarray::const_iterator thiscol = collections.begin();
241 text_tarray::const_iterator endcol = collections.end();
242
243 while (thiscol != endcol) {
244
245 // ignore the modelcol
246 if (*thiscol == "modelcol") {
247 ++thiscol;
248 continue;
249 }
250
251 // create collection server for this collection
252 this->add_collection (*thiscol, gsdlhome, collecthome);
253
254 ++thiscol;
255 }
256
257 this->add_all_collection_groups(gsdlhome,collecthome);
258 }
259}
260
261bool collectset::collection_is_collect_group (const text_t& collect_dir)
262{
263 text_t is_collect_group_str = "false";
264 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
265
266 if (file_exists(collect_cfg)) {
267 char *collect_cfgc = collect_cfg.getcstr();
268 ifstream confin(collect_cfgc);
269
270 if (confin) {
271 text_tarray cfgline;
272
273 while (read_cfg_line(confin, cfgline) >= 0) {
274 if (cfgline.size() == 2) {
275 text_t key = cfgline[0];
276 cfgline.erase(cfgline.begin());
277 if (key == "collectgroup") {
278 is_collect_group_str = cfgline[0];
279 break;
280 }
281 }
282 }
283
284 confin.close();
285 }
286
287 delete []collect_cfgc;
288 }
289
290 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
291
292 return is_collect_group;
293}
294
295
296// add_collection sets up the collectionserver and calls
297// add_collectserver
298void collectset::add_collection (const text_t& collection,
299 const text_t& gsdlhome,
300 const text_t& collecthome)
301{
302 // read config file to see if built with mg, mgpp, or lucene
303 text_t buildtype = "mg"; // mg is default
304 text_t infodbtype = "gdbm"; // gdbm is default
305
306 this->remove_collection(collection);
307
308 collectserver *cserver = NULL;
309
310 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
311 if (file_exists (build_cfg)) {
312 char *build_cfgc = build_cfg.getcstr();
313 ifstream confin(build_cfgc);
314
315 if (confin) {
316 text_tarray cfgline;
317
318 while (read_cfg_line(confin, cfgline) >= 0) {
319 if (cfgline.size() == 2) {
320 text_t key = cfgline[0];
321 cfgline.erase(cfgline.begin());
322 if (key == "buildtype") {
323 buildtype = cfgline[0];
324 }
325 if (key == "infodbtype") {
326 infodbtype = cfgline[0];
327 }
328 }
329 }
330 confin.close();
331 }
332 delete []build_cfgc;
333
334 cserver = new collectserver();
335
336 // Create a dbclass of the correct type
337 dbclass *db_ptr = NULL; // index database in index/text
338 dbclass *oaidb_ptr = NULL; // etc/oai-inf database
339
340 if (infodbtype == "sqlite")
341 {
342#ifdef USE_SQLITE
343 sqlitedbclass *sql_db_ptr = new sqlitedbclass(gsdlhome);
344 db_ptr = sql_db_ptr;
345
346 sqlitedbclass *sql_oaidb_ptr = new sqlitedbclass(gsdlhome);
347 oaidb_ptr = sql_oaidb_ptr;
348
349 // add a sql browse filter
350 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
351 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
352 cserver->add_filter (sqlbrowsefilter);
353
354 // add a sql query filter
355 sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
356 sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
357 cserver->add_filter (sqlqueryfilter);
358
359#else
360 cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
361 cerr << " this installation of Greenstone";
362#endif
363 }
364
365 if (infodbtype == "mssql")
366 {
367#ifdef USE_MSSQL
368 mssqldbclass *mssql_db_ptr = new mssqldbclass(gsdlhome);
369 db_ptr = mssql_db_ptr;
370
371 mssqldbclass *mssql_oaidb_ptr = new mssqldbclass(gsdlhome);
372 oaidb_ptr = mssql_oaidb_ptr;
373
374 // add a sql browse filter
375 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
376 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
377 cserver->add_filter (sqlbrowsefilter);
378#else
379 cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
380 cerr << " this installation of Greenstone";
381#endif
382 }
383
384 if (infodbtype == "jdbm") {
385
386#ifdef USE_JDBM
387 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
388 db_ptr = jdbm_db_ptr;
389
390 jdbmnaiveclass *jdbm_oaidb_ptr = new jdbmnaiveclass(gsdlhome);
391 oaidb_ptr = jdbm_oaidb_ptr;
392#else
393 cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
394 cerr << " this installation of Greenstone";
395#endif
396 }
397
398 // @EXTENSION DATASOURCES@
399
400 // Use GDBM if the infodb type is empty or not one of the values above
401 if (db_ptr == NULL) {
402#ifdef USE_GDBM
403 db_ptr = new gdbmclass(gsdlhome);
404 oaidb_ptr = new gdbmclass(gsdlhome);
405#else
406 cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
407 cerr << " this installation of Greenstone";
408#endif
409 }
410
411 // add a null filter
412 filterclass *filter = new filterclass ();
413 cserver->add_filter (filter);
414
415 // add a browse filter
416 browsefilterclass *browsefilter = new browsefilterclass();
417 browsefilter->set_db_ptr(db_ptr);
418 // set the oaidb ptr for the browse filter. As with the db_ptr, the oaidb_ptr
419 // is not managed and deleted by the browsefilter class, but by the sourceclass
420 browsefilter->set_oaidb_ptr(oaidb_ptr);
421 cserver->add_filter (browsefilter);
422
423 if (buildtype == "mg") {
424#ifdef ENABLE_MG
425 mgsearch = new mgsearchclass();
426
427 // add a query filter
428 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
429 queryfilter->set_db_ptr(db_ptr);
430 queryfilter->set_textsearchptr (mgsearch);
431 cserver->add_filter (queryfilter);
432
433 // add a mg source
434 mgsourceclass *mgsource = new mgsourceclass ();
435 mgsource->set_db_ptr(db_ptr);
436 mgsource->set_oaidb_ptr(oaidb_ptr);
437 mgsource->set_textsearchptr (mgsearch);
438 cserver->add_source (mgsource);
439#else
440 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
441#endif
442 }
443 else if (buildtype == "mgpp") {
444#ifdef ENABLE_MGPP
445 mgppsearch = new mgppsearchclass();
446
447 // add a query filter
448 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
449 queryfilter->set_db_ptr(db_ptr);
450 queryfilter->set_textsearchptr (mgppsearch);
451 cserver->add_filter (queryfilter);
452
453 // add a mgpp source
454 mgppsourceclass *mgppsource = new mgppsourceclass ();
455 mgppsource->set_db_ptr(db_ptr);
456 mgppsource->set_oaidb_ptr(oaidb_ptr);
457 mgppsource->set_textsearchptr (mgppsearch);
458 cserver->add_source (mgppsource);
459#else
460 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
461#endif
462 }
463 else if (buildtype == "lucene") {
464#ifdef ENABLE_LUCENE
465 lucenesearch = new lucenesearchclass();
466 lucenesearch->set_gsdlhome(gsdlhome);
467
468 // add a query filter
469 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
470 queryfilter->set_db_ptr(db_ptr);
471 queryfilter->set_textsearchptr (lucenesearch);
472 cserver->add_filter (queryfilter);
473
474 // add a lucene source
475 lucenesourceclass *lucenesource = new lucenesourceclass ();
476 lucenesource->set_db_ptr(db_ptr);
477 lucenesource->set_oaidb_ptr(oaidb_ptr);
478 lucenesource->set_textsearchptr (lucenesearch);
479 cserver->add_source (lucenesource);
480#else
481 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
482#endif
483 }
484 else {
485 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
486 }
487
488 }
489 else {
490 // see if it is a collectgroup col
491 text_t this_collect_dir = filename_cat(collecthome, collection);
492 if (collection_is_collect_group(this_collect_dir)) {
493 // by this point we know we will need a cserver
494 cserver = new collectserver();
495 }
496 // else not a collect group, or there was no collect.cfg
497 // => leave cserver as NULL so it will not be added into cservers
498 }
499
500 if (cserver != NULL) {
501 // inform collection server and everything it contains about its
502 // collection name
503 cserver->configure ("collection", collection);
504 cserver->configure ("gsdlhome", gsdlhome);
505 cserver->configure ("collecthome", collecthome);
506 cservers.addcollectserver (cserver);
507 }
508}
509
510void collectset::remove_all_collections () {
511
512#ifdef ENABLE_MG
513 // first unload any cached mg databases
514 if (mgsearch != NULL) {
515 mgsearch->unload_database();
516 }
517#endif
518#ifdef ENABLE_MGPP
519 if (mgppsearch != NULL) {
520 mgppsearch->unload_database();
521 }
522#endif
523#ifdef ENABLE_LUCENE
524 if (lucenesearch != NULL) {
525 lucenesearch->unload_database();
526 }
527#endif
528
529 // now delete the collection server objects
530 collectservermapclass::iterator here = cservers.begin();
531 collectservermapclass::iterator end = cservers.end();
532
533 while (here != end) {
534 if ((*here).second.c != NULL) {
535 delete (*here).second.c;
536 }
537 ++here;
538 }
539 // since all collection server objects are deleted (which deleted their source objects
540 // which in turn deleted their search objects), we now NULL the local reference to the
541 // search objects. See the extensive comment for this in remove_collection(text_t).
542#ifdef ENABLE_MG
543 if (mgsearch != NULL) {
544 mgsearch = NULL;
545 }
546#endif
547#ifdef ENABLE_MGPP
548 if (mgppsearch != NULL) {
549 mgppsearch = NULL;
550 }
551#endif
552#ifdef ENABLE_LUCENE
553 if (lucenesearch != NULL) {
554 lucenesearch = NULL;
555 }
556#endif
557
558 cservers.clear();
559}
560
561void collectset::add_collection_group(const text_t& collection,
562 const text_t& gsdlhome,
563 const text_t& collecthome)
564{
565 text_tarray group;
566
567 text_t collect_group_dir = filename_cat (collecthome, collection);
568
569 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
570 if (collection_is_collect_group(collect_group_dir)) {
571 if (read_dir (collect_group_dir, group)) {
572
573 text_tarray::const_iterator thiscol = group.begin();
574 text_tarray::const_iterator endcol = group.end();
575
576 while (thiscol != endcol) {
577 // ignore the etc directory
578 if (*thiscol == "etc") {
579 ++thiscol;
580 continue;
581 }
582
583 //text_t group_col = filename_cat(collection,*thiscol);
584 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
585 text_t group_col = collection + "/" + *thiscol;
586 this->add_collection (group_col, gsdlhome, collecthome);
587
588 ++thiscol;
589 }
590 }
591 }
592}
593
594void collectset::add_all_collection_groups (const text_t& gsdlhome,
595 const text_t& collecthome)
596
597{
598 collectservermapclass::iterator here = cservers.begin();
599 collectservermapclass::iterator end = cservers.end();
600
601 while (here != end) {
602 text_t collection = (*here).second.c->get_collection_name();
603 this->add_collection_group(collection,gsdlhome,collecthome);
604
605 ++here;
606 }
607}
608
609
610// remove_collection deletes the collection server of collection.
611// This only needs to be called if a collectionserver is to be
612// removed while the library is running. The destructor function
613// cleans up all collectservers when the program exits.
614void collectset::remove_collection (const text_t &collection) {
615
616 // do nothing if no collection server exists for this collection
617 if (cservers.getcollectserver(collection) == NULL) return;
618
619#ifdef ENABLE_MG
620 // first unload any cached mg databases - we may need to do something
621 // similar to this for mgpp and lucene too
622 if (mgsearch != NULL) {
623 mgsearch->unload_database();
624 }
625#endif
626#ifdef ENABLE_MGPP
627 if (mgppsearch != NULL) {
628 mgppsearch->unload_database();
629 }
630#endif
631#ifdef ENABLE_LUCENE
632 if (lucenesearch != NULL) {
633 lucenesearch->unload_database();
634 }
635#endif
636
637 // now delete the collection server object
638 collectservermapclass::iterator here = cservers.begin();
639 collectservermapclass::iterator end = cservers.end();
640
641 while (here != end) {
642 if ((*here).second.c != NULL && (*here).first == collection) {
643 delete (*here).second.c;
644
645 // The above code deletes the collection server object for this collection, which then
646 // deletes the <indexer>source object, which then deletes the <indexer>search object.
647 // Since we have a reference to the <index>search object here, we have to set it to NULL
648 // at this point, because we test it against null-ness elsewhere in this code. (Without
649 // setting it to NULL, we end up with server crashing issues.)
650 // Ideally, we'd like to know that we are NULLing the pointer to the exact same object
651 // as was freed above, but we can't know that without complicated object access to make
652 // the necessary pointer comparison. Fortunately, this class maintains only one type of
653 // <index>search object (of a/any kind) at any time, so we can NULL this confidently now.
654#ifdef ENABLE_MG
655 if (mgsearch != NULL) {
656 mgsearch = NULL;
657 }
658#endif
659#ifdef ENABLE_MGPP
660 if (mgppsearch != NULL) {
661 mgppsearch = NULL;
662 }
663#endif
664#ifdef ENABLE_LUCENE
665 if (lucenesearch != NULL) {
666 lucenesearch = NULL;
667 }
668#endif
669
670 // continue cleaning up the collection server
671 cservers.erase (here);
672
673 return;
674 } // end if
675 ++here;
676 }
677}
678
679
680// remove_collection deletes the collection server of collection.
681// This only needs to be called if a collectionserver is to be
682// removed while the library is running. The destructor function
683// cleans up all collectservers when the program exits.
684void collectset::remove_collection (const text_t &collection, ostream &logout) {
685
686 remove_collection(collection);
687
688 outconvertclass text_t2ascii;
689 logout << text_t2ascii << "collectset::remove_collection: Removed collectserver for "
690 << collection << "\n";
691}
692
693void collectset::configure(const text_t &key, const text_tarray &cfgline)
694{
695 if ((key == "collection") || (key == "collectdir")) return;
696
697 collectservermapclass::iterator here = cservers.begin();
698 collectservermapclass::iterator end = cservers.end();
699
700 while (here != end) {
701 assert ((*here).second.c != NULL);
702 if ((*here).second.c != NULL) {
703 if (key == "collectinfo") {
704 if ((*here).first == cfgline[0]) {
705 if (cfgline.size()==3) {
706 (*here).second.c->configure ("gsdlhome", cfgline[1]);
707 (*here).second.c->configure ("gdbmhome", cfgline[2]);
708 }
709 else {
710 (*here).second.c->configure ("gsdlhome", cfgline[1]);
711 (*here).second.c->configure ("collecthome", cfgline[2]);
712 (*here).second.c->configure ("gdbmhome", cfgline[3]);
713 }
714 }
715 } else {
716 (*here).second.c->configure (key, cfgline);
717 }
718 }
719
720 ++here;
721 }
722}
723
724void collectset::getCollectionList (text_tarray &collist)
725{
726 collist.erase(collist.begin(),collist.end());
727
728 collectservermapclass::iterator here = cservers.begin();
729 collectservermapclass::iterator end = cservers.end();
730 while (here != end) {
731 assert ((*here).second.c != NULL);
732 if ((*here).second.c != NULL) {
733 collist.push_back ((*here).second.c->get_collection_name());
734 }
735 ++here;
736 }
737}
738
Note: See TracBrowser for help on using the repository browser.