source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 28762

Last change on this file since 28762 was 25235, checked in by jmt12, 12 years ago

Adding two placeholders into the collectset to allow Extensions to add themselves as infodbtypes - required for TDB support

  • Property svn:keywords set to Author Date Id Revision
File size: 19.4 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "sqlqueryfilter.h"
37#include "queryfilter.h"
38
39#ifdef ENABLE_MG
40#include "mgqueryfilter.h"
41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
44#include "mgppqueryfilter.h"
45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
48#include "lucenequeryfilter.h"
49#include "lucenesource.h"
50#endif
51
52#include <assert.h>
53
54#ifdef USE_GDBM
55#include "gdbmclass.h"
56#endif
57
58#ifdef USE_JDBM
59#include "jdbmnaiveclass.h"
60#endif
61
62#ifdef USE_SQLITE
63#include "sqlitedbclass.h"
64#endif
65
66#ifdef USE_MSSQL
67#include "mssqldbclass.h"
68#endif
69
70// @EXTENSION HEADERS@
71
72collectset::collectset (text_t& gsdlhome, text_t& collecthome)
73{
74 // gsdlhome and collecthome will be set as a result of calling this function
75 // collecthome will default to "<gsdlhome>/collect" if not explicitly
76 // specified in config file
77
78 text_tarray collections;
79
80#ifdef ENABLE_MG
81 mgsearch = NULL;
82#endif
83#ifdef ENABLE_MGPP
84 mgppsearch = NULL;
85#endif
86#ifdef ENABLE_LUCENE
87 lucenesearch = NULL;
88#endif
89
90 // get gsdlhome (if we fail the error will be picked up later -- in
91 // cgiwrapper)
92
93 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
94 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
95 if (read_dir (collecthome, collections)) {
96
97 text_tarray::const_iterator thiscol = collections.begin();
98 text_tarray::const_iterator endcol = collections.end();
99
100 while (thiscol != endcol) {
101 // ignore the modelcol
102 if (*thiscol == "modelcol") {
103 ++thiscol;
104 continue;
105 }
106
107 this->add_collection (*thiscol, gsdlhome, collecthome);
108
109 ++thiscol;
110 }
111
112 this->add_all_collection_groups(gsdlhome, collecthome);
113 }
114 }
115 }
116
117 set_gsdl_env_vars(gsdlhome);
118}
119
120
121collectset::collectset (text_t& httpprefix_arg)
122{
123 httpprefix = httpprefix_arg;
124
125#ifdef ENABLE_MG
126 mgsearch = NULL;
127#endif
128#ifdef ENABLE_MGPP
129 mgppsearch = NULL;
130#endif
131#ifdef ENABLE_LUCENE
132 lucenesearch = NULL;
133#endif
134
135}
136
137collectset::collectset ()
138{
139#ifdef ENABLE_MG
140 mgsearch = NULL;
141#endif
142#ifdef ENABLE_MGPP
143 mgppsearch = NULL;
144#endif
145#ifdef ENABLE_LUCENE
146 lucenesearch = NULL;
147#endif
148}
149
150collectset::~collectset () {
151 collectservermapclass::iterator here = cservers.begin();
152 collectservermapclass::iterator end = cservers.end();
153
154 while (here != end) {
155 if ((*here).second.c != NULL) {
156 delete (*here).second.c;
157 }
158 ++here;
159 }
160 cservers.clear();
161}
162
163bool collectset::init (ostream &logout) {
164 collectservermapclass::iterator here = cservers.begin();
165 collectservermapclass::iterator end = cservers.end();
166
167 while (here != end) {
168 assert ((*here).second.c != NULL);
169 if ((*here).second.c != NULL) {
170 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
171
172 // configure this collection server
173
174 // note that we read build.cfg before collect.cfg so that the indexmaps
175 // are available to decode defaultindex, defaultsubcollection, and
176 // defaultlanguage
177
178 bool failed_build_cfg = false;
179 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
180 configinfo.collecthome, configinfo.collection)) {
181 failed_build_cfg = true;
182
183 outconvertclass text_t2ascii;
184 logout << text_t2ascii
185 << "Warning: couldn't read build.cfg file for collection \""
186 << configinfo.collection << "\""
187 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
188 << " collecthome=\"" << configinfo.collecthome << "\"\n";
189 }
190
191 bool failed_collect_cfg = false;
192 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
193 configinfo.collecthome, configinfo.collection)) {
194 failed_collect_cfg = true;
195 outconvertclass text_t2ascii;
196 logout << text_t2ascii
197 << "Warning: couldn't read collect.cfg file for collection \""
198 << configinfo.collection << "\""
199 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
200 << " collecthome=\"" << configinfo.collecthome << "\"\n";
201 }
202
203
204 bool is_colgroup = (*here).second.c->is_collection_group();
205
206 if (failed_collect_cfg) {
207 ++here;
208 continue;
209 }
210
211 if (failed_build_cfg && (!is_colgroup)) {
212 ++here;
213 continue;
214 }
215 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
216
217 if (!(*here).second.c->init (logout)) return false;
218
219 (*here).second.c->configure("httpdomain",httpdomain);
220 (*here).second.c->configure("httpprefix",httpprefix);
221 }
222 ++here;
223 }
224
225 return true;
226}
227
228collectservermapclass collectset::servers()
229{ return cservers;
230}
231
232
233void collectset::add_all_collections(const text_t &gsdlhome,
234 const text_t& collecthome)
235{
236 text_tarray collections;
237
238 if (read_dir(collecthome, collections)) {
239
240 text_tarray::const_iterator thiscol = collections.begin();
241 text_tarray::const_iterator endcol = collections.end();
242
243 while (thiscol != endcol) {
244
245 // ignore the modelcol
246 if (*thiscol == "modelcol") {
247 ++thiscol;
248 continue;
249 }
250
251 // create collection server for this collection
252 this->add_collection (*thiscol, gsdlhome, collecthome);
253
254 ++thiscol;
255 }
256
257 this->add_all_collection_groups(gsdlhome,collecthome);
258 }
259}
260
261bool collectset::collection_is_collect_group (const text_t& collect_dir)
262{
263 text_t is_collect_group_str = "false";
264 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
265
266 if (file_exists(collect_cfg)) {
267 char *collect_cfgc = collect_cfg.getcstr();
268 ifstream confin(collect_cfgc);
269
270 if (confin) {
271 text_tarray cfgline;
272
273 while (read_cfg_line(confin, cfgline) >= 0) {
274 if (cfgline.size() == 2) {
275 text_t key = cfgline[0];
276 cfgline.erase(cfgline.begin());
277 if (key == "collectgroup") {
278 is_collect_group_str = cfgline[0];
279 break;
280 }
281 }
282 }
283
284 confin.close();
285 }
286
287 delete []collect_cfgc;
288 }
289
290 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
291
292 return is_collect_group;
293}
294
295
296// add_collection sets up the collectionserver and calls
297// add_collectserver
298void collectset::add_collection (const text_t& collection,
299 const text_t& gsdlhome,
300 const text_t& collecthome)
301{
302 // read config file to see if built with mg, mgpp, or lucene
303 text_t buildtype = "mg"; // mg is default
304 text_t infodbtype = "gdbm"; // gdbm is default
305
306 this->remove_collection(collection);
307
308 collectserver *cserver = NULL;
309
310 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
311 if (file_exists (build_cfg)) {
312 char *build_cfgc = build_cfg.getcstr();
313 ifstream confin(build_cfgc);
314
315 if (confin) {
316 text_tarray cfgline;
317
318 while (read_cfg_line(confin, cfgline) >= 0) {
319 if (cfgline.size() == 2) {
320 text_t key = cfgline[0];
321 cfgline.erase(cfgline.begin());
322 if (key == "buildtype") {
323 buildtype = cfgline[0];
324 }
325 if (key == "infodbtype") {
326 infodbtype = cfgline[0];
327 }
328 }
329 }
330 confin.close();
331 }
332 delete []build_cfgc;
333
334 cserver = new collectserver();
335
336 // Create a dbclass of the correct type
337 dbclass *db_ptr = NULL;
338
339 if (infodbtype == "sqlite")
340 {
341#ifdef USE_SQLITE
342 sqlitedbclass *sql_db_ptr = new sqlitedbclass(gsdlhome);
343 db_ptr = sql_db_ptr;
344
345 // add a sql browse filter
346 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
347 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
348 cserver->add_filter (sqlbrowsefilter);
349
350 // add a sql query filter
351 sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
352 sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
353 cserver->add_filter (sqlqueryfilter);
354
355#else
356 cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
357 cerr << " this installation of Greenstone";
358#endif
359 }
360
361 if (infodbtype == "mssql")
362 {
363#ifdef USE_MSSQL
364 mssqldbclass *mssql_db_ptr = new mssqldbclass(gsdlhome);
365 db_ptr = mssql_db_ptr;
366
367 // add a sql browse filter
368 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
369 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
370 cserver->add_filter (sqlbrowsefilter);
371#else
372 cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
373 cerr << " this installation of Greenstone";
374#endif
375 }
376
377 if (infodbtype == "jdbm") {
378
379#ifdef USE_JDBM
380 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
381 db_ptr = jdbm_db_ptr;
382#else
383 cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
384 cerr << " this installation of Greenstone";
385#endif
386 }
387
388 // @EXTENSION DATASOURCES@
389
390 // Use GDBM if the infodb type is empty or not one of the values above
391 if (db_ptr == NULL) {
392#ifdef USE_GDBM
393 db_ptr = new gdbmclass(gsdlhome);
394#else
395 cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
396 cerr << " this installation of Greenstone";
397#endif
398 }
399
400 // add a null filter
401 filterclass *filter = new filterclass ();
402 cserver->add_filter (filter);
403
404 // add a browse filter
405 browsefilterclass *browsefilter = new browsefilterclass();
406 browsefilter->set_db_ptr(db_ptr);
407 cserver->add_filter (browsefilter);
408
409 if (buildtype == "mg") {
410#ifdef ENABLE_MG
411 mgsearch = new mgsearchclass();
412
413 // add a query filter
414 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
415 queryfilter->set_db_ptr(db_ptr);
416 queryfilter->set_textsearchptr (mgsearch);
417 cserver->add_filter (queryfilter);
418
419 // add a mg source
420 mgsourceclass *mgsource = new mgsourceclass ();
421 mgsource->set_db_ptr(db_ptr);
422 mgsource->set_textsearchptr (mgsearch);
423 cserver->add_source (mgsource);
424#else
425 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
426#endif
427 }
428 else if (buildtype == "mgpp") {
429#ifdef ENABLE_MGPP
430 mgppsearch = new mgppsearchclass();
431
432 // add a query filter
433 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
434 queryfilter->set_db_ptr(db_ptr);
435 queryfilter->set_textsearchptr (mgppsearch);
436 cserver->add_filter (queryfilter);
437
438 // add a mgpp source
439 mgppsourceclass *mgppsource = new mgppsourceclass ();
440 mgppsource->set_db_ptr(db_ptr);
441 mgppsource->set_textsearchptr (mgppsearch);
442 cserver->add_source (mgppsource);
443#else
444 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
445#endif
446 }
447 else if (buildtype == "lucene") {
448#ifdef ENABLE_LUCENE
449 lucenesearch = new lucenesearchclass();
450 lucenesearch->set_gsdlhome(gsdlhome);
451
452 // add a query filter
453 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
454 queryfilter->set_db_ptr(db_ptr);
455 queryfilter->set_textsearchptr (lucenesearch);
456 cserver->add_filter (queryfilter);
457
458 // add a lucene source
459 lucenesourceclass *lucenesource = new lucenesourceclass ();
460 lucenesource->set_db_ptr(db_ptr);
461 lucenesource->set_textsearchptr (lucenesearch);
462 cserver->add_source (lucenesource);
463#else
464 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
465#endif
466 }
467 else {
468 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
469 }
470
471 }
472 else {
473 // see if it is a collectgroup col
474 text_t this_collect_dir = filename_cat(collecthome, collection);
475 if (collection_is_collect_group(this_collect_dir)) {
476 // by this point we know we will need a cserver
477 cserver = new collectserver();
478 }
479 // else not a collect group, or there was no collect.cfg
480 // => leave cserver as NULL so it will not be added into cservers
481 }
482
483 if (cserver != NULL) {
484 // inform collection server and everything it contains about its
485 // collection name
486 cserver->configure ("collection", collection);
487 cserver->configure ("gsdlhome", gsdlhome);
488 cserver->configure ("collecthome", collecthome);
489 cservers.addcollectserver (cserver);
490 }
491}
492
493void collectset::remove_all_collections () {
494
495#ifdef ENABLE_MG
496 // first unload any cached mg databases
497 if (mgsearch != NULL) {
498 mgsearch->unload_database();
499 }
500#endif
501#ifdef ENABLE_MGPP
502 if (mgppsearch != NULL) {
503 mgppsearch->unload_database();
504 }
505#endif
506#ifdef ENABLE_LUCENE
507 if (lucenesearch != NULL) {
508 lucenesearch->unload_database();
509 }
510#endif
511
512 // now delete the collection server objects
513 collectservermapclass::iterator here = cservers.begin();
514 collectservermapclass::iterator end = cservers.end();
515
516 while (here != end) {
517 if ((*here).second.c != NULL) {
518 delete (*here).second.c;
519 }
520 ++here;
521 }
522 // since all collection server objects are deleted (which deleted their source objects
523 // which in turn deleted their search objects), we now NULL the local reference to the
524 // search objects. See the extensive comment for this in remove_collection(text_t).
525#ifdef ENABLE_MG
526 if (mgsearch != NULL) {
527 mgsearch = NULL;
528 }
529#endif
530#ifdef ENABLE_MGPP
531 if (mgppsearch != NULL) {
532 mgppsearch = NULL;
533 }
534#endif
535#ifdef ENABLE_LUCENE
536 if (lucenesearch != NULL) {
537 lucenesearch = NULL;
538 }
539#endif
540
541 cservers.clear();
542}
543
544void collectset::add_collection_group(const text_t& collection,
545 const text_t& gsdlhome,
546 const text_t& collecthome)
547{
548 text_tarray group;
549
550 text_t collect_group_dir = filename_cat (collecthome, collection);
551
552 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
553 if (collection_is_collect_group(collect_group_dir)) {
554 if (read_dir (collect_group_dir, group)) {
555
556 text_tarray::const_iterator thiscol = group.begin();
557 text_tarray::const_iterator endcol = group.end();
558
559 while (thiscol != endcol) {
560 // ignore the etc directory
561 if (*thiscol == "etc") {
562 ++thiscol;
563 continue;
564 }
565
566 //text_t group_col = filename_cat(collection,*thiscol);
567 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
568 text_t group_col = collection + "/" + *thiscol;
569 this->add_collection (group_col, gsdlhome, collecthome);
570
571 ++thiscol;
572 }
573 }
574 }
575}
576
577void collectset::add_all_collection_groups (const text_t& gsdlhome,
578 const text_t& collecthome)
579
580{
581 collectservermapclass::iterator here = cservers.begin();
582 collectservermapclass::iterator end = cservers.end();
583
584 while (here != end) {
585 text_t collection = (*here).second.c->get_collection_name();
586 this->add_collection_group(collection,gsdlhome,collecthome);
587
588 ++here;
589 }
590}
591
592
593// remove_collection deletes the collection server of collection.
594// This only needs to be called if a collectionserver is to be
595// removed while the library is running. The destructor function
596// cleans up all collectservers when the program exits.
597void collectset::remove_collection (const text_t &collection) {
598
599 // do nothing if no collection server exists for this collection
600 if (cservers.getcollectserver(collection) == NULL) return;
601
602#ifdef ENABLE_MG
603 // first unload any cached mg databases - we may need to do something
604 // similar to this for mgpp and lucene too
605 if (mgsearch != NULL) {
606 mgsearch->unload_database();
607 }
608#endif
609#ifdef ENABLE_MGPP
610 if (mgppsearch != NULL) {
611 mgppsearch->unload_database();
612 }
613#endif
614#ifdef ENABLE_LUCENE
615 if (lucenesearch != NULL) {
616 lucenesearch->unload_database();
617 }
618#endif
619
620 // now delete the collection server object
621 collectservermapclass::iterator here = cservers.begin();
622 collectservermapclass::iterator end = cservers.end();
623
624 while (here != end) {
625 if ((*here).second.c != NULL && (*here).first == collection) {
626 delete (*here).second.c;
627
628 // The above code deletes the collection server object for this collection, which then
629 // deletes the <indexer>source object, which then deletes the <indexer>search object.
630 // Since we have a reference to the <index>search object here, we have to set it to NULL
631 // at this point, because we test it against null-ness elsewhere in this code. (Without
632 // setting it to NULL, we end up with server crashing issues.)
633 // Ideally, we'd like to know that we are NULLing the pointer to the exact same object
634 // as was freed above, but we can't know that without complicated object access to make
635 // the necessary pointer comparison. Fortunately, this class maintains only one type of
636 // <index>search object (of a/any kind) at any time, so we can NULL this confidently now.
637#ifdef ENABLE_MG
638 if (mgsearch != NULL) {
639 mgsearch = NULL;
640 }
641#endif
642#ifdef ENABLE_MGPP
643 if (mgppsearch != NULL) {
644 mgppsearch = NULL;
645 }
646#endif
647#ifdef ENABLE_LUCENE
648 if (lucenesearch != NULL) {
649 lucenesearch = NULL;
650 }
651#endif
652
653 // continue cleaning up the collection server
654 cservers.erase (here);
655
656 return;
657 } // end if
658 ++here;
659 }
660}
661
662
663// remove_collection deletes the collection server of collection.
664// This only needs to be called if a collectionserver is to be
665// removed while the library is running. The destructor function
666// cleans up all collectservers when the program exits.
667void collectset::remove_collection (const text_t &collection, ostream &logout) {
668
669 remove_collection(collection);
670
671 outconvertclass text_t2ascii;
672 logout << text_t2ascii << "collectset::remove_collection: Removed collectserver for "
673 << collection << "\n";
674}
675
676void collectset::configure(const text_t &key, const text_tarray &cfgline)
677{
678 if ((key == "collection") || (key == "collectdir")) return;
679
680 collectservermapclass::iterator here = cservers.begin();
681 collectservermapclass::iterator end = cservers.end();
682
683 while (here != end) {
684 assert ((*here).second.c != NULL);
685 if ((*here).second.c != NULL) {
686 if (key == "collectinfo") {
687 if ((*here).first == cfgline[0]) {
688 if (cfgline.size()==3) {
689 (*here).second.c->configure ("gsdlhome", cfgline[1]);
690 (*here).second.c->configure ("gdbmhome", cfgline[2]);
691 }
692 else {
693 (*here).second.c->configure ("gsdlhome", cfgline[1]);
694 (*here).second.c->configure ("collecthome", cfgline[2]);
695 (*here).second.c->configure ("gdbmhome", cfgline[3]);
696 }
697 }
698 } else {
699 (*here).second.c->configure (key, cfgline);
700 }
701 }
702
703 ++here;
704 }
705}
706
707void collectset::getCollectionList (text_tarray &collist)
708{
709 collist.erase(collist.begin(),collist.end());
710
711 collectservermapclass::iterator here = cservers.begin();
712 collectservermapclass::iterator end = cservers.end();
713 while (here != end) {
714 assert ((*here).second.c != NULL);
715 if ((*here).second.c != NULL) {
716 collist.push_back ((*here).second.c->get_collection_name());
717 }
718 ++here;
719 }
720}
721
Note: See TracBrowser for help on using the repository browser.