source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 25234

Last change on this file since 25234 was 24758, checked in by ak19, 13 years ago

Related to the bugfix in the previous commit: this time, not only is the mgsearch object unloaded but, following the suggestion in the existing comment, the mgppsearch and lucenesearch objects are also unloaded on the two occasions where mgsearch is.

  • Property svn:keywords set to Author Date Id Revision
File size: 19.3 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "sqlqueryfilter.h"
37#include "queryfilter.h"
38
39#ifdef ENABLE_MG
40#include "mgqueryfilter.h"
41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
44#include "mgppqueryfilter.h"
45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
48#include "lucenequeryfilter.h"
49#include "lucenesource.h"
50#endif
51
52#include <assert.h>
53
54#ifdef USE_GDBM
55#include "gdbmclass.h"
56#endif
57
58#ifdef USE_JDBM
59#include "jdbmnaiveclass.h"
60#endif
61
62#ifdef USE_SQLITE
63#include "sqlitedbclass.h"
64#endif
65
66#ifdef USE_MSSQL
67#include "mssqldbclass.h"
68#endif
69
70
71collectset::collectset (text_t& gsdlhome, text_t& collecthome)
72{
73 // gsdlhome and collecthome will be set as a result of calling this function
74 // collecthome will default to "<gsdlhome>/collect" if not explicitly
75 // specified in config file
76
77 text_tarray collections;
78
79#ifdef ENABLE_MG
80 mgsearch = NULL;
81#endif
82#ifdef ENABLE_MGPP
83 mgppsearch = NULL;
84#endif
85#ifdef ENABLE_LUCENE
86 lucenesearch = NULL;
87#endif
88
89 // get gsdlhome (if we fail the error will be picked up later -- in
90 // cgiwrapper)
91
92 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
93 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
94 if (read_dir (collecthome, collections)) {
95
96 text_tarray::const_iterator thiscol = collections.begin();
97 text_tarray::const_iterator endcol = collections.end();
98
99 while (thiscol != endcol) {
100 // ignore the modelcol
101 if (*thiscol == "modelcol") {
102 ++thiscol;
103 continue;
104 }
105
106 this->add_collection (*thiscol, gsdlhome, collecthome);
107
108 ++thiscol;
109 }
110
111 this->add_all_collection_groups(gsdlhome, collecthome);
112 }
113 }
114 }
115
116 set_gsdl_env_vars(gsdlhome);
117}
118
119
120collectset::collectset (text_t& httpprefix_arg)
121{
122 httpprefix = httpprefix_arg;
123
124#ifdef ENABLE_MG
125 mgsearch = NULL;
126#endif
127#ifdef ENABLE_MGPP
128 mgppsearch = NULL;
129#endif
130#ifdef ENABLE_LUCENE
131 lucenesearch = NULL;
132#endif
133
134}
135
136collectset::collectset ()
137{
138#ifdef ENABLE_MG
139 mgsearch = NULL;
140#endif
141#ifdef ENABLE_MGPP
142 mgppsearch = NULL;
143#endif
144#ifdef ENABLE_LUCENE
145 lucenesearch = NULL;
146#endif
147}
148
149collectset::~collectset () {
150 collectservermapclass::iterator here = cservers.begin();
151 collectservermapclass::iterator end = cservers.end();
152
153 while (here != end) {
154 if ((*here).second.c != NULL) {
155 delete (*here).second.c;
156 }
157 ++here;
158 }
159 cservers.clear();
160}
161
162bool collectset::init (ostream &logout) {
163 collectservermapclass::iterator here = cservers.begin();
164 collectservermapclass::iterator end = cservers.end();
165
166 while (here != end) {
167 assert ((*here).second.c != NULL);
168 if ((*here).second.c != NULL) {
169 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
170
171 // configure this collection server
172
173 // note that we read build.cfg before collect.cfg so that the indexmaps
174 // are available to decode defaultindex, defaultsubcollection, and
175 // defaultlanguage
176
177 bool failed_build_cfg = false;
178 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
179 configinfo.collecthome, configinfo.collection)) {
180 failed_build_cfg = true;
181
182 outconvertclass text_t2ascii;
183 logout << text_t2ascii
184 << "Warning: couldn't read build.cfg file for collection \""
185 << configinfo.collection << "\""
186 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
187 << " collecthome=\"" << configinfo.collecthome << "\"\n";
188 }
189
190 bool failed_collect_cfg = false;
191 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
192 configinfo.collecthome, configinfo.collection)) {
193 failed_collect_cfg = true;
194 outconvertclass text_t2ascii;
195 logout << text_t2ascii
196 << "Warning: couldn't read collect.cfg file for collection \""
197 << configinfo.collection << "\""
198 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
199 << " collecthome=\"" << configinfo.collecthome << "\"\n";
200 }
201
202
203 bool is_colgroup = (*here).second.c->is_collection_group();
204
205 if (failed_collect_cfg) {
206 ++here;
207 continue;
208 }
209
210 if (failed_build_cfg && (!is_colgroup)) {
211 ++here;
212 continue;
213 }
214 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
215
216 if (!(*here).second.c->init (logout)) return false;
217
218 (*here).second.c->configure("httpdomain",httpdomain);
219 (*here).second.c->configure("httpprefix",httpprefix);
220 }
221 ++here;
222 }
223
224 return true;
225}
226
227collectservermapclass collectset::servers()
228{ return cservers;
229}
230
231
232void collectset::add_all_collections(const text_t &gsdlhome,
233 const text_t& collecthome)
234{
235 text_tarray collections;
236
237 if (read_dir(collecthome, collections)) {
238
239 text_tarray::const_iterator thiscol = collections.begin();
240 text_tarray::const_iterator endcol = collections.end();
241
242 while (thiscol != endcol) {
243
244 // ignore the modelcol
245 if (*thiscol == "modelcol") {
246 ++thiscol;
247 continue;
248 }
249
250 // create collection server for this collection
251 this->add_collection (*thiscol, gsdlhome, collecthome);
252
253 ++thiscol;
254 }
255
256 this->add_all_collection_groups(gsdlhome,collecthome);
257 }
258}
259
260bool collectset::collection_is_collect_group (const text_t& collect_dir)
261{
262 text_t is_collect_group_str = "false";
263 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
264
265 if (file_exists(collect_cfg)) {
266 char *collect_cfgc = collect_cfg.getcstr();
267 ifstream confin(collect_cfgc);
268
269 if (confin) {
270 text_tarray cfgline;
271
272 while (read_cfg_line(confin, cfgline) >= 0) {
273 if (cfgline.size() == 2) {
274 text_t key = cfgline[0];
275 cfgline.erase(cfgline.begin());
276 if (key == "collectgroup") {
277 is_collect_group_str = cfgline[0];
278 break;
279 }
280 }
281 }
282
283 confin.close();
284 }
285
286 delete []collect_cfgc;
287 }
288
289 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
290
291 return is_collect_group;
292}
293
294
295// add_collection sets up the collectionserver and calls
296// add_collectserver
297void collectset::add_collection (const text_t& collection,
298 const text_t& gsdlhome,
299 const text_t& collecthome)
300{
301 // read config file to see if built with mg, mgpp, or lucene
302 text_t buildtype = "mg"; // mg is default
303 text_t infodbtype = "gdbm"; // gdbm is default
304
305 this->remove_collection(collection);
306
307 collectserver *cserver = NULL;
308
309 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
310 if (file_exists (build_cfg)) {
311 char *build_cfgc = build_cfg.getcstr();
312 ifstream confin(build_cfgc);
313
314 if (confin) {
315 text_tarray cfgline;
316
317 while (read_cfg_line(confin, cfgline) >= 0) {
318 if (cfgline.size() == 2) {
319 text_t key = cfgline[0];
320 cfgline.erase(cfgline.begin());
321 if (key == "buildtype") {
322 buildtype = cfgline[0];
323 }
324 if (key == "infodbtype") {
325 infodbtype = cfgline[0];
326 }
327 }
328 }
329 confin.close();
330 }
331 delete []build_cfgc;
332
333 cserver = new collectserver();
334
335 // Create a dbclass of the correct type
336 dbclass *db_ptr = NULL;
337
338 if (infodbtype == "sqlite")
339 {
340#ifdef USE_SQLITE
341 sqlitedbclass *sql_db_ptr = new sqlitedbclass(gsdlhome);
342 db_ptr = sql_db_ptr;
343
344 // add a sql browse filter
345 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
346 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
347 cserver->add_filter (sqlbrowsefilter);
348
349 // add a sql query filter
350 sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
351 sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
352 cserver->add_filter (sqlqueryfilter);
353
354#else
355 cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
356 cerr << " this installation of Greenstone";
357#endif
358 }
359
360 if (infodbtype == "mssql")
361 {
362#ifdef USE_MSSQL
363 mssqldbclass *mssql_db_ptr = new mssqldbclass(gsdlhome);
364 db_ptr = mssql_db_ptr;
365
366 // add a sql browse filter
367 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
368 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
369 cserver->add_filter (sqlbrowsefilter);
370#else
371 cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
372 cerr << " this installation of Greenstone";
373#endif
374 }
375
376 if (infodbtype == "jdbm") {
377
378#ifdef USE_JDBM
379 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
380 db_ptr = jdbm_db_ptr;
381#else
382 cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
383 cerr << " this installation of Greenstone";
384#endif
385 }
386
387 // Use GDBM if the infodb type is empty or not one of the values above
388 if (db_ptr == NULL) {
389#ifdef USE_GDBM
390 db_ptr = new gdbmclass(gsdlhome);
391#else
392 cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
393 cerr << " this installation of Greenstone";
394#endif
395 }
396
397 // add a null filter
398 filterclass *filter = new filterclass ();
399 cserver->add_filter (filter);
400
401 // add a browse filter
402 browsefilterclass *browsefilter = new browsefilterclass();
403 browsefilter->set_db_ptr(db_ptr);
404 cserver->add_filter (browsefilter);
405
406 if (buildtype == "mg") {
407#ifdef ENABLE_MG
408 mgsearch = new mgsearchclass();
409
410 // add a query filter
411 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
412 queryfilter->set_db_ptr(db_ptr);
413 queryfilter->set_textsearchptr (mgsearch);
414 cserver->add_filter (queryfilter);
415
416 // add a mg source
417 mgsourceclass *mgsource = new mgsourceclass ();
418 mgsource->set_db_ptr(db_ptr);
419 mgsource->set_textsearchptr (mgsearch);
420 cserver->add_source (mgsource);
421#else
422 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
423#endif
424 }
425 else if (buildtype == "mgpp") {
426#ifdef ENABLE_MGPP
427 mgppsearch = new mgppsearchclass();
428
429 // add a query filter
430 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
431 queryfilter->set_db_ptr(db_ptr);
432 queryfilter->set_textsearchptr (mgppsearch);
433 cserver->add_filter (queryfilter);
434
435 // add a mgpp source
436 mgppsourceclass *mgppsource = new mgppsourceclass ();
437 mgppsource->set_db_ptr(db_ptr);
438 mgppsource->set_textsearchptr (mgppsearch);
439 cserver->add_source (mgppsource);
440#else
441 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
442#endif
443 }
444 else if (buildtype == "lucene") {
445#ifdef ENABLE_LUCENE
446 lucenesearch = new lucenesearchclass();
447 lucenesearch->set_gsdlhome(gsdlhome);
448
449 // add a query filter
450 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
451 queryfilter->set_db_ptr(db_ptr);
452 queryfilter->set_textsearchptr (lucenesearch);
453 cserver->add_filter (queryfilter);
454
455 // add a lucene source
456 lucenesourceclass *lucenesource = new lucenesourceclass ();
457 lucenesource->set_db_ptr(db_ptr);
458 lucenesource->set_textsearchptr (lucenesearch);
459 cserver->add_source (lucenesource);
460#else
461 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
462#endif
463 }
464 else {
465 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
466 }
467
468 }
469 else {
470 // see if it is a collectgroup col
471 text_t this_collect_dir = filename_cat(collecthome, collection);
472 if (collection_is_collect_group(this_collect_dir)) {
473 // by this point we know we will need a cserver
474 cserver = new collectserver();
475 }
476 // else not a collect group, or there was no collect.cfg
477 // => leave cserver as NULL so it will not be added into cservers
478 }
479
480 if (cserver != NULL) {
481 // inform collection server and everything it contains about its
482 // collection name
483 cserver->configure ("collection", collection);
484 cserver->configure ("gsdlhome", gsdlhome);
485 cserver->configure ("collecthome", collecthome);
486 cservers.addcollectserver (cserver);
487 }
488}
489
490void collectset::remove_all_collections () {
491
492#ifdef ENABLE_MG
493 // first unload any cached mg databases
494 if (mgsearch != NULL) {
495 mgsearch->unload_database();
496 }
497#endif
498#ifdef ENABLE_MGPP
499 if (mgppsearch != NULL) {
500 mgppsearch->unload_database();
501 }
502#endif
503#ifdef ENABLE_LUCENE
504 if (lucenesearch != NULL) {
505 lucenesearch->unload_database();
506 }
507#endif
508
509 // now delete the collection server objects
510 collectservermapclass::iterator here = cservers.begin();
511 collectservermapclass::iterator end = cservers.end();
512
513 while (here != end) {
514 if ((*here).second.c != NULL) {
515 delete (*here).second.c;
516 }
517 ++here;
518 }
519 // since all collection server objects are deleted (which deleted their source objects
520 // which in turn deleted their search objects), we now NULL the local reference to the
521 // search objects. See the extensive comment for this in remove_collection(text_t).
522#ifdef ENABLE_MG
523 if (mgsearch != NULL) {
524 mgsearch = NULL;
525 }
526#endif
527#ifdef ENABLE_MGPP
528 if (mgppsearch != NULL) {
529 mgppsearch = NULL;
530 }
531#endif
532#ifdef ENABLE_LUCENE
533 if (lucenesearch != NULL) {
534 lucenesearch = NULL;
535 }
536#endif
537
538 cservers.clear();
539}
540
541void collectset::add_collection_group(const text_t& collection,
542 const text_t& gsdlhome,
543 const text_t& collecthome)
544{
545 text_tarray group;
546
547 text_t collect_group_dir = filename_cat (collecthome, collection);
548
549 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
550 if (collection_is_collect_group(collect_group_dir)) {
551 if (read_dir (collect_group_dir, group)) {
552
553 text_tarray::const_iterator thiscol = group.begin();
554 text_tarray::const_iterator endcol = group.end();
555
556 while (thiscol != endcol) {
557 // ignore the etc directory
558 if (*thiscol == "etc") {
559 ++thiscol;
560 continue;
561 }
562
563 //text_t group_col = filename_cat(collection,*thiscol);
564 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
565 text_t group_col = collection + "/" + *thiscol;
566 this->add_collection (group_col, gsdlhome, collecthome);
567
568 ++thiscol;
569 }
570 }
571 }
572}
573
574void collectset::add_all_collection_groups (const text_t& gsdlhome,
575 const text_t& collecthome)
576
577{
578 collectservermapclass::iterator here = cservers.begin();
579 collectservermapclass::iterator end = cservers.end();
580
581 while (here != end) {
582 text_t collection = (*here).second.c->get_collection_name();
583 this->add_collection_group(collection,gsdlhome,collecthome);
584
585 ++here;
586 }
587}
588
589
590// remove_collection deletes the collection server of collection.
591// This only needs to be called if a collectionserver is to be
592// removed while the library is running. The destructor function
593// cleans up all collectservers when the program exits.
594void collectset::remove_collection (const text_t &collection) {
595
596 // do nothing if no collection server exists for this collection
597 if (cservers.getcollectserver(collection) == NULL) return;
598
599#ifdef ENABLE_MG
600 // first unload any cached mg databases - we may need to do something
601 // similar to this for mgpp and lucene too
602 if (mgsearch != NULL) {
603 mgsearch->unload_database();
604 }
605#endif
606#ifdef ENABLE_MGPP
607 if (mgppsearch != NULL) {
608 mgppsearch->unload_database();
609 }
610#endif
611#ifdef ENABLE_LUCENE
612 if (lucenesearch != NULL) {
613 lucenesearch->unload_database();
614 }
615#endif
616
617 // now delete the collection server object
618 collectservermapclass::iterator here = cservers.begin();
619 collectservermapclass::iterator end = cservers.end();
620
621 while (here != end) {
622 if ((*here).second.c != NULL && (*here).first == collection) {
623 delete (*here).second.c;
624
625 // The above code deletes the collection server object for this collection, which then
626 // deletes the <indexer>source object, which then deletes the <indexer>search object.
627 // Since we have a reference to the <index>search object here, we have to set it to NULL
628 // at this point, because we test it against null-ness elsewhere in this code. (Without
629 // setting it to NULL, we end up with server crashing issues.)
630 // Ideally, we'd like to know that we are NULLing the pointer to the exact same object
631 // as was freed above, but we can't know that without complicated object access to make
632 // the necessary pointer comparison. Fortunately, this class maintains only one type of
633 // <index>search object (of a/any kind) at any time, so we can NULL this confidently now.
634#ifdef ENABLE_MG
635 if (mgsearch != NULL) {
636 mgsearch = NULL;
637 }
638#endif
639#ifdef ENABLE_MGPP
640 if (mgppsearch != NULL) {
641 mgppsearch = NULL;
642 }
643#endif
644#ifdef ENABLE_LUCENE
645 if (lucenesearch != NULL) {
646 lucenesearch = NULL;
647 }
648#endif
649
650 // continue cleaning up the collection server
651 cservers.erase (here);
652
653 return;
654 } // end if
655 ++here;
656 }
657}
658
659
660// remove_collection deletes the collection server of collection.
661// This only needs to be called if a collectionserver is to be
662// removed while the library is running. The destructor function
663// cleans up all collectservers when the program exits.
664void collectset::remove_collection (const text_t &collection, ostream &logout) {
665
666 remove_collection(collection);
667
668 outconvertclass text_t2ascii;
669 logout << text_t2ascii << "collectset::remove_collection: Removed collectserver for "
670 << collection << "\n";
671}
672
673void collectset::configure(const text_t &key, const text_tarray &cfgline)
674{
675 if ((key == "collection") || (key == "collectdir")) return;
676
677 collectservermapclass::iterator here = cservers.begin();
678 collectservermapclass::iterator end = cservers.end();
679
680 while (here != end) {
681 assert ((*here).second.c != NULL);
682 if ((*here).second.c != NULL) {
683 if (key == "collectinfo") {
684 if ((*here).first == cfgline[0]) {
685 if (cfgline.size()==3) {
686 (*here).second.c->configure ("gsdlhome", cfgline[1]);
687 (*here).second.c->configure ("gdbmhome", cfgline[2]);
688 }
689 else {
690 (*here).second.c->configure ("gsdlhome", cfgline[1]);
691 (*here).second.c->configure ("collecthome", cfgline[2]);
692 (*here).second.c->configure ("gdbmhome", cfgline[3]);
693 }
694 }
695 } else {
696 (*here).second.c->configure (key, cfgline);
697 }
698 }
699
700 ++here;
701 }
702}
703
704void collectset::getCollectionList (text_tarray &collist)
705{
706 collist.erase(collist.begin(),collist.end());
707
708 collectservermapclass::iterator here = cservers.begin();
709 collectservermapclass::iterator end = cservers.end();
710 while (here != end) {
711 assert ((*here).second.c != NULL);
712 if ((*here).second.c != NULL) {
713 collist.push_back ((*here).second.c->get_collection_name());
714 }
715 ++here;
716 }
717}
718
Note: See TracBrowser for help on using the repository browser.