source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 21472

Last change on this file since 21472 was 21472, checked in by mdewsnip, 14 years ago

Created USE_GDBM and USE_JDBM defines that work similar to USE_SQLITE and USE_MSSQL for controlling which infodb types are compiled in. Currently these are not configurable through the configure scripts and are always on.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.6 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "queryfilter.h"
37
38#ifdef ENABLE_MG
39#include "mgqueryfilter.h"
40#include "mgsource.h"
41#endif
42#ifdef ENABLE_MGPP
43#include "mgppqueryfilter.h"
44#include "mgppsource.h"
45#endif
46#ifdef ENABLE_LUCENE
47#include "lucenequeryfilter.h"
48#include "lucenesource.h"
49#endif
50
51#include <assert.h>
52
53#ifdef USE_GDBM
54#include "gdbmclass.h"
55#endif
56
57#ifdef USE_JDBM
58#include "jdbmnaiveclass.h"
59#endif
60
61#ifdef USE_SQLITE
62#include "sqlitedbclass.h"
63#endif
64
65#ifdef USE_MSSQL
66#include "mssqldbclass.h"
67#endif
68
69
70collectset::collectset (text_t& gsdlhome, text_t& collecthome)
71{
72 // gsdlhome and collecthome will be set as a result of calling this function
73 // collecthome will default to "<gsdlhome>/collect" if not explicitly
74 // specified in config file
75
76 text_tarray collections;
77
78#ifdef ENABLE_MG
79 mgsearch = NULL;
80#endif
81#ifdef ENABLE_MGPP
82 mgppsearch = NULL;
83#endif
84#ifdef ENABLE_LUCENE
85 lucenesearch = NULL;
86#endif
87
88 // get gsdlhome (if we fail the error will be picked up later -- in
89 // cgiwrapper)
90
91 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
92 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
93 if (read_dir (collecthome, collections)) {
94
95 text_tarray::const_iterator thiscol = collections.begin();
96 text_tarray::const_iterator endcol = collections.end();
97
98 while (thiscol != endcol) {
99 // ignore the modelcol
100 if (*thiscol == "modelcol") {
101 ++thiscol;
102 continue;
103 }
104
105 this->add_collection (*thiscol, gsdlhome, collecthome);
106
107 ++thiscol;
108 }
109
110 this->add_all_collection_groups(gsdlhome, collecthome);
111 }
112 }
113 }
114
115 set_gsdl_env_vars(gsdlhome);
116}
117
118
119collectset::collectset (text_t& httpprefix_arg)
120{
121 httpprefix = httpprefix_arg;
122
123#ifdef ENABLE_MG
124 mgsearch = NULL;
125#endif
126#ifdef ENABLE_MGPP
127 mgppsearch = NULL;
128#endif
129#ifdef ENABLE_LUCENE
130 lucenesearch = NULL;
131#endif
132
133}
134
135collectset::collectset ()
136{
137#ifdef ENABLE_MG
138 mgsearch = NULL;
139#endif
140#ifdef ENABLE_MGPP
141 mgppsearch = NULL;
142#endif
143#ifdef ENABLE_LUCENE
144 lucenesearch = NULL;
145#endif
146}
147
148collectset::~collectset () {
149 collectservermapclass::iterator here = cservers.begin();
150 collectservermapclass::iterator end = cservers.end();
151
152 while (here != end) {
153 if ((*here).second.c != NULL) {
154 delete (*here).second.c;
155 }
156 ++here;
157 }
158 cservers.clear();
159}
160
161bool collectset::init (ostream &logout) {
162 collectservermapclass::iterator here = cservers.begin();
163 collectservermapclass::iterator end = cservers.end();
164
165 while (here != end) {
166 assert ((*here).second.c != NULL);
167 if ((*here).second.c != NULL) {
168 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
169
170 // configure this collection server
171
172 // note that we read build.cfg before collect.cfg so that the indexmaps
173 // are available to decode defaultindex, defaultsubcollection, and
174 // defaultlanguage
175
176 bool failed_build_cfg = false;
177 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
178 configinfo.collecthome, configinfo.collection)) {
179 failed_build_cfg = true;
180
181 outconvertclass text_t2ascii;
182 logout << text_t2ascii
183 << "Warning: couldn't read build.cfg file for collection \""
184 << configinfo.collection << "\""
185 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
186 << " collecthome=\"" << configinfo.collecthome << "\"\n";
187 }
188
189 bool failed_collect_cfg = false;
190 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
191 configinfo.collecthome, configinfo.collection)) {
192 failed_collect_cfg = true;
193 outconvertclass text_t2ascii;
194 logout << text_t2ascii
195 << "Warning: couldn't read collect.cfg file for collection \""
196 << configinfo.collection << "\""
197 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
198 << " collecthome=\"" << configinfo.collecthome << "\"\n";
199 }
200
201
202 bool is_colgroup = (*here).second.c->is_collection_group();
203
204 if (failed_collect_cfg) {
205 ++here;
206 continue;
207 }
208
209 if (failed_build_cfg && (!is_colgroup)) {
210 ++here;
211 continue;
212 }
213 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
214
215 if (!(*here).second.c->init (logout)) return false;
216
217 (*here).second.c->configure("httpdomain",httpdomain);
218 (*here).second.c->configure("httpprefix",httpprefix);
219 }
220 ++here;
221 }
222
223 return true;
224}
225
226collectservermapclass collectset::servers()
227{ return cservers;
228}
229
230
231void collectset::add_all_collections(const text_t &gsdlhome,
232 const text_t& collecthome)
233{
234 text_tarray collections;
235
236 if (read_dir(collecthome, collections)) {
237
238 text_tarray::const_iterator thiscol = collections.begin();
239 text_tarray::const_iterator endcol = collections.end();
240
241 while (thiscol != endcol) {
242
243 // ignore the modelcol
244 if (*thiscol == "modelcol") {
245 ++thiscol;
246 continue;
247 }
248
249 // create collection server for this collection
250 this->add_collection (*thiscol, gsdlhome, collecthome);
251
252 ++thiscol;
253 }
254
255 this->add_all_collection_groups(gsdlhome,collecthome);
256 }
257}
258
259bool collectset::collection_is_collect_group (const text_t& collect_dir)
260{
261 text_t is_collect_group_str = "false";
262 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
263
264 if (file_exists(collect_cfg)) {
265 char *collect_cfgc = collect_cfg.getcstr();
266 ifstream confin(collect_cfgc);
267
268 if (confin) {
269 text_tarray cfgline;
270
271 while (read_cfg_line(confin, cfgline) >= 0) {
272 if (cfgline.size() == 2) {
273 text_t key = cfgline[0];
274 cfgline.erase(cfgline.begin());
275 if (key == "collectgroup") {
276 is_collect_group_str = cfgline[0];
277 break;
278 }
279 }
280 }
281
282 confin.close();
283 }
284
285 delete []collect_cfgc;
286 }
287
288 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
289
290 return is_collect_group;
291}
292
293
294// add_collection sets up the collectionserver and calls
295// add_collectserver
296void collectset::add_collection (const text_t& collection,
297 const text_t& gsdlhome,
298 const text_t& collecthome)
299{
300 // read config file to see if built with mg, mgpp, or lucene
301 text_t buildtype = "mg"; // mg is default
302 text_t infodbtype = "gdbm"; // gdbm is default
303
304 this->remove_collection(collection);
305
306 collectserver *cserver = NULL;
307
308 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
309 if (file_exists (build_cfg)) {
310 char *build_cfgc = build_cfg.getcstr();
311 ifstream confin(build_cfgc);
312
313 if (confin) {
314 text_tarray cfgline;
315
316 while (read_cfg_line(confin, cfgline) >= 0) {
317 if (cfgline.size() == 2) {
318 text_t key = cfgline[0];
319 cfgline.erase(cfgline.begin());
320 if (key == "buildtype") {
321 buildtype = cfgline[0];
322 }
323 if (key == "infodbtype") {
324 infodbtype = cfgline[0];
325 }
326 }
327 }
328 confin.close();
329 }
330 delete []build_cfgc;
331
332 cserver = new collectserver();
333
334 // Create a dbclass of the correct type
335 dbclass *db_ptr = NULL;
336
337#ifdef USE_SQLITE
338 if (infodbtype == "sqlite")
339 {
340 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
341 db_ptr = sql_db_ptr;
342
343 // add a sql browse filter
344 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
345 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
346 cserver->add_filter (sqlbrowsefilter);
347 }
348#endif
349
350#ifdef USE_MSSQL
351 if (infodbtype == "mssql")
352 {
353 mssqldbclass *mssql_db_ptr = new mssqldbclass();
354 db_ptr = mssql_db_ptr;
355
356 // add a sql browse filter
357 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
358 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
359 cserver->add_filter (sqlbrowsefilter);
360 }
361#endif
362
363#ifdef USE_JDBM
364 if (infodbtype == "jdbm") {
365
366 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
367 db_ptr = jdbm_db_ptr;
368 }
369#endif
370
371 // Use GDBM if the infodb type is empty or not one of the values above
372 if (db_ptr == NULL) {
373#ifdef USE_GDBM
374 db_ptr = new gdbmclass();
375#endif
376 }
377
378 // add a null filter
379 filterclass *filter = new filterclass ();
380 cserver->add_filter (filter);
381
382 // add a browse filter
383 browsefilterclass *browsefilter = new browsefilterclass();
384 browsefilter->set_db_ptr(db_ptr);
385 cserver->add_filter (browsefilter);
386
387 if (buildtype == "mg") {
388#ifdef ENABLE_MG
389 mgsearch = new mgsearchclass();
390
391 // add a query filter
392 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
393 queryfilter->set_db_ptr(db_ptr);
394 queryfilter->set_textsearchptr (mgsearch);
395 cserver->add_filter (queryfilter);
396
397 // add a mg source
398 mgsourceclass *mgsource = new mgsourceclass ();
399 mgsource->set_db_ptr(db_ptr);
400 mgsource->set_textsearchptr (mgsearch);
401 cserver->add_source (mgsource);
402#else
403 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
404#endif
405 }
406 else if (buildtype == "mgpp") {
407#ifdef ENABLE_MGPP
408 mgppsearch = new mgppsearchclass();
409
410 // add a query filter
411 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
412 queryfilter->set_db_ptr(db_ptr);
413 queryfilter->set_textsearchptr (mgppsearch);
414 cserver->add_filter (queryfilter);
415
416 // add a mgpp source
417 mgppsourceclass *mgppsource = new mgppsourceclass ();
418 mgppsource->set_db_ptr(db_ptr);
419 mgppsource->set_textsearchptr (mgppsearch);
420 cserver->add_source (mgppsource);
421#else
422 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
423#endif
424 }
425 else if (buildtype == "lucene") {
426#ifdef ENABLE_LUCENE
427 lucenesearch = new lucenesearchclass();
428 lucenesearch->set_gsdlhome(gsdlhome);
429
430 // add a query filter
431 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
432 queryfilter->set_db_ptr(db_ptr);
433 queryfilter->set_textsearchptr (lucenesearch);
434 cserver->add_filter (queryfilter);
435
436 // add a lucene source
437 lucenesourceclass *lucenesource = new lucenesourceclass ();
438 lucenesource->set_db_ptr(db_ptr);
439 lucenesource->set_textsearchptr (lucenesearch);
440 cserver->add_source (lucenesource);
441#else
442 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
443#endif
444 }
445 else {
446 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
447 }
448
449 }
450 else {
451 // see if it is a collectgroup col
452 text_t this_collect_dir = filename_cat(collecthome, collection);
453 if (collection_is_collect_group(this_collect_dir)) {
454 // by this point we know we will need a cserver
455 cserver = new collectserver();
456 }
457 // else not a collect group, or there was no collect.cfg
458 // => leave cserver as NULL so it will not be added into cservers
459 }
460
461 if (cserver != NULL) {
462 // inform collection server and everything it contains about its
463 // collection name
464 cserver->configure ("collection", collection);
465 cserver->configure ("gsdlhome", gsdlhome);
466 cserver->configure ("collecthome", collecthome);
467 cservers.addcollectserver (cserver);
468 }
469}
470
471void collectset::remove_all_collections () {
472
473#ifdef ENABLE_MG
474 // first unload any cached mg databases
475 if (mgsearch != NULL) {
476 mgsearch->unload_database();
477 }
478#endif
479
480 // now delete the collection server objects
481 collectservermapclass::iterator here = cservers.begin();
482 collectservermapclass::iterator end = cservers.end();
483
484 while (here != end) {
485 if ((*here).second.c != NULL) {
486 delete (*here).second.c;
487 }
488 ++here;
489 }
490 cservers.clear();
491}
492
493void collectset::add_collection_group(const text_t& collection,
494 const text_t& gsdlhome,
495 const text_t& collecthome)
496{
497 text_tarray group;
498
499 text_t collect_group_dir = filename_cat (collecthome, collection);
500
501 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
502 if (collection_is_collect_group(collect_group_dir)) {
503 if (read_dir (collect_group_dir, group)) {
504
505 text_tarray::const_iterator thiscol = group.begin();
506 text_tarray::const_iterator endcol = group.end();
507
508 while (thiscol != endcol) {
509 // ignore the etc directory
510 if (*thiscol == "etc") {
511 ++thiscol;
512 continue;
513 }
514
515 //text_t group_col = filename_cat(collection,*thiscol);
516 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
517 text_t group_col = collection + "/" + *thiscol;
518 this->add_collection (group_col, gsdlhome, collecthome);
519
520 ++thiscol;
521 }
522 }
523 }
524}
525
526void collectset::add_all_collection_groups (const text_t& gsdlhome,
527 const text_t& collecthome)
528
529{
530 collectservermapclass::iterator here = cservers.begin();
531 collectservermapclass::iterator end = cservers.end();
532
533 while (here != end) {
534 text_t collection = (*here).second.c->get_collection_name();
535 this->add_collection_group(collection,gsdlhome,collecthome);
536
537 ++here;
538 }
539}
540
541
542// remove_collection deletes the collection server of collection.
543// This only needs to be called if a collectionserver is to be
544// removed while the library is running. The destructor function
545// cleans up all collectservers when the program exits.
546void collectset::remove_collection (const text_t &collection) {
547
548 // do nothing if no collection server exists for this collection
549 if (cservers.getcollectserver(collection) == NULL) return;
550
551#ifdef ENABLE_MG
552 // first unload any cached mg databases - we may need to do something
553 // similar to this for mgpp and lucene too
554 if (mgsearch != NULL) {
555 mgsearch->unload_database();
556 }
557#endif
558
559 // now delete the collection server object
560 collectservermapclass::iterator here = cservers.begin();
561 collectservermapclass::iterator end = cservers.end();
562
563 while (here != end) {
564 if ((*here).second.c != NULL && (*here).first == collection) {
565 delete (*here).second.c;
566 cservers.erase (here);
567 return;
568 }
569 ++here;
570 }
571}
572
573
574// remove_collection deletes the collection server of collection.
575// This only needs to be called if a collectionserver is to be
576// removed while the library is running. The destructor function
577// cleans up all collectservers when the program exits.
578void collectset::remove_collection (const text_t &collection, ostream &logout) {
579
580 remove_collection(collection);
581
582 outconvertclass text_t2ascii;
583 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
584 << collection << "\n";
585}
586
587void collectset::configure(const text_t &key, const text_tarray &cfgline)
588{
589 if ((key == "collection") || (key == "collectdir")) return;
590
591 collectservermapclass::iterator here = cservers.begin();
592 collectservermapclass::iterator end = cservers.end();
593
594 while (here != end) {
595 assert ((*here).second.c != NULL);
596 if ((*here).second.c != NULL) {
597 if (key == "collectinfo") {
598 if ((*here).first == cfgline[0]) {
599 if (cfgline.size()==3) {
600 (*here).second.c->configure ("gsdlhome", cfgline[1]);
601 (*here).second.c->configure ("gdbmhome", cfgline[2]);
602 }
603 else {
604 (*here).second.c->configure ("gsdlhome", cfgline[1]);
605 (*here).second.c->configure ("collecthome", cfgline[2]);
606 (*here).second.c->configure ("gdbmhome", cfgline[3]);
607 }
608 }
609 } else {
610 (*here).second.c->configure (key, cfgline);
611 }
612 }
613
614 ++here;
615 }
616}
617
618void collectset::getCollectionList (text_tarray &collist)
619{
620 collist.erase(collist.begin(),collist.end());
621
622 collectservermapclass::iterator here = cservers.begin();
623 collectservermapclass::iterator end = cservers.end();
624 while (here != end) {
625 assert ((*here).second.c != NULL);
626 if ((*here).second.c != NULL) {
627 collist.push_back ((*here).second.c->get_collection_name());
628 }
629 ++here;
630 }
631}
632
Note: See TracBrowser for help on using the repository browser.