source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 21415

Last change on this file since 21415 was 21415, checked in by davidb, 14 years ago

Support for JDBM database backend added

  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "jdbmnaiveclass.h"
33#include "gsdltools.h"
34#include "fileutil.h"
35#include "filter.h"
36#include "browsefilter.h"
37#include "sqlbrowsefilter.h"
38#include "queryfilter.h"
39
40#ifdef ENABLE_MG
41#include "mgqueryfilter.h"
42#include "mgsource.h"
43#endif
44#ifdef ENABLE_MGPP
45#include "mgppqueryfilter.h"
46#include "mgppsource.h"
47#endif
48#ifdef ENABLE_LUCENE
49#include "lucenequeryfilter.h"
50#include "lucenesource.h"
51#endif
52
53#include <assert.h>
54
55#ifdef USE_SQLITE
56#include "sqlitedbclass.h"
57#endif
58
59#ifdef USE_MSSQL
60#include "mssqldbclass.h"
61#endif
62
63
64collectset::collectset (text_t& gsdlhome, text_t& collecthome)
65{
66 // gsdlhome and collecthome will be set as a result of calling this function
67 // collecthome will default to "<gsdlhome>/collect" if not explicitly
68 // specified in config file
69
70 text_tarray collections;
71
72#ifdef ENABLE_MG
73 mgsearch = NULL;
74#endif
75#ifdef ENABLE_MGPP
76 mgppsearch = NULL;
77#endif
78#ifdef ENABLE_LUCENE
79 lucenesearch = NULL;
80#endif
81
82 // get gsdlhome (if we fail the error will be picked up later -- in
83 // cgiwrapper)
84
85 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
86 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
87 if (read_dir (collecthome, collections)) {
88
89 text_tarray::const_iterator thiscol = collections.begin();
90 text_tarray::const_iterator endcol = collections.end();
91
92 while (thiscol != endcol) {
93 // ignore the modelcol
94 if (*thiscol == "modelcol") {
95 ++thiscol;
96 continue;
97 }
98
99 this->add_collection (*thiscol, gsdlhome, collecthome);
100
101 ++thiscol;
102 }
103
104 this->add_all_collection_groups(gsdlhome, collecthome);
105 }
106 }
107 }
108
109 set_gsdl_env_vars(gsdlhome);
110}
111
112
113collectset::collectset (text_t& httpprefix_arg)
114{
115 httpprefix = httpprefix_arg;
116
117#ifdef ENABLE_MG
118 mgsearch = NULL;
119#endif
120#ifdef ENABLE_MGPP
121 mgppsearch = NULL;
122#endif
123#ifdef ENABLE_LUCENE
124 lucenesearch = NULL;
125#endif
126
127}
128
129collectset::collectset ()
130{
131#ifdef ENABLE_MG
132 mgsearch = NULL;
133#endif
134#ifdef ENABLE_MGPP
135 mgppsearch = NULL;
136#endif
137#ifdef ENABLE_LUCENE
138 lucenesearch = NULL;
139#endif
140}
141
142collectset::~collectset () {
143 collectservermapclass::iterator here = cservers.begin();
144 collectservermapclass::iterator end = cservers.end();
145
146 while (here != end) {
147 if ((*here).second.c != NULL) {
148 delete (*here).second.c;
149 }
150 ++here;
151 }
152 cservers.clear();
153}
154
155bool collectset::init (ostream &logout) {
156 collectservermapclass::iterator here = cservers.begin();
157 collectservermapclass::iterator end = cservers.end();
158
159 while (here != end) {
160 assert ((*here).second.c != NULL);
161 if ((*here).second.c != NULL) {
162 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
163
164 // configure this collection server
165
166 // note that we read build.cfg before collect.cfg so that the indexmaps
167 // are available to decode defaultindex, defaultsubcollection, and
168 // defaultlanguage
169
170 bool failed_build_cfg = false;
171 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
172 configinfo.collecthome, configinfo.collection)) {
173 failed_build_cfg = true;
174
175 outconvertclass text_t2ascii;
176 logout << text_t2ascii
177 << "Warning: couldn't read build.cfg file for collection \""
178 << configinfo.collection << "\""
179 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
180 << " collecthome=\"" << configinfo.collecthome << "\"\n";
181 }
182
183 bool failed_collect_cfg = false;
184 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
185 configinfo.collecthome, configinfo.collection)) {
186 failed_collect_cfg = true;
187 outconvertclass text_t2ascii;
188 logout << text_t2ascii
189 << "Warning: couldn't read collect.cfg file for collection \""
190 << configinfo.collection << "\""
191 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
192 << " collecthome=\"" << configinfo.collecthome << "\"\n";
193 }
194
195
196 bool is_colgroup = (*here).second.c->is_collection_group();
197
198 if (failed_collect_cfg) {
199 ++here;
200 continue;
201 }
202
203 if (failed_build_cfg && (!is_colgroup)) {
204 ++here;
205 continue;
206 }
207 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
208
209 if (!(*here).second.c->init (logout)) return false;
210
211 (*here).second.c->configure("httpdomain",httpdomain);
212 (*here).second.c->configure("httpprefix",httpprefix);
213 }
214 ++here;
215 }
216
217 return true;
218}
219
220collectservermapclass collectset::servers()
221{ return cservers;
222}
223
224
225void collectset::add_all_collections(const text_t &gsdlhome,
226 const text_t& collecthome)
227{
228 text_tarray collections;
229
230 if (read_dir(collecthome, collections)) {
231
232 text_tarray::const_iterator thiscol = collections.begin();
233 text_tarray::const_iterator endcol = collections.end();
234
235 while (thiscol != endcol) {
236
237 // ignore the modelcol
238 if (*thiscol == "modelcol") {
239 ++thiscol;
240 continue;
241 }
242
243 // create collection server for this collection
244 this->add_collection (*thiscol, gsdlhome, collecthome);
245
246 ++thiscol;
247 }
248
249 this->add_all_collection_groups(gsdlhome,collecthome);
250 }
251}
252
253// add_collection sets up the collectionserver and calls
254// add_collectserver
255void collectset::add_collection (const text_t& collection,
256 const text_t& gsdlhome,
257 const text_t& collecthome)
258{
259
260 this->remove_collection(collection);
261
262 // read config file to see if built with mg, mgpp, or lucene
263 text_t buildtype = "mg"; // mg is default
264 text_t infodbtype = "gdbm"; // gdbm is default
265
266 collectserver *cserver = NULL;
267
268 text_tarray cfgline;
269 text_t key;
270
271 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
272 if (file_exists (build_cfg)) {
273 char *build_cfgc = build_cfg.getcstr();
274 ifstream confin(build_cfgc);
275
276 if (confin) {
277 while (read_cfg_line(confin, cfgline) >= 0) {
278 if (cfgline.size() == 2) {
279 key = cfgline[0];
280 cfgline.erase(cfgline.begin());
281 if (key == "buildtype") {
282 buildtype = cfgline[0];
283 }
284 if (key == "infodbtype") {
285 infodbtype = cfgline[0];
286 }
287 }
288 }
289 confin.close();
290 }
291 delete []build_cfgc;
292
293 cserver = new collectserver();
294
295 // Create a dbclass of the correct type
296 dbclass *db_ptr = NULL;
297
298#ifdef USE_SQLITE
299 if (infodbtype == "sqlite")
300 {
301 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
302 db_ptr = sql_db_ptr;
303
304 // add a sql browse filter
305 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
306 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
307 cserver->add_filter (sqlbrowsefilter);
308 }
309#endif
310
311#ifdef USE_MSSQL
312 if (infodbtype == "mssql")
313 {
314 mssqldbclass *mssql_db_ptr = new mssqldbclass();
315 db_ptr = mssql_db_ptr;
316
317 // add a sql browse filter
318 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
319 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
320 cserver->add_filter (sqlbrowsefilter);
321 }
322#endif
323
324 if (infodbtype == "jdbm") {
325
326 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
327 db_ptr = jdbm_db_ptr;
328 }
329
330 // Use GDBM if the infodb type is empty or not one of the values above
331 if (db_ptr == NULL) {
332 db_ptr = new gdbmclass();
333 }
334
335 // add a null filter
336 filterclass *filter = new filterclass ();
337 cserver->add_filter (filter);
338
339 // add a browse filter
340 browsefilterclass *browsefilter = new browsefilterclass();
341 browsefilter->set_db_ptr(db_ptr);
342 cserver->add_filter (browsefilter);
343
344 if (buildtype == "mg") {
345#ifdef ENABLE_MG
346 mgsearch = new mgsearchclass();
347
348 // add a query filter
349 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
350 queryfilter->set_db_ptr(db_ptr);
351 queryfilter->set_textsearchptr (mgsearch);
352 cserver->add_filter (queryfilter);
353
354 // add a mg source
355 mgsourceclass *mgsource = new mgsourceclass ();
356 mgsource->set_db_ptr(db_ptr);
357 mgsource->set_textsearchptr (mgsearch);
358 cserver->add_source (mgsource);
359#else
360 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
361#endif
362 }
363 else if (buildtype == "mgpp") {
364#ifdef ENABLE_MGPP
365 mgppsearch = new mgppsearchclass();
366
367 // add a query filter
368 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
369 queryfilter->set_db_ptr(db_ptr);
370 queryfilter->set_textsearchptr (mgppsearch);
371 cserver->add_filter (queryfilter);
372
373 // add a mgpp source
374 mgppsourceclass *mgppsource = new mgppsourceclass ();
375 mgppsource->set_db_ptr(db_ptr);
376 mgppsource->set_textsearchptr (mgppsearch);
377 cserver->add_source (mgppsource);
378#else
379 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
380#endif
381 }
382 else if (buildtype == "lucene") {
383#ifdef ENABLE_LUCENE
384 lucenesearch = new lucenesearchclass();
385 lucenesearch->set_gsdlhome(gsdlhome);
386
387 // add a query filter
388 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
389 queryfilter->set_db_ptr(db_ptr);
390 queryfilter->set_textsearchptr (lucenesearch);
391 cserver->add_filter (queryfilter);
392
393 // add a lucene source
394 lucenesourceclass *lucenesource = new lucenesourceclass ();
395 lucenesource->set_db_ptr(db_ptr);
396 lucenesource->set_textsearchptr (lucenesearch);
397 cserver->add_source (lucenesource);
398#else
399 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
400#endif
401 }
402 else {
403 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
404 }
405
406 }
407 else {
408 // see if it is a collectgroup col
409
410
411 // This routine essentially exists twice in the code now (see add_collection_group)
412 // factor out as support routine
413
414 text_t collect_cfg = filename_cat(collecthome, collection, "etc", "collect.cfg");
415 if (file_exists (collect_cfg)) {
416 char *collect_cfgc = collect_cfg.getcstr();
417 ifstream confin(collect_cfgc);
418
419 text_t is_collect_group = "false";
420
421 if (confin) {
422 while (read_cfg_line(confin, cfgline) >= 0) {
423 if (cfgline.size() == 2) {
424 key = cfgline[0];
425
426 cfgline.erase(cfgline.begin());
427 if (key == "collectgroup") {
428 is_collect_group = cfgline[0];
429 }
430 }
431 }
432 confin.close();
433 }
434 delete []collect_cfgc;
435
436 if (is_collect_group != "true") {
437 // an unbuilt leaf collection
438 return;
439 }
440
441 // by this point we know we will need a cserver
442 cserver = new collectserver();
443
444 }
445 else {
446 // no collect.cfg => filter it out from list of collections added
447 return;
448 }
449 }
450
451 // inform collection server and everything it contains about its
452 // collection name
453 cserver->configure ("collection", collection);
454 cserver->configure ("gsdlhome", gsdlhome);
455 cserver->configure ("collecthome", collecthome);
456 cservers.addcollectserver (cserver);
457}
458
459void collectset::remove_all_collections () {
460
461#ifdef ENABLE_MG
462 // first unload any cached mg databases
463 if (mgsearch != NULL) {
464 mgsearch->unload_database();
465 }
466#endif
467
468 // now delete the collection server objects
469 collectservermapclass::iterator here = cservers.begin();
470 collectservermapclass::iterator end = cservers.end();
471
472 while (here != end) {
473 if ((*here).second.c != NULL) {
474 delete (*here).second.c;
475 }
476 ++here;
477 }
478 cservers.clear();
479}
480
481void collectset::add_collection_group(const text_t& collection,
482 const text_t& gsdlhome,
483 const text_t& collecthome)
484{
485 text_tarray group;
486
487 text_t collect_group_dir = filename_cat (collecthome, collection);
488
489 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
490 text_t is_collect_group;
491 text_tarray cfgline;
492 text_t key;
493 text_t collect_cfg = filename_cat(collect_group_dir, "etc", "collect.cfg");
494 char *collect_cfgc = collect_cfg.getcstr();
495 ifstream confin(collect_cfgc);
496
497 if (confin) {
498 while (read_cfg_line(confin, cfgline) >= 0) {
499 if (cfgline.size() == 2) {
500 key = cfgline[0];
501 cfgline.erase(cfgline.begin());
502 if (key == "collectgroup") {
503 is_collect_group = cfgline[0];
504 break;
505 }
506 }
507 }
508 confin.close();
509 }
510 delete []collect_cfgc;
511
512 if (is_collect_group == "true") {
513 if (read_dir (collect_group_dir, group)) {
514
515 text_tarray::const_iterator thiscol = group.begin();
516 text_tarray::const_iterator endcol = group.end();
517
518 while (thiscol != endcol) {
519 // ignore the etc directory
520 if (*thiscol == "etc") {
521 ++thiscol;
522 continue;
523 }
524
525 //text_t group_col = filename_cat(collection,*thiscol);
526 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
527 text_t group_col = collection + "/" + *thiscol;
528 this->add_collection (group_col, gsdlhome, collecthome);
529
530 ++thiscol;
531 }
532 }
533 }
534}
535
536void collectset::add_all_collection_groups (const text_t& gsdlhome,
537 const text_t& collecthome)
538
539{
540 collectservermapclass::iterator here = cservers.begin();
541 collectservermapclass::iterator end = cservers.end();
542
543 while (here != end) {
544 text_t collection = (*here).second.c->get_collection_name();
545 this->add_collection_group(collection,gsdlhome,collecthome);
546
547 ++here;
548 }
549}
550
551
552// remove_collection deletes the collection server of collection.
553// This only needs to be called if a collectionserver is to be
554// removed while the library is running. The destructor function
555// cleans up all collectservers when the program exits.
556void collectset::remove_collection (const text_t &collection) {
557
558 // do nothing if no collection server exists for this collection
559 if (cservers.getcollectserver(collection) == NULL) return;
560
561#ifdef ENABLE_MG
562 // first unload any cached mg databases - we may need to do something
563 // similar to this for mgpp and lucene too
564 if (mgsearch != NULL) {
565 mgsearch->unload_database();
566 }
567#endif
568
569 // now delete the collection server object
570 collectservermapclass::iterator here = cservers.begin();
571 collectservermapclass::iterator end = cservers.end();
572
573 while (here != end) {
574 if ((*here).second.c != NULL && (*here).first == collection) {
575 delete (*here).second.c;
576 cservers.erase (here);
577 return;
578 }
579 ++here;
580 }
581}
582
583
584// remove_collection deletes the collection server of collection.
585// This only needs to be called if a collectionserver is to be
586// removed while the library is running. The destructor function
587// cleans up all collectservers when the program exits.
588void collectset::remove_collection (const text_t &collection, ostream &logout) {
589
590 remove_collection(collection);
591
592 outconvertclass text_t2ascii;
593 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
594 << collection << "\n";
595}
596
597void collectset::configure(const text_t &key, const text_tarray &cfgline)
598{
599 if ((key == "collection") || (key == "collectdir")) return;
600
601 collectservermapclass::iterator here = cservers.begin();
602 collectservermapclass::iterator end = cservers.end();
603
604 while (here != end) {
605 assert ((*here).second.c != NULL);
606 if ((*here).second.c != NULL) {
607 if (key == "collectinfo") {
608 if ((*here).first == cfgline[0]) {
609 if (cfgline.size()==3) {
610 (*here).second.c->configure ("gsdlhome", cfgline[1]);
611 (*here).second.c->configure ("gdbmhome", cfgline[2]);
612 }
613 else {
614 (*here).second.c->configure ("gsdlhome", cfgline[1]);
615 (*here).second.c->configure ("collecthome", cfgline[2]);
616 (*here).second.c->configure ("gdbmhome", cfgline[3]);
617 }
618 }
619 } else {
620 (*here).second.c->configure (key, cfgline);
621 }
622 }
623
624 ++here;
625 }
626}
627
628void collectset::getCollectionList (text_tarray &collist)
629{
630 collist.erase(collist.begin(),collist.end());
631
632 collectservermapclass::iterator here = cservers.begin();
633 collectservermapclass::iterator end = cservers.end();
634 while (here != end) {
635 assert ((*here).second.c != NULL);
636 if ((*here).second.c != NULL) {
637 collist.push_back ((*here).second.c->get_collection_name());
638 }
639 ++here;
640 }
641}
642
Note: See TracBrowser for help on using the repository browser.