source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 21324

Last change on this file since 21324 was 21324, checked in by ak19, 14 years ago

Changes to makefiles, configure files, and source code to work with the new configure flags that allow indexers to be individually compiled up by setting each indexer to be enabled or disabled (enable-mg, enable-mgpp, enable-lucene)

  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "gsdltools.h"
33#include "fileutil.h"
34#include "filter.h"
35#include "browsefilter.h"
36#include "sqlbrowsefilter.h"
37#include "queryfilter.h"
38
39#ifdef ENABLE_MG
40#include "mgqueryfilter.h"
41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
44#include "mgppqueryfilter.h"
45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
48#include "lucenequeryfilter.h"
49#include "lucenesource.h"
50#endif
51
52#include <assert.h>
53
54#ifdef USE_SQLITE
55#include "sqlitedbclass.h"
56#endif
57
58#ifdef USE_MSSQL
59#include "mssqldbclass.h"
60#endif
61
62
63collectset::collectset (text_t& gsdlhome, text_t& collecthome)
64{
65 // gsdlhome and collecthome will be set as a result of calling this function
66 // collecthome will default to "<gsdlhome>/collect" if not explicitly
67 // specified in config file
68
69 text_tarray collections;
70
71#ifdef ENABLE_MG
72 mgsearch = NULL;
73#endif
74#ifdef ENABLE_MGPP
75 mgppsearch = NULL;
76#endif
77#ifdef ENABLE_LUCENE
78 lucenesearch = NULL;
79#endif
80
81 // get gsdlhome (if we fail the error will be picked up later -- in
82 // cgiwrapper)
83
84 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
85 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
86 if (read_dir (collecthome, collections)) {
87
88 text_tarray::const_iterator thiscol = collections.begin();
89 text_tarray::const_iterator endcol = collections.end();
90
91 while (thiscol != endcol) {
92 // ignore the modelcol
93 if (*thiscol == "modelcol") {
94 ++thiscol;
95 continue;
96 }
97
98 this->add_collection (*thiscol, gsdlhome, collecthome);
99
100 ++thiscol;
101 }
102
103 this->add_all_collection_groups(gsdlhome, collecthome);
104 }
105 }
106 }
107
108 set_gsdl_env_vars(gsdlhome);
109}
110
111
112collectset::collectset (text_t& httpprefix_arg)
113{
114 httpprefix = httpprefix_arg;
115
116#ifdef ENABLE_MG
117 mgsearch = NULL;
118#endif
119#ifdef ENABLE_MGPP
120 mgppsearch = NULL;
121#endif
122#ifdef ENABLE_LUCENE
123 lucenesearch = NULL;
124#endif
125
126}
127
128collectset::collectset ()
129{
130#ifdef ENABLE_MG
131 mgsearch = NULL;
132#endif
133#ifdef ENABLE_MGPP
134 mgppsearch = NULL;
135#endif
136#ifdef ENABLE_LUCENE
137 lucenesearch = NULL;
138#endif
139}
140
141collectset::~collectset () {
142 collectservermapclass::iterator here = cservers.begin();
143 collectservermapclass::iterator end = cservers.end();
144
145 while (here != end) {
146 if ((*here).second.c != NULL) {
147 delete (*here).second.c;
148 }
149 ++here;
150 }
151 cservers.clear();
152}
153
154bool collectset::init (ostream &logout) {
155 collectservermapclass::iterator here = cservers.begin();
156 collectservermapclass::iterator end = cservers.end();
157
158 while (here != end) {
159 assert ((*here).second.c != NULL);
160 if ((*here).second.c != NULL) {
161 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
162
163 // configure this collection server
164
165 // note that we read build.cfg before collect.cfg so that the indexmaps
166 // are available to decode defaultindex, defaultsubcollection, and
167 // defaultlanguage
168
169 bool failed_build_cfg = false;
170 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
171 configinfo.collecthome, configinfo.collection)) {
172 failed_build_cfg = true;
173
174 outconvertclass text_t2ascii;
175 logout << text_t2ascii
176 << "Warning: couldn't read build.cfg file for collection \""
177 << configinfo.collection << "\""
178 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
179 << " collecthome=\"" << configinfo.collecthome << "\"\n";
180 }
181
182 bool failed_collect_cfg = false;
183 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
184 configinfo.collecthome, configinfo.collection)) {
185 failed_collect_cfg = true;
186 outconvertclass text_t2ascii;
187 logout << text_t2ascii
188 << "Warning: couldn't read collect.cfg file for collection \""
189 << configinfo.collection << "\""
190 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
191 << " collecthome=\"" << configinfo.collecthome << "\"\n";
192 }
193
194
195 bool is_colgroup = (*here).second.c->is_collection_group();
196
197 if (failed_collect_cfg) {
198 ++here;
199 continue;
200 }
201
202 if (failed_build_cfg && (!is_colgroup)) {
203 ++here;
204 continue;
205 }
206 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
207
208 if (!(*here).second.c->init (logout)) return false;
209
210 (*here).second.c->configure("httpdomain",httpdomain);
211 (*here).second.c->configure("httpprefix",httpprefix);
212 }
213 ++here;
214 }
215
216 return true;
217}
218
219collectservermapclass collectset::servers()
220{ return cservers;
221}
222
223
224void collectset::add_all_collections(const text_t &gsdlhome,
225 const text_t& collecthome)
226{
227 text_tarray collections;
228
229 if (read_dir(collecthome, collections)) {
230
231 text_tarray::const_iterator thiscol = collections.begin();
232 text_tarray::const_iterator endcol = collections.end();
233
234 while (thiscol != endcol) {
235
236 // ignore the modelcol
237 if (*thiscol == "modelcol") {
238 ++thiscol;
239 continue;
240 }
241
242 // create collection server for this collection
243 this->add_collection (*thiscol, gsdlhome, collecthome);
244
245 ++thiscol;
246 }
247
248 this->add_all_collection_groups(gsdlhome,collecthome);
249 }
250}
251
252// add_collection sets up the collectionserver and calls
253// add_collectserver
254void collectset::add_collection (const text_t& collection,
255 const text_t& gsdlhome,
256 const text_t& collecthome)
257{
258
259 this->remove_collection(collection);
260
261 // read config file to see if built with mg, mgpp, or lucene
262 text_t buildtype = "mg"; // mg is default
263 text_t infodbtype = "gdbm"; // gdbm is default
264
265 collectserver *cserver = NULL;
266
267 text_tarray cfgline;
268 text_t key;
269
270 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
271 if (file_exists (build_cfg)) {
272 char *build_cfgc = build_cfg.getcstr();
273 ifstream confin(build_cfgc);
274
275 if (confin) {
276 while (read_cfg_line(confin, cfgline) >= 0) {
277 if (cfgline.size() == 2) {
278 key = cfgline[0];
279 cfgline.erase(cfgline.begin());
280 if (key == "buildtype") {
281 buildtype = cfgline[0];
282 }
283 if (key == "infodbtype") {
284 infodbtype = cfgline[0];
285 }
286 }
287 }
288 confin.close();
289 }
290 delete []build_cfgc;
291
292 cserver = new collectserver();
293
294 // Create a dbclass of the correct type
295 dbclass *db_ptr = NULL;
296
297#ifdef USE_SQLITE
298 if (infodbtype == "sqlite")
299 {
300 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
301 db_ptr = sql_db_ptr;
302
303 // add a sql browse filter
304 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
305 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
306 cserver->add_filter (sqlbrowsefilter);
307 }
308#endif
309
310#ifdef USE_MSSQL
311 if (infodbtype == "mssql")
312 {
313 mssqldbclass *mssql_db_ptr = new mssqldbclass();
314 db_ptr = mssql_db_ptr;
315
316 // add a sql browse filter
317 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
318 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
319 cserver->add_filter (sqlbrowsefilter);
320 }
321#endif
322
323 // Use GDBM if the infodb type is empty or not one of the values above
324 if (db_ptr == NULL) {
325 db_ptr = new gdbmclass();
326 }
327
328 // add a null filter
329 filterclass *filter = new filterclass ();
330 cserver->add_filter (filter);
331
332 // add a browse filter
333 browsefilterclass *browsefilter = new browsefilterclass();
334 browsefilter->set_db_ptr(db_ptr);
335 cserver->add_filter (browsefilter);
336
337 if (buildtype == "mg") {
338#ifdef ENABLE_MG
339 mgsearch = new mgsearchclass();
340
341 // add a query filter
342 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
343 queryfilter->set_db_ptr(db_ptr);
344 queryfilter->set_textsearchptr (mgsearch);
345 cserver->add_filter (queryfilter);
346
347 // add a mg source
348 mgsourceclass *mgsource = new mgsourceclass ();
349 mgsource->set_db_ptr(db_ptr);
350 mgsource->set_textsearchptr (mgsearch);
351 cserver->add_source (mgsource);
352#else
353 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
354#endif
355 }
356 else if (buildtype == "mgpp") {
357#ifdef ENABLE_MGPP
358 mgppsearch = new mgppsearchclass();
359
360 // add a query filter
361 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
362 queryfilter->set_db_ptr(db_ptr);
363 queryfilter->set_textsearchptr (mgppsearch);
364 cserver->add_filter (queryfilter);
365
366 // add a mgpp source
367 mgppsourceclass *mgppsource = new mgppsourceclass ();
368 mgppsource->set_db_ptr(db_ptr);
369 mgppsource->set_textsearchptr (mgppsearch);
370 cserver->add_source (mgppsource);
371#else
372 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
373#endif
374 }
375 else if (buildtype == "lucene") {
376#ifdef ENABLE_LUCENE
377 lucenesearch = new lucenesearchclass();
378 lucenesearch->set_gsdlhome(gsdlhome);
379
380 // add a query filter
381 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
382 queryfilter->set_db_ptr(db_ptr);
383 queryfilter->set_textsearchptr (lucenesearch);
384 cserver->add_filter (queryfilter);
385
386 // add a lucene source
387 lucenesourceclass *lucenesource = new lucenesourceclass ();
388 lucenesource->set_db_ptr(db_ptr);
389 lucenesource->set_textsearchptr (lucenesearch);
390 cserver->add_source (lucenesource);
391#else
392 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
393#endif
394 }
395 else {
396 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
397 }
398
399 }
400 else {
401 // see if it is a collectgroup col
402
403
404 // This routine essentially exists twice in the code now (see add_collection_group)
405 // factor out as support routine
406
407 text_t collect_cfg = filename_cat(collecthome, collection, "etc", "collect.cfg");
408 if (file_exists (collect_cfg)) {
409 char *collect_cfgc = collect_cfg.getcstr();
410 ifstream confin(collect_cfgc);
411
412 text_t is_collect_group = "false";
413
414 if (confin) {
415 while (read_cfg_line(confin, cfgline) >= 0) {
416 if (cfgline.size() == 2) {
417 key = cfgline[0];
418
419 cfgline.erase(cfgline.begin());
420 if (key == "collectgroup") {
421 is_collect_group = cfgline[0];
422 }
423 }
424 }
425 confin.close();
426 }
427 delete []collect_cfgc;
428
429 if (is_collect_group != "true") {
430 // an unbuilt leaf collection
431 return;
432 }
433
434 // by this point we know we will need a cserver
435 cserver = new collectserver();
436
437 }
438 else {
439 // no collect.cfg => filter it out from list of collections added
440 return;
441 }
442 }
443
444 // inform collection server and everything it contains about its
445 // collection name
446 cserver->configure ("collection", collection);
447 cserver->configure ("gsdlhome", gsdlhome);
448 cserver->configure ("collecthome", collecthome);
449 cservers.addcollectserver (cserver);
450}
451
452void collectset::remove_all_collections () {
453
454#ifdef ENABLE_MG
455 // first unload any cached mg databases
456 if (mgsearch != NULL) {
457 mgsearch->unload_database();
458 }
459#endif
460
461 // now delete the collection server objects
462 collectservermapclass::iterator here = cservers.begin();
463 collectservermapclass::iterator end = cservers.end();
464
465 while (here != end) {
466 if ((*here).second.c != NULL) {
467 delete (*here).second.c;
468 }
469 ++here;
470 }
471 cservers.clear();
472}
473
474void collectset::add_collection_group(const text_t& collection,
475 const text_t& gsdlhome,
476 const text_t& collecthome)
477{
478 text_tarray group;
479
480 text_t collect_group_dir = filename_cat (collecthome, collection);
481
482 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
483 text_t is_collect_group;
484 text_tarray cfgline;
485 text_t key;
486 text_t collect_cfg = filename_cat(collect_group_dir, "etc", "collect.cfg");
487 char *collect_cfgc = collect_cfg.getcstr();
488 ifstream confin(collect_cfgc);
489
490 if (confin) {
491 while (read_cfg_line(confin, cfgline) >= 0) {
492 if (cfgline.size() == 2) {
493 key = cfgline[0];
494 cfgline.erase(cfgline.begin());
495 if (key == "collectgroup") {
496 is_collect_group = cfgline[0];
497 break;
498 }
499 }
500 }
501 confin.close();
502 }
503 delete []collect_cfgc;
504
505 if (is_collect_group == "true") {
506 if (read_dir (collect_group_dir, group)) {
507
508 text_tarray::const_iterator thiscol = group.begin();
509 text_tarray::const_iterator endcol = group.end();
510
511 while (thiscol != endcol) {
512 // ignore the etc directory
513 if (*thiscol == "etc") {
514 ++thiscol;
515 continue;
516 }
517
518 //text_t group_col = filename_cat(collection,*thiscol);
519 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
520 text_t group_col = collection + "/" + *thiscol;
521 this->add_collection (group_col, gsdlhome, collecthome);
522
523 ++thiscol;
524 }
525 }
526 }
527}
528
529void collectset::add_all_collection_groups (const text_t& gsdlhome,
530 const text_t& collecthome)
531
532{
533 collectservermapclass::iterator here = cservers.begin();
534 collectservermapclass::iterator end = cservers.end();
535
536 while (here != end) {
537 text_t collection = (*here).second.c->get_collection_name();
538 this->add_collection_group(collection,gsdlhome,collecthome);
539
540 ++here;
541 }
542}
543
544
545// remove_collection deletes the collection server of collection.
546// This only needs to be called if a collectionserver is to be
547// removed while the library is running. The destructor function
548// cleans up all collectservers when the program exits.
549void collectset::remove_collection (const text_t &collection) {
550
551 // do nothing if no collection server exists for this collection
552 if (cservers.getcollectserver(collection) == NULL) return;
553
554#ifdef ENABLE_MG
555 // first unload any cached mg databases - we may need to do something
556 // similar to this for mgpp and lucene too
557 if (mgsearch != NULL) {
558 mgsearch->unload_database();
559 }
560#endif
561
562 // now delete the collection server object
563 collectservermapclass::iterator here = cservers.begin();
564 collectservermapclass::iterator end = cservers.end();
565
566 while (here != end) {
567 if ((*here).second.c != NULL && (*here).first == collection) {
568 delete (*here).second.c;
569 cservers.erase (here);
570 return;
571 }
572 ++here;
573 }
574}
575
576
577// remove_collection deletes the collection server of collection.
578// This only needs to be called if a collectionserver is to be
579// removed while the library is running. The destructor function
580// cleans up all collectservers when the program exits.
581void collectset::remove_collection (const text_t &collection, ostream &logout) {
582
583 remove_collection(collection);
584
585 outconvertclass text_t2ascii;
586 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
587 << collection << "\n";
588}
589
590void collectset::configure(const text_t &key, const text_tarray &cfgline)
591{
592 if ((key == "collection") || (key == "collectdir")) return;
593
594 collectservermapclass::iterator here = cservers.begin();
595 collectservermapclass::iterator end = cservers.end();
596
597 while (here != end) {
598 assert ((*here).second.c != NULL);
599 if ((*here).second.c != NULL) {
600 if (key == "collectinfo") {
601 if ((*here).first == cfgline[0]) {
602 if (cfgline.size()==3) {
603 (*here).second.c->configure ("gsdlhome", cfgline[1]);
604 (*here).second.c->configure ("gdbmhome", cfgline[2]);
605 }
606 else {
607 (*here).second.c->configure ("gsdlhome", cfgline[1]);
608 (*here).second.c->configure ("collecthome", cfgline[2]);
609 (*here).second.c->configure ("gdbmhome", cfgline[3]);
610 }
611 }
612 } else {
613 (*here).second.c->configure (key, cfgline);
614 }
615 }
616
617 ++here;
618 }
619}
620
621void collectset::getCollectionList (text_tarray &collist)
622{
623 collist.erase(collist.begin(),collist.end());
624
625 collectservermapclass::iterator here = cservers.begin();
626 collectservermapclass::iterator end = cservers.end();
627 while (here != end) {
628 assert ((*here).second.c != NULL);
629 if ((*here).second.c != NULL) {
630 collist.push_back ((*here).second.c->get_collection_name());
631 }
632 ++here;
633 }
634}
635
Note: See TracBrowser for help on using the repository browser.