source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 24111

Last change on this file since 24111 was 22067, checked in by ak19, 14 years ago
  1. More changes to makefiles: rm JDBMWrapper.jar and jdbm.jar on clean. 2. DB files (argdb, users, key, history) now not only for gdbm but to work with other db types like jdbm, sqlite and mssql.
  • Property svn:keywords set to Author Date Id Revision
File size: 17.4 KB
RevLine 
[1860]1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
[15402]27#include "collectset.h"
[1860]28#include "collectserver.h"
[15402]29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
[16895]31#include "gsdltools.h"
[15759]32#include "fileutil.h"
[1860]33#include "filter.h"
34#include "browsefilter.h"
[15759]35#include "sqlbrowsefilter.h"
[22050]36#include "sqlqueryfilter.h"
[1860]37#include "queryfilter.h"
[21324]38
39#ifdef ENABLE_MG
[1860]40#include "mgqueryfilter.h"
[21324]41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
[1860]44#include "mgppqueryfilter.h"
[21324]45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
[8028]48#include "lucenequeryfilter.h"
[15592]49#include "lucenesource.h"
[21324]50#endif
[8032]51
[1860]52#include <assert.h>
53
[21472]54#ifdef USE_GDBM
55#include "gdbmclass.h"
56#endif
57
58#ifdef USE_JDBM
59#include "jdbmnaiveclass.h"
60#endif
61
[15729]62#ifdef USE_SQLITE
63#include "sqlitedbclass.h"
64#endif
[1860]65
[17476]66#ifdef USE_MSSQL
67#include "mssqldbclass.h"
68#endif
69
70
[16312]71collectset::collectset (text_t& gsdlhome, text_t& collecthome)
72{
73 // gsdlhome and collecthome will be set as a result of calling this function
74 // collecthome will default to "<gsdlhome>/collect" if not explicitly
75 // specified in config file
[15729]76
[1860]77 text_tarray collections;
78
[21324]79#ifdef ENABLE_MG
[19365]80 mgsearch = NULL;
[21324]81#endif
82#ifdef ENABLE_MGPP
[19365]83 mgppsearch = NULL;
[21324]84#endif
85#ifdef ENABLE_LUCENE
[19365]86 lucenesearch = NULL;
[21324]87#endif
[19365]88
[1860]89 // get gsdlhome (if we fail the error will be picked up later -- in
90 // cgiwrapper)
[15002]91
[16312]92 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
[7302]93 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
[16312]94 if (read_dir (collecthome, collections)) {
[1860]95
[2344]96 text_tarray::const_iterator thiscol = collections.begin();
97 text_tarray::const_iterator endcol = collections.end();
[1860]98
[2344]99 while (thiscol != endcol) {
100 // ignore the modelcol
101 if (*thiscol == "modelcol") {
[9620]102 ++thiscol;
[2344]103 continue;
104 }
[1860]105
[16312]106 this->add_collection (*thiscol, gsdlhome, collecthome);
[1860]107
[9620]108 ++thiscol;
[2344]109 }
[15002]110
[16312]111 this->add_all_collection_groups(gsdlhome, collecthome);
[2344]112 }
113 }
[1860]114 }
[16895]115
116 set_gsdl_env_vars(gsdlhome);
[1860]117}
118
[17989]119
120collectset::collectset (text_t& httpprefix_arg)
121{
122 httpprefix = httpprefix_arg;
[19365]123
[21324]124#ifdef ENABLE_MG
[19365]125 mgsearch = NULL;
[21324]126#endif
127#ifdef ENABLE_MGPP
[19365]128 mgppsearch = NULL;
[21324]129#endif
130#ifdef ENABLE_LUCENE
[19365]131 lucenesearch = NULL;
[21324]132#endif
[19365]133
[17989]134}
135
[19365]136collectset::collectset ()
137{
[21324]138#ifdef ENABLE_MG
[19365]139 mgsearch = NULL;
[21324]140#endif
141#ifdef ENABLE_MGPP
[19365]142 mgppsearch = NULL;
[21324]143#endif
144#ifdef ENABLE_LUCENE
[19365]145 lucenesearch = NULL;
[21324]146#endif
[19365]147}
[17989]148
[1860]149collectset::~collectset () {
150 collectservermapclass::iterator here = cservers.begin();
151 collectservermapclass::iterator end = cservers.end();
152
153 while (here != end) {
154 if ((*here).second.c != NULL) {
155 delete (*here).second.c;
156 }
[9620]157 ++here;
[1860]158 }
159 cservers.clear();
160}
161
162bool collectset::init (ostream &logout) {
163 collectservermapclass::iterator here = cservers.begin();
164 collectservermapclass::iterator end = cservers.end();
165
166 while (here != end) {
167 assert ((*here).second.c != NULL);
168 if ((*here).second.c != NULL) {
169 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
170
171 // configure this collection server
172
173 // note that we read build.cfg before collect.cfg so that the indexmaps
174 // are available to decode defaultindex, defaultsubcollection, and
175 // defaultlanguage
[15002]176
177 bool failed_build_cfg = false;
[1860]178 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
[16312]179 configinfo.collecthome, configinfo.collection)) {
[15002]180 failed_build_cfg = true;
181
[1860]182 outconvertclass text_t2ascii;
183 logout << text_t2ascii
[16312]184 << "Warning: couldn't read build.cfg file for collection \""
185 << configinfo.collection << "\""
186 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
187 << " collecthome=\"" << configinfo.collecthome << "\"\n";
[1860]188 }
189
[15002]190 bool failed_collect_cfg = false;
[1860]191 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
[16312]192 configinfo.collecthome, configinfo.collection)) {
[15002]193 failed_collect_cfg = true;
[1860]194 outconvertclass text_t2ascii;
195 logout << text_t2ascii
196 << "Warning: couldn't read collect.cfg file for collection \""
[16312]197 << configinfo.collection << "\""
198 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
199 << " collecthome=\"" << configinfo.collecthome << "\"\n";
[15002]200 }
201
202
203 bool is_colgroup = (*here).second.c->is_collection_group();
204
205 if (failed_collect_cfg) {
[9620]206 ++here;
[1860]207 continue;
208 }
209
[15002]210 if (failed_build_cfg && (!is_colgroup)) {
211 ++here;
212 continue;
213 }
214 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
215
[1860]216 if (!(*here).second.c->init (logout)) return false;
217
218 (*here).second.c->configure("httpdomain",httpdomain);
219 (*here).second.c->configure("httpprefix",httpprefix);
220 }
[9620]221 ++here;
[1860]222 }
223
224 return true;
225}
226
227collectservermapclass collectset::servers()
228{ return cservers;
229}
230
[9030]231
[16312]232void collectset::add_all_collections(const text_t &gsdlhome,
233 const text_t& collecthome)
234{
[9030]235 text_tarray collections;
236
[16312]237 if (read_dir(collecthome, collections)) {
238
[9030]239 text_tarray::const_iterator thiscol = collections.begin();
240 text_tarray::const_iterator endcol = collections.end();
241
242 while (thiscol != endcol) {
243
244 // ignore the modelcol
245 if (*thiscol == "modelcol") {
[9620]246 ++thiscol;
[9030]247 continue;
248 }
249
[15421]250 // create collection server for this collection
[16312]251 this->add_collection (*thiscol, gsdlhome, collecthome);
[9030]252
[9620]253 ++thiscol;
[9030]254 }
[15002]255
[16312]256 this->add_all_collection_groups(gsdlhome,collecthome);
[9030]257 }
258}
259
[21453]260bool collectset::collection_is_collect_group (const text_t& collect_dir)
261{
262 text_t is_collect_group_str = "false";
263 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
264
265 if (file_exists(collect_cfg)) {
266 char *collect_cfgc = collect_cfg.getcstr();
267 ifstream confin(collect_cfgc);
268
269 if (confin) {
270 text_tarray cfgline;
271
272 while (read_cfg_line(confin, cfgline) >= 0) {
273 if (cfgline.size() == 2) {
274 text_t key = cfgline[0];
275 cfgline.erase(cfgline.begin());
276 if (key == "collectgroup") {
277 is_collect_group_str = cfgline[0];
278 break;
279 }
280 }
281 }
282
283 confin.close();
284 }
285
286 delete []collect_cfgc;
287 }
288
289 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
290
291 return is_collect_group;
292}
293
294
[1860]295// add_collection sets up the collectionserver and calls
296// add_collectserver
[16312]297void collectset::add_collection (const text_t& collection,
298 const text_t& gsdlhome,
299 const text_t& collecthome)
300{
[8028]301 // read config file to see if built with mg, mgpp, or lucene
[1860]302 text_t buildtype = "mg"; // mg is default
[15729]303 text_t infodbtype = "gdbm"; // gdbm is default
[2545]304
[21453]305 this->remove_collection(collection);
306
[19806]307 collectserver *cserver = NULL;
308
[16312]309 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
[19806]310 if (file_exists (build_cfg)) {
311 char *build_cfgc = build_cfg.getcstr();
312 ifstream confin(build_cfgc);
313
314 if (confin) {
[21453]315 text_tarray cfgline;
316
[19806]317 while (read_cfg_line(confin, cfgline) >= 0) {
318 if (cfgline.size() == 2) {
[21453]319 text_t key = cfgline[0];
[19806]320 cfgline.erase(cfgline.begin());
321 if (key == "buildtype") {
322 buildtype = cfgline[0];
323 }
324 if (key == "infodbtype") {
325 infodbtype = cfgline[0];
326 }
[1860]327 }
328 }
[19806]329 confin.close();
[1860]330 }
[19806]331 delete []build_cfgc;
[1860]332
[19806]333 cserver = new collectserver();
[1860]334
[19806]335 // Create a dbclass of the correct type
336 dbclass *db_ptr = NULL;
[15729]337
[19806]338 if (infodbtype == "sqlite")
339 {
[22050]340#ifdef USE_SQLITE
[22067]341 sqlitedbclass *sql_db_ptr = new sqlitedbclass(gsdlhome);
[19806]342 db_ptr = sql_db_ptr;
[15759]343
[19806]344 // add a sql browse filter
345 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
346 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
347 cserver->add_filter (sqlbrowsefilter);
[22050]348
349 // add a sql query filter
350 sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
351 sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
352 cserver->add_filter (sqlqueryfilter);
353
354#else
355 cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
356 cerr << " this installation of Greenstone";
357#endif
[19806]358 }
[17476]359
[19806]360 if (infodbtype == "mssql")
361 {
[22050]362#ifdef USE_MSSQL
[22067]363 mssqldbclass *mssql_db_ptr = new mssqldbclass(gsdlhome);
[19806]364 db_ptr = mssql_db_ptr;
[15729]365
[19806]366 // add a sql browse filter
367 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
368 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
369 cserver->add_filter (sqlbrowsefilter);
[22050]370#else
371 cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
372 cerr << " this installation of Greenstone";
373#endif
[19806]374 }
[21415]375
376 if (infodbtype == "jdbm") {
377
[22050]378#ifdef USE_JDBM
[21415]379 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
380 db_ptr = jdbm_db_ptr;
[22050]381#else
382 cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
383 cerr << " this installation of Greenstone";
384#endif
[21415]385 }
[21472]386
[19806]387 // Use GDBM if the infodb type is empty or not one of the values above
388 if (db_ptr == NULL) {
[21472]389#ifdef USE_GDBM
[22067]390 db_ptr = new gdbmclass(gsdlhome);
[22050]391#else
392 cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
393 cerr << " this installation of Greenstone";
[21472]394#endif
[19806]395 }
[15729]396
[19806]397 // add a null filter
398 filterclass *filter = new filterclass ();
399 cserver->add_filter (filter);
400
401 // add a browse filter
402 browsefilterclass *browsefilter = new browsefilterclass();
403 browsefilter->set_db_ptr(db_ptr);
404 cserver->add_filter (browsefilter);
405
406 if (buildtype == "mg") {
[21324]407#ifdef ENABLE_MG
[19806]408 mgsearch = new mgsearchclass();
[1860]409
[19806]410 // add a query filter
411 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
412 queryfilter->set_db_ptr(db_ptr);
413 queryfilter->set_textsearchptr (mgsearch);
414 cserver->add_filter (queryfilter);
[1860]415
[19806]416 // add a mg source
417 mgsourceclass *mgsource = new mgsourceclass ();
418 mgsource->set_db_ptr(db_ptr);
419 mgsource->set_textsearchptr (mgsearch);
420 cserver->add_source (mgsource);
[21324]421#else
422 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
423#endif
[19806]424 }
425 else if (buildtype == "mgpp") {
[21324]426#ifdef ENABLE_MGPP
[19806]427 mgppsearch = new mgppsearchclass();
[1860]428
[19806]429 // add a query filter
430 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
431 queryfilter->set_db_ptr(db_ptr);
432 queryfilter->set_textsearchptr (mgppsearch);
433 cserver->add_filter (queryfilter);
[1860]434
[21324]435 // add a mgpp source
436 mgppsourceclass *mgppsource = new mgppsourceclass ();
437 mgppsource->set_db_ptr(db_ptr);
438 mgppsource->set_textsearchptr (mgppsearch);
439 cserver->add_source (mgppsource);
440#else
441 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
442#endif
[19806]443 }
444 else if (buildtype == "lucene") {
[21324]445#ifdef ENABLE_LUCENE
[19806]446 lucenesearch = new lucenesearchclass();
447 lucenesearch->set_gsdlhome(gsdlhome);
448
449 // add a query filter
450 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
451 queryfilter->set_db_ptr(db_ptr);
452 queryfilter->set_textsearchptr (lucenesearch);
453 cserver->add_filter (queryfilter);
454
455 // add a lucene source
456 lucenesourceclass *lucenesource = new lucenesourceclass ();
457 lucenesource->set_db_ptr(db_ptr);
458 lucenesource->set_textsearchptr (lucenesearch);
459 cserver->add_source (lucenesource);
[21324]460#else
461 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
462#endif
[19806]463 }
464 else {
[21324]465 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
[19806]466 }
467
[12246]468 }
[19806]469 else {
470 // see if it is a collectgroup col
[21453]471 text_t this_collect_dir = filename_cat(collecthome, collection);
472 if (collection_is_collect_group(this_collect_dir)) {
473 // by this point we know we will need a cserver
474 cserver = new collectserver();
[19806]475 }
[21453]476 // else not a collect group, or there was no collect.cfg
477 // => leave cserver as NULL so it will not be added into cservers
[1860]478 }
479
[21453]480 if (cserver != NULL) {
481 // inform collection server and everything it contains about its
482 // collection name
483 cserver->configure ("collection", collection);
484 cserver->configure ("gsdlhome", gsdlhome);
485 cserver->configure ("collecthome", collecthome);
486 cservers.addcollectserver (cserver);
487 }
[1860]488}
489
[9030]490void collectset::remove_all_collections () {
491
[21324]492#ifdef ENABLE_MG
[9030]493 // first unload any cached mg databases
494 if (mgsearch != NULL) {
495 mgsearch->unload_database();
496 }
[21324]497#endif
[9030]498
499 // now delete the collection server objects
500 collectservermapclass::iterator here = cservers.begin();
501 collectservermapclass::iterator end = cservers.end();
502
503 while (here != end) {
504 if ((*here).second.c != NULL) {
505 delete (*here).second.c;
506 }
[9620]507 ++here;
[9030]508 }
509 cservers.clear();
510}
511
[15002]512void collectset::add_collection_group(const text_t& collection,
[16312]513 const text_t& gsdlhome,
514 const text_t& collecthome)
[15002]515{
516 text_tarray group;
[9030]517
[16312]518 text_t collect_group_dir = filename_cat (collecthome, collection);
519
[15002]520 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
[21453]521 if (collection_is_collect_group(collect_group_dir)) {
[15002]522 if (read_dir (collect_group_dir, group)) {
523
524 text_tarray::const_iterator thiscol = group.begin();
525 text_tarray::const_iterator endcol = group.end();
526
527 while (thiscol != endcol) {
[18978]528 // ignore the etc directory
[15002]529 if (*thiscol == "etc") {
530 ++thiscol;
531 continue;
532 }
533
[18978]534 //text_t group_col = filename_cat(collection,*thiscol);
535 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
536 text_t group_col = collection + "/" + *thiscol;
[16312]537 this->add_collection (group_col, gsdlhome, collecthome);
[15002]538
539 ++thiscol;
540 }
541 }
542 }
543}
544
[16312]545void collectset::add_all_collection_groups (const text_t& gsdlhome,
546 const text_t& collecthome)
[15002]547
548{
549 collectservermapclass::iterator here = cservers.begin();
550 collectservermapclass::iterator end = cservers.end();
551
552 while (here != end) {
553 text_t collection = (*here).second.c->get_collection_name();
[16312]554 this->add_collection_group(collection,gsdlhome,collecthome);
[15002]555
556 ++here;
557 }
558}
559
560
[1860]561// remove_collection deletes the collection server of collection.
562// This only needs to be called if a collectionserver is to be
563// removed while the library is running. The destructor function
564// cleans up all collectservers when the program exits.
[15002]565void collectset::remove_collection (const text_t &collection) {
[2545]566
[4340]567 // do nothing if no collection server exists for this collection
568 if (cservers.getcollectserver(collection) == NULL) return;
569
[21324]570#ifdef ENABLE_MG
[2545]571 // first unload any cached mg databases - we may need to do something
[8028]572 // similar to this for mgpp and lucene too
[2545]573 if (mgsearch != NULL) {
574 mgsearch->unload_database();
575 }
[21324]576#endif
[2545]577
578 // now delete the collection server object
[1860]579 collectservermapclass::iterator here = cservers.begin();
580 collectservermapclass::iterator end = cservers.end();
581
582 while (here != end) {
583 if ((*here).second.c != NULL && (*here).first == collection) {
584 delete (*here).second.c;
585 cservers.erase (here);
586 return;
587 }
[9620]588 ++here;
[1860]589 }
[15002]590}
591
592
593// remove_collection deletes the collection server of collection.
594// This only needs to be called if a collectionserver is to be
595// removed while the library is running. The destructor function
596// cleans up all collectservers when the program exits.
597void collectset::remove_collection (const text_t &collection, ostream &logout) {
598
599 remove_collection(collection);
600
[1860]601 outconvertclass text_t2ascii;
[15421]602 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
[1860]603 << collection << "\n";
604}
605
606void collectset::configure(const text_t &key, const text_tarray &cfgline)
607{
[16312]608 if ((key == "collection") || (key == "collectdir")) return;
[1860]609
610 collectservermapclass::iterator here = cservers.begin();
611 collectservermapclass::iterator end = cservers.end();
612
613 while (here != end) {
614 assert ((*here).second.c != NULL);
615 if ((*here).second.c != NULL) {
616 if (key == "collectinfo") {
617 if ((*here).first == cfgline[0]) {
[16312]618 if (cfgline.size()==3) {
619 (*here).second.c->configure ("gsdlhome", cfgline[1]);
620 (*here).second.c->configure ("gdbmhome", cfgline[2]);
621 }
622 else {
623 (*here).second.c->configure ("gsdlhome", cfgline[1]);
624 (*here).second.c->configure ("collecthome", cfgline[2]);
625 (*here).second.c->configure ("gdbmhome", cfgline[3]);
626 }
[1860]627 }
628 } else {
629 (*here).second.c->configure (key, cfgline);
630 }
631 }
632
[9620]633 ++here;
[1860]634 }
635}
636
637void collectset::getCollectionList (text_tarray &collist)
638{
639 collist.erase(collist.begin(),collist.end());
640
641 collectservermapclass::iterator here = cservers.begin();
642 collectservermapclass::iterator end = cservers.end();
643 while (here != end) {
644 assert ((*here).second.c != NULL);
645 if ((*here).second.c != NULL) {
646 collist.push_back ((*here).second.c->get_collection_name());
647 }
[9620]648 ++here;
[1860]649 }
650}
[16312]651
Note: See TracBrowser for help on using the repository browser.