source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 21453

Last change on this file since 21453 was 21453, checked in by ak19, 14 years ago

Dr Bainbridge improved some code by adding the helper method collection_is_collect_group().

  • Property svn:keywords set to Author Date Id Revision
File size: 16.5 KB
RevLine 
[1860]1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
[15402]27#include "collectset.h"
[1860]28#include "collectserver.h"
[15402]29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
[15584]31#include "gdbmclass.h"
[21415]32#include "jdbmnaiveclass.h"
[16895]33#include "gsdltools.h"
[15759]34#include "fileutil.h"
[1860]35#include "filter.h"
36#include "browsefilter.h"
[15759]37#include "sqlbrowsefilter.h"
[1860]38#include "queryfilter.h"
[21324]39
40#ifdef ENABLE_MG
[1860]41#include "mgqueryfilter.h"
[21324]42#include "mgsource.h"
43#endif
44#ifdef ENABLE_MGPP
[1860]45#include "mgppqueryfilter.h"
[21324]46#include "mgppsource.h"
47#endif
48#ifdef ENABLE_LUCENE
[8028]49#include "lucenequeryfilter.h"
[15592]50#include "lucenesource.h"
[21324]51#endif
[8032]52
[1860]53#include <assert.h>
54
[15729]55#ifdef USE_SQLITE
56#include "sqlitedbclass.h"
57#endif
[1860]58
[17476]59#ifdef USE_MSSQL
60#include "mssqldbclass.h"
61#endif
62
63
[16312]64collectset::collectset (text_t& gsdlhome, text_t& collecthome)
65{
66 // gsdlhome and collecthome will be set as a result of calling this function
67 // collecthome will default to "<gsdlhome>/collect" if not explicitly
68 // specified in config file
[15729]69
[1860]70 text_tarray collections;
71
[21324]72#ifdef ENABLE_MG
[19365]73 mgsearch = NULL;
[21324]74#endif
75#ifdef ENABLE_MGPP
[19365]76 mgppsearch = NULL;
[21324]77#endif
78#ifdef ENABLE_LUCENE
[19365]79 lucenesearch = NULL;
[21324]80#endif
[19365]81
[1860]82 // get gsdlhome (if we fail the error will be picked up later -- in
83 // cgiwrapper)
[15002]84
[16312]85 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
[7302]86 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
[16312]87 if (read_dir (collecthome, collections)) {
[1860]88
[2344]89 text_tarray::const_iterator thiscol = collections.begin();
90 text_tarray::const_iterator endcol = collections.end();
[1860]91
[2344]92 while (thiscol != endcol) {
93 // ignore the modelcol
94 if (*thiscol == "modelcol") {
[9620]95 ++thiscol;
[2344]96 continue;
97 }
[1860]98
[16312]99 this->add_collection (*thiscol, gsdlhome, collecthome);
[1860]100
[9620]101 ++thiscol;
[2344]102 }
[15002]103
[16312]104 this->add_all_collection_groups(gsdlhome, collecthome);
[2344]105 }
106 }
[1860]107 }
[16895]108
109 set_gsdl_env_vars(gsdlhome);
[1860]110}
111
[17989]112
113collectset::collectset (text_t& httpprefix_arg)
114{
115 httpprefix = httpprefix_arg;
[19365]116
[21324]117#ifdef ENABLE_MG
[19365]118 mgsearch = NULL;
[21324]119#endif
120#ifdef ENABLE_MGPP
[19365]121 mgppsearch = NULL;
[21324]122#endif
123#ifdef ENABLE_LUCENE
[19365]124 lucenesearch = NULL;
[21324]125#endif
[19365]126
[17989]127}
128
[19365]129collectset::collectset ()
130{
[21324]131#ifdef ENABLE_MG
[19365]132 mgsearch = NULL;
[21324]133#endif
134#ifdef ENABLE_MGPP
[19365]135 mgppsearch = NULL;
[21324]136#endif
137#ifdef ENABLE_LUCENE
[19365]138 lucenesearch = NULL;
[21324]139#endif
[19365]140}
[17989]141
[1860]142collectset::~collectset () {
143 collectservermapclass::iterator here = cservers.begin();
144 collectservermapclass::iterator end = cservers.end();
145
146 while (here != end) {
147 if ((*here).second.c != NULL) {
148 delete (*here).second.c;
149 }
[9620]150 ++here;
[1860]151 }
152 cservers.clear();
153}
154
155bool collectset::init (ostream &logout) {
156 collectservermapclass::iterator here = cservers.begin();
157 collectservermapclass::iterator end = cservers.end();
158
159 while (here != end) {
160 assert ((*here).second.c != NULL);
161 if ((*here).second.c != NULL) {
162 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
163
164 // configure this collection server
165
166 // note that we read build.cfg before collect.cfg so that the indexmaps
167 // are available to decode defaultindex, defaultsubcollection, and
168 // defaultlanguage
[15002]169
170 bool failed_build_cfg = false;
[1860]171 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
[16312]172 configinfo.collecthome, configinfo.collection)) {
[15002]173 failed_build_cfg = true;
174
[1860]175 outconvertclass text_t2ascii;
176 logout << text_t2ascii
[16312]177 << "Warning: couldn't read build.cfg file for collection \""
178 << configinfo.collection << "\""
179 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
180 << " collecthome=\"" << configinfo.collecthome << "\"\n";
[1860]181 }
182
[15002]183 bool failed_collect_cfg = false;
[1860]184 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
[16312]185 configinfo.collecthome, configinfo.collection)) {
[15002]186 failed_collect_cfg = true;
[1860]187 outconvertclass text_t2ascii;
188 logout << text_t2ascii
189 << "Warning: couldn't read collect.cfg file for collection \""
[16312]190 << configinfo.collection << "\""
191 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
192 << " collecthome=\"" << configinfo.collecthome << "\"\n";
[15002]193 }
194
195
196 bool is_colgroup = (*here).second.c->is_collection_group();
197
198 if (failed_collect_cfg) {
[9620]199 ++here;
[1860]200 continue;
201 }
202
[15002]203 if (failed_build_cfg && (!is_colgroup)) {
204 ++here;
205 continue;
206 }
207 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
208
[1860]209 if (!(*here).second.c->init (logout)) return false;
210
211 (*here).second.c->configure("httpdomain",httpdomain);
212 (*here).second.c->configure("httpprefix",httpprefix);
213 }
[9620]214 ++here;
[1860]215 }
216
217 return true;
218}
219
220collectservermapclass collectset::servers()
221{ return cservers;
222}
223
[9030]224
[16312]225void collectset::add_all_collections(const text_t &gsdlhome,
226 const text_t& collecthome)
227{
[9030]228 text_tarray collections;
229
[16312]230 if (read_dir(collecthome, collections)) {
231
[9030]232 text_tarray::const_iterator thiscol = collections.begin();
233 text_tarray::const_iterator endcol = collections.end();
234
235 while (thiscol != endcol) {
236
237 // ignore the modelcol
238 if (*thiscol == "modelcol") {
[9620]239 ++thiscol;
[9030]240 continue;
241 }
242
[15421]243 // create collection server for this collection
[16312]244 this->add_collection (*thiscol, gsdlhome, collecthome);
[9030]245
[9620]246 ++thiscol;
[9030]247 }
[15002]248
[16312]249 this->add_all_collection_groups(gsdlhome,collecthome);
[9030]250 }
251}
252
[21453]253bool collectset::collection_is_collect_group (const text_t& collect_dir)
254{
255 text_t is_collect_group_str = "false";
256 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
257
258 if (file_exists(collect_cfg)) {
259 char *collect_cfgc = collect_cfg.getcstr();
260 ifstream confin(collect_cfgc);
261
262 if (confin) {
263 text_tarray cfgline;
264
265 while (read_cfg_line(confin, cfgline) >= 0) {
266 if (cfgline.size() == 2) {
267 text_t key = cfgline[0];
268 cfgline.erase(cfgline.begin());
269 if (key == "collectgroup") {
270 is_collect_group_str = cfgline[0];
271 break;
272 }
273 }
274 }
275
276 confin.close();
277 }
278
279 delete []collect_cfgc;
280 }
281
282 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
283
284 return is_collect_group;
285}
286
287
[1860]288// add_collection sets up the collectionserver and calls
289// add_collectserver
[16312]290void collectset::add_collection (const text_t& collection,
291 const text_t& gsdlhome,
292 const text_t& collecthome)
293{
[8028]294 // read config file to see if built with mg, mgpp, or lucene
[1860]295 text_t buildtype = "mg"; // mg is default
[15729]296 text_t infodbtype = "gdbm"; // gdbm is default
[2545]297
[21453]298 this->remove_collection(collection);
299
[19806]300 collectserver *cserver = NULL;
301
[16312]302 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
[19806]303 if (file_exists (build_cfg)) {
304 char *build_cfgc = build_cfg.getcstr();
305 ifstream confin(build_cfgc);
306
307 if (confin) {
[21453]308 text_tarray cfgline;
309
[19806]310 while (read_cfg_line(confin, cfgline) >= 0) {
311 if (cfgline.size() == 2) {
[21453]312 text_t key = cfgline[0];
[19806]313 cfgline.erase(cfgline.begin());
314 if (key == "buildtype") {
315 buildtype = cfgline[0];
316 }
317 if (key == "infodbtype") {
318 infodbtype = cfgline[0];
319 }
[1860]320 }
321 }
[19806]322 confin.close();
[1860]323 }
[19806]324 delete []build_cfgc;
[1860]325
[19806]326 cserver = new collectserver();
[1860]327
[19806]328 // Create a dbclass of the correct type
329 dbclass *db_ptr = NULL;
[15729]330
331#ifdef USE_SQLITE
[19806]332 if (infodbtype == "sqlite")
333 {
334 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
335 db_ptr = sql_db_ptr;
[15759]336
[19806]337 // add a sql browse filter
338 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
339 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
340 cserver->add_filter (sqlbrowsefilter);
341 }
[15729]342#endif
[17476]343
344#ifdef USE_MSSQL
[19806]345 if (infodbtype == "mssql")
346 {
347 mssqldbclass *mssql_db_ptr = new mssqldbclass();
348 db_ptr = mssql_db_ptr;
[15729]349
[19806]350 // add a sql browse filter
351 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
352 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
353 cserver->add_filter (sqlbrowsefilter);
354 }
[17476]355#endif
[21415]356
357 if (infodbtype == "jdbm") {
358
359 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
360 db_ptr = jdbm_db_ptr;
361 }
[17476]362
[19806]363 // Use GDBM if the infodb type is empty or not one of the values above
364 if (db_ptr == NULL) {
365 db_ptr = new gdbmclass();
366 }
[15729]367
[19806]368 // add a null filter
369 filterclass *filter = new filterclass ();
370 cserver->add_filter (filter);
371
372 // add a browse filter
373 browsefilterclass *browsefilter = new browsefilterclass();
374 browsefilter->set_db_ptr(db_ptr);
375 cserver->add_filter (browsefilter);
376
377 if (buildtype == "mg") {
[21324]378#ifdef ENABLE_MG
[19806]379 mgsearch = new mgsearchclass();
[1860]380
[19806]381 // add a query filter
382 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
383 queryfilter->set_db_ptr(db_ptr);
384 queryfilter->set_textsearchptr (mgsearch);
385 cserver->add_filter (queryfilter);
[1860]386
[19806]387 // add a mg source
388 mgsourceclass *mgsource = new mgsourceclass ();
389 mgsource->set_db_ptr(db_ptr);
390 mgsource->set_textsearchptr (mgsearch);
391 cserver->add_source (mgsource);
[21324]392#else
393 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
394#endif
[19806]395 }
396 else if (buildtype == "mgpp") {
[21324]397#ifdef ENABLE_MGPP
[19806]398 mgppsearch = new mgppsearchclass();
[1860]399
[19806]400 // add a query filter
401 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
402 queryfilter->set_db_ptr(db_ptr);
403 queryfilter->set_textsearchptr (mgppsearch);
404 cserver->add_filter (queryfilter);
[1860]405
[21324]406 // add a mgpp source
407 mgppsourceclass *mgppsource = new mgppsourceclass ();
408 mgppsource->set_db_ptr(db_ptr);
409 mgppsource->set_textsearchptr (mgppsearch);
410 cserver->add_source (mgppsource);
411#else
412 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
413#endif
[19806]414 }
415 else if (buildtype == "lucene") {
[21324]416#ifdef ENABLE_LUCENE
[19806]417 lucenesearch = new lucenesearchclass();
418 lucenesearch->set_gsdlhome(gsdlhome);
419
420 // add a query filter
421 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
422 queryfilter->set_db_ptr(db_ptr);
423 queryfilter->set_textsearchptr (lucenesearch);
424 cserver->add_filter (queryfilter);
425
426 // add a lucene source
427 lucenesourceclass *lucenesource = new lucenesourceclass ();
428 lucenesource->set_db_ptr(db_ptr);
429 lucenesource->set_textsearchptr (lucenesearch);
430 cserver->add_source (lucenesource);
[21324]431#else
432 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
433#endif
[19806]434 }
435 else {
[21324]436 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
[19806]437 }
438
[12246]439 }
[19806]440 else {
441 // see if it is a collectgroup col
[21453]442 text_t this_collect_dir = filename_cat(collecthome, collection);
443 if (collection_is_collect_group(this_collect_dir)) {
444 // by this point we know we will need a cserver
445 cserver = new collectserver();
[19806]446 }
[21453]447 // else not a collect group, or there was no collect.cfg
448 // => leave cserver as NULL so it will not be added into cservers
[1860]449 }
450
[21453]451 if (cserver != NULL) {
452 // inform collection server and everything it contains about its
453 // collection name
454 cserver->configure ("collection", collection);
455 cserver->configure ("gsdlhome", gsdlhome);
456 cserver->configure ("collecthome", collecthome);
457 cservers.addcollectserver (cserver);
458 }
[1860]459}
460
[9030]461void collectset::remove_all_collections () {
462
[21324]463#ifdef ENABLE_MG
[9030]464 // first unload any cached mg databases
465 if (mgsearch != NULL) {
466 mgsearch->unload_database();
467 }
[21324]468#endif
[9030]469
470 // now delete the collection server objects
471 collectservermapclass::iterator here = cservers.begin();
472 collectservermapclass::iterator end = cservers.end();
473
474 while (here != end) {
475 if ((*here).second.c != NULL) {
476 delete (*here).second.c;
477 }
[9620]478 ++here;
[9030]479 }
480 cservers.clear();
481}
482
[15002]483void collectset::add_collection_group(const text_t& collection,
[16312]484 const text_t& gsdlhome,
485 const text_t& collecthome)
[15002]486{
487 text_tarray group;
[9030]488
[16312]489 text_t collect_group_dir = filename_cat (collecthome, collection);
490
[15002]491 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
[21453]492 if (collection_is_collect_group(collect_group_dir)) {
[15002]493 if (read_dir (collect_group_dir, group)) {
494
495 text_tarray::const_iterator thiscol = group.begin();
496 text_tarray::const_iterator endcol = group.end();
497
498 while (thiscol != endcol) {
[18978]499 // ignore the etc directory
[15002]500 if (*thiscol == "etc") {
501 ++thiscol;
502 continue;
503 }
504
[18978]505 //text_t group_col = filename_cat(collection,*thiscol);
506 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
507 text_t group_col = collection + "/" + *thiscol;
[16312]508 this->add_collection (group_col, gsdlhome, collecthome);
[15002]509
510 ++thiscol;
511 }
512 }
513 }
514}
515
[16312]516void collectset::add_all_collection_groups (const text_t& gsdlhome,
517 const text_t& collecthome)
[15002]518
519{
520 collectservermapclass::iterator here = cservers.begin();
521 collectservermapclass::iterator end = cservers.end();
522
523 while (here != end) {
524 text_t collection = (*here).second.c->get_collection_name();
[16312]525 this->add_collection_group(collection,gsdlhome,collecthome);
[15002]526
527 ++here;
528 }
529}
530
531
[1860]532// remove_collection deletes the collection server of collection.
533// This only needs to be called if a collectionserver is to be
534// removed while the library is running. The destructor function
535// cleans up all collectservers when the program exits.
[15002]536void collectset::remove_collection (const text_t &collection) {
[2545]537
[4340]538 // do nothing if no collection server exists for this collection
539 if (cservers.getcollectserver(collection) == NULL) return;
540
[21324]541#ifdef ENABLE_MG
[2545]542 // first unload any cached mg databases - we may need to do something
[8028]543 // similar to this for mgpp and lucene too
[2545]544 if (mgsearch != NULL) {
545 mgsearch->unload_database();
546 }
[21324]547#endif
[2545]548
549 // now delete the collection server object
[1860]550 collectservermapclass::iterator here = cservers.begin();
551 collectservermapclass::iterator end = cservers.end();
552
553 while (here != end) {
554 if ((*here).second.c != NULL && (*here).first == collection) {
555 delete (*here).second.c;
556 cservers.erase (here);
557 return;
558 }
[9620]559 ++here;
[1860]560 }
[15002]561}
562
563
564// remove_collection deletes the collection server of collection.
565// This only needs to be called if a collectionserver is to be
566// removed while the library is running. The destructor function
567// cleans up all collectservers when the program exits.
568void collectset::remove_collection (const text_t &collection, ostream &logout) {
569
570 remove_collection(collection);
571
[1860]572 outconvertclass text_t2ascii;
[15421]573 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
[1860]574 << collection << "\n";
575}
576
577void collectset::configure(const text_t &key, const text_tarray &cfgline)
578{
[16312]579 if ((key == "collection") || (key == "collectdir")) return;
[1860]580
581 collectservermapclass::iterator here = cservers.begin();
582 collectservermapclass::iterator end = cservers.end();
583
584 while (here != end) {
585 assert ((*here).second.c != NULL);
586 if ((*here).second.c != NULL) {
587 if (key == "collectinfo") {
588 if ((*here).first == cfgline[0]) {
[16312]589 if (cfgline.size()==3) {
590 (*here).second.c->configure ("gsdlhome", cfgline[1]);
591 (*here).second.c->configure ("gdbmhome", cfgline[2]);
592 }
593 else {
594 (*here).second.c->configure ("gsdlhome", cfgline[1]);
595 (*here).second.c->configure ("collecthome", cfgline[2]);
596 (*here).second.c->configure ("gdbmhome", cfgline[3]);
597 }
[1860]598 }
599 } else {
600 (*here).second.c->configure (key, cfgline);
601 }
602 }
603
[9620]604 ++here;
[1860]605 }
606}
607
608void collectset::getCollectionList (text_tarray &collist)
609{
610 collist.erase(collist.begin(),collist.end());
611
612 collectservermapclass::iterator here = cservers.begin();
613 collectservermapclass::iterator end = cservers.end();
614 while (here != end) {
615 assert ((*here).second.c != NULL);
616 if ((*here).second.c != NULL) {
617 collist.push_back ((*here).second.c->get_collection_name());
618 }
[9620]619 ++here;
[1860]620 }
621}
[16312]622
Note: See TracBrowser for help on using the repository browser.