source: main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp@ 21453

Last change on this file since 21453 was 21453, checked in by ak19, 14 years ago

Dr Bainbridge improved some code by adding the helper method collection_is_collect_group().

  • Property svn:keywords set to Author Date Id Revision
File size: 16.5 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "jdbmnaiveclass.h"
33#include "gsdltools.h"
34#include "fileutil.h"
35#include "filter.h"
36#include "browsefilter.h"
37#include "sqlbrowsefilter.h"
38#include "queryfilter.h"
39
40#ifdef ENABLE_MG
41#include "mgqueryfilter.h"
42#include "mgsource.h"
43#endif
44#ifdef ENABLE_MGPP
45#include "mgppqueryfilter.h"
46#include "mgppsource.h"
47#endif
48#ifdef ENABLE_LUCENE
49#include "lucenequeryfilter.h"
50#include "lucenesource.h"
51#endif
52
53#include <assert.h>
54
55#ifdef USE_SQLITE
56#include "sqlitedbclass.h"
57#endif
58
59#ifdef USE_MSSQL
60#include "mssqldbclass.h"
61#endif
62
63
64collectset::collectset (text_t& gsdlhome, text_t& collecthome)
65{
66 // gsdlhome and collecthome will be set as a result of calling this function
67 // collecthome will default to "<gsdlhome>/collect" if not explicitly
68 // specified in config file
69
70 text_tarray collections;
71
72#ifdef ENABLE_MG
73 mgsearch = NULL;
74#endif
75#ifdef ENABLE_MGPP
76 mgppsearch = NULL;
77#endif
78#ifdef ENABLE_LUCENE
79 lucenesearch = NULL;
80#endif
81
82 // get gsdlhome (if we fail the error will be picked up later -- in
83 // cgiwrapper)
84
85 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
86 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
87 if (read_dir (collecthome, collections)) {
88
89 text_tarray::const_iterator thiscol = collections.begin();
90 text_tarray::const_iterator endcol = collections.end();
91
92 while (thiscol != endcol) {
93 // ignore the modelcol
94 if (*thiscol == "modelcol") {
95 ++thiscol;
96 continue;
97 }
98
99 this->add_collection (*thiscol, gsdlhome, collecthome);
100
101 ++thiscol;
102 }
103
104 this->add_all_collection_groups(gsdlhome, collecthome);
105 }
106 }
107 }
108
109 set_gsdl_env_vars(gsdlhome);
110}
111
112
113collectset::collectset (text_t& httpprefix_arg)
114{
115 httpprefix = httpprefix_arg;
116
117#ifdef ENABLE_MG
118 mgsearch = NULL;
119#endif
120#ifdef ENABLE_MGPP
121 mgppsearch = NULL;
122#endif
123#ifdef ENABLE_LUCENE
124 lucenesearch = NULL;
125#endif
126
127}
128
129collectset::collectset ()
130{
131#ifdef ENABLE_MG
132 mgsearch = NULL;
133#endif
134#ifdef ENABLE_MGPP
135 mgppsearch = NULL;
136#endif
137#ifdef ENABLE_LUCENE
138 lucenesearch = NULL;
139#endif
140}
141
142collectset::~collectset () {
143 collectservermapclass::iterator here = cservers.begin();
144 collectservermapclass::iterator end = cservers.end();
145
146 while (here != end) {
147 if ((*here).second.c != NULL) {
148 delete (*here).second.c;
149 }
150 ++here;
151 }
152 cservers.clear();
153}
154
155bool collectset::init (ostream &logout) {
156 collectservermapclass::iterator here = cservers.begin();
157 collectservermapclass::iterator end = cservers.end();
158
159 while (here != end) {
160 assert ((*here).second.c != NULL);
161 if ((*here).second.c != NULL) {
162 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
163
164 // configure this collection server
165
166 // note that we read build.cfg before collect.cfg so that the indexmaps
167 // are available to decode defaultindex, defaultsubcollection, and
168 // defaultlanguage
169
170 bool failed_build_cfg = false;
171 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
172 configinfo.collecthome, configinfo.collection)) {
173 failed_build_cfg = true;
174
175 outconvertclass text_t2ascii;
176 logout << text_t2ascii
177 << "Warning: couldn't read build.cfg file for collection \""
178 << configinfo.collection << "\""
179 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
180 << " collecthome=\"" << configinfo.collecthome << "\"\n";
181 }
182
183 bool failed_collect_cfg = false;
184 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
185 configinfo.collecthome, configinfo.collection)) {
186 failed_collect_cfg = true;
187 outconvertclass text_t2ascii;
188 logout << text_t2ascii
189 << "Warning: couldn't read collect.cfg file for collection \""
190 << configinfo.collection << "\""
191 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
192 << " collecthome=\"" << configinfo.collecthome << "\"\n";
193 }
194
195
196 bool is_colgroup = (*here).second.c->is_collection_group();
197
198 if (failed_collect_cfg) {
199 ++here;
200 continue;
201 }
202
203 if (failed_build_cfg && (!is_colgroup)) {
204 ++here;
205 continue;
206 }
207 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
208
209 if (!(*here).second.c->init (logout)) return false;
210
211 (*here).second.c->configure("httpdomain",httpdomain);
212 (*here).second.c->configure("httpprefix",httpprefix);
213 }
214 ++here;
215 }
216
217 return true;
218}
219
220collectservermapclass collectset::servers()
221{ return cservers;
222}
223
224
225void collectset::add_all_collections(const text_t &gsdlhome,
226 const text_t& collecthome)
227{
228 text_tarray collections;
229
230 if (read_dir(collecthome, collections)) {
231
232 text_tarray::const_iterator thiscol = collections.begin();
233 text_tarray::const_iterator endcol = collections.end();
234
235 while (thiscol != endcol) {
236
237 // ignore the modelcol
238 if (*thiscol == "modelcol") {
239 ++thiscol;
240 continue;
241 }
242
243 // create collection server for this collection
244 this->add_collection (*thiscol, gsdlhome, collecthome);
245
246 ++thiscol;
247 }
248
249 this->add_all_collection_groups(gsdlhome,collecthome);
250 }
251}
252
253bool collectset::collection_is_collect_group (const text_t& collect_dir)
254{
255 text_t is_collect_group_str = "false";
256 text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
257
258 if (file_exists(collect_cfg)) {
259 char *collect_cfgc = collect_cfg.getcstr();
260 ifstream confin(collect_cfgc);
261
262 if (confin) {
263 text_tarray cfgline;
264
265 while (read_cfg_line(confin, cfgline) >= 0) {
266 if (cfgline.size() == 2) {
267 text_t key = cfgline[0];
268 cfgline.erase(cfgline.begin());
269 if (key == "collectgroup") {
270 is_collect_group_str = cfgline[0];
271 break;
272 }
273 }
274 }
275
276 confin.close();
277 }
278
279 delete []collect_cfgc;
280 }
281
282 bool is_collect_group = (is_collect_group_str == "true") ? true : false;
283
284 return is_collect_group;
285}
286
287
288// add_collection sets up the collectionserver and calls
289// add_collectserver
290void collectset::add_collection (const text_t& collection,
291 const text_t& gsdlhome,
292 const text_t& collecthome)
293{
294 // read config file to see if built with mg, mgpp, or lucene
295 text_t buildtype = "mg"; // mg is default
296 text_t infodbtype = "gdbm"; // gdbm is default
297
298 this->remove_collection(collection);
299
300 collectserver *cserver = NULL;
301
302 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
303 if (file_exists (build_cfg)) {
304 char *build_cfgc = build_cfg.getcstr();
305 ifstream confin(build_cfgc);
306
307 if (confin) {
308 text_tarray cfgline;
309
310 while (read_cfg_line(confin, cfgline) >= 0) {
311 if (cfgline.size() == 2) {
312 text_t key = cfgline[0];
313 cfgline.erase(cfgline.begin());
314 if (key == "buildtype") {
315 buildtype = cfgline[0];
316 }
317 if (key == "infodbtype") {
318 infodbtype = cfgline[0];
319 }
320 }
321 }
322 confin.close();
323 }
324 delete []build_cfgc;
325
326 cserver = new collectserver();
327
328 // Create a dbclass of the correct type
329 dbclass *db_ptr = NULL;
330
331#ifdef USE_SQLITE
332 if (infodbtype == "sqlite")
333 {
334 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
335 db_ptr = sql_db_ptr;
336
337 // add a sql browse filter
338 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
339 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
340 cserver->add_filter (sqlbrowsefilter);
341 }
342#endif
343
344#ifdef USE_MSSQL
345 if (infodbtype == "mssql")
346 {
347 mssqldbclass *mssql_db_ptr = new mssqldbclass();
348 db_ptr = mssql_db_ptr;
349
350 // add a sql browse filter
351 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
352 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
353 cserver->add_filter (sqlbrowsefilter);
354 }
355#endif
356
357 if (infodbtype == "jdbm") {
358
359 jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
360 db_ptr = jdbm_db_ptr;
361 }
362
363 // Use GDBM if the infodb type is empty or not one of the values above
364 if (db_ptr == NULL) {
365 db_ptr = new gdbmclass();
366 }
367
368 // add a null filter
369 filterclass *filter = new filterclass ();
370 cserver->add_filter (filter);
371
372 // add a browse filter
373 browsefilterclass *browsefilter = new browsefilterclass();
374 browsefilter->set_db_ptr(db_ptr);
375 cserver->add_filter (browsefilter);
376
377 if (buildtype == "mg") {
378#ifdef ENABLE_MG
379 mgsearch = new mgsearchclass();
380
381 // add a query filter
382 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
383 queryfilter->set_db_ptr(db_ptr);
384 queryfilter->set_textsearchptr (mgsearch);
385 cserver->add_filter (queryfilter);
386
387 // add a mg source
388 mgsourceclass *mgsource = new mgsourceclass ();
389 mgsource->set_db_ptr(db_ptr);
390 mgsource->set_textsearchptr (mgsearch);
391 cserver->add_source (mgsource);
392#else
393 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
394#endif
395 }
396 else if (buildtype == "mgpp") {
397#ifdef ENABLE_MGPP
398 mgppsearch = new mgppsearchclass();
399
400 // add a query filter
401 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
402 queryfilter->set_db_ptr(db_ptr);
403 queryfilter->set_textsearchptr (mgppsearch);
404 cserver->add_filter (queryfilter);
405
406 // add a mgpp source
407 mgppsourceclass *mgppsource = new mgppsourceclass ();
408 mgppsource->set_db_ptr(db_ptr);
409 mgppsource->set_textsearchptr (mgppsearch);
410 cserver->add_source (mgppsource);
411#else
412 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
413#endif
414 }
415 else if (buildtype == "lucene") {
416#ifdef ENABLE_LUCENE
417 lucenesearch = new lucenesearchclass();
418 lucenesearch->set_gsdlhome(gsdlhome);
419
420 // add a query filter
421 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
422 queryfilter->set_db_ptr(db_ptr);
423 queryfilter->set_textsearchptr (lucenesearch);
424 cserver->add_filter (queryfilter);
425
426 // add a lucene source
427 lucenesourceclass *lucenesource = new lucenesourceclass ();
428 lucenesource->set_db_ptr(db_ptr);
429 lucenesource->set_textsearchptr (lucenesearch);
430 cserver->add_source (lucenesource);
431#else
432 cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
433#endif
434 }
435 else {
436 cerr << "Warning: unrecognized buildtype " << buildtype << endl;
437 }
438
439 }
440 else {
441 // see if it is a collectgroup col
442 text_t this_collect_dir = filename_cat(collecthome, collection);
443 if (collection_is_collect_group(this_collect_dir)) {
444 // by this point we know we will need a cserver
445 cserver = new collectserver();
446 }
447 // else not a collect group, or there was no collect.cfg
448 // => leave cserver as NULL so it will not be added into cservers
449 }
450
451 if (cserver != NULL) {
452 // inform collection server and everything it contains about its
453 // collection name
454 cserver->configure ("collection", collection);
455 cserver->configure ("gsdlhome", gsdlhome);
456 cserver->configure ("collecthome", collecthome);
457 cservers.addcollectserver (cserver);
458 }
459}
460
461void collectset::remove_all_collections () {
462
463#ifdef ENABLE_MG
464 // first unload any cached mg databases
465 if (mgsearch != NULL) {
466 mgsearch->unload_database();
467 }
468#endif
469
470 // now delete the collection server objects
471 collectservermapclass::iterator here = cservers.begin();
472 collectservermapclass::iterator end = cservers.end();
473
474 while (here != end) {
475 if ((*here).second.c != NULL) {
476 delete (*here).second.c;
477 }
478 ++here;
479 }
480 cservers.clear();
481}
482
483void collectset::add_collection_group(const text_t& collection,
484 const text_t& gsdlhome,
485 const text_t& collecthome)
486{
487 text_tarray group;
488
489 text_t collect_group_dir = filename_cat (collecthome, collection);
490
491 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
492 if (collection_is_collect_group(collect_group_dir)) {
493 if (read_dir (collect_group_dir, group)) {
494
495 text_tarray::const_iterator thiscol = group.begin();
496 text_tarray::const_iterator endcol = group.end();
497
498 while (thiscol != endcol) {
499 // ignore the etc directory
500 if (*thiscol == "etc") {
501 ++thiscol;
502 continue;
503 }
504
505 //text_t group_col = filename_cat(collection,*thiscol);
506 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
507 text_t group_col = collection + "/" + *thiscol;
508 this->add_collection (group_col, gsdlhome, collecthome);
509
510 ++thiscol;
511 }
512 }
513 }
514}
515
516void collectset::add_all_collection_groups (const text_t& gsdlhome,
517 const text_t& collecthome)
518
519{
520 collectservermapclass::iterator here = cservers.begin();
521 collectservermapclass::iterator end = cservers.end();
522
523 while (here != end) {
524 text_t collection = (*here).second.c->get_collection_name();
525 this->add_collection_group(collection,gsdlhome,collecthome);
526
527 ++here;
528 }
529}
530
531
532// remove_collection deletes the collection server of collection.
533// This only needs to be called if a collectionserver is to be
534// removed while the library is running. The destructor function
535// cleans up all collectservers when the program exits.
536void collectset::remove_collection (const text_t &collection) {
537
538 // do nothing if no collection server exists for this collection
539 if (cservers.getcollectserver(collection) == NULL) return;
540
541#ifdef ENABLE_MG
542 // first unload any cached mg databases - we may need to do something
543 // similar to this for mgpp and lucene too
544 if (mgsearch != NULL) {
545 mgsearch->unload_database();
546 }
547#endif
548
549 // now delete the collection server object
550 collectservermapclass::iterator here = cservers.begin();
551 collectservermapclass::iterator end = cservers.end();
552
553 while (here != end) {
554 if ((*here).second.c != NULL && (*here).first == collection) {
555 delete (*here).second.c;
556 cservers.erase (here);
557 return;
558 }
559 ++here;
560 }
561}
562
563
564// remove_collection deletes the collection server of collection.
565// This only needs to be called if a collectionserver is to be
566// removed while the library is running. The destructor function
567// cleans up all collectservers when the program exits.
568void collectset::remove_collection (const text_t &collection, ostream &logout) {
569
570 remove_collection(collection);
571
572 outconvertclass text_t2ascii;
573 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
574 << collection << "\n";
575}
576
577void collectset::configure(const text_t &key, const text_tarray &cfgline)
578{
579 if ((key == "collection") || (key == "collectdir")) return;
580
581 collectservermapclass::iterator here = cservers.begin();
582 collectservermapclass::iterator end = cservers.end();
583
584 while (here != end) {
585 assert ((*here).second.c != NULL);
586 if ((*here).second.c != NULL) {
587 if (key == "collectinfo") {
588 if ((*here).first == cfgline[0]) {
589 if (cfgline.size()==3) {
590 (*here).second.c->configure ("gsdlhome", cfgline[1]);
591 (*here).second.c->configure ("gdbmhome", cfgline[2]);
592 }
593 else {
594 (*here).second.c->configure ("gsdlhome", cfgline[1]);
595 (*here).second.c->configure ("collecthome", cfgline[2]);
596 (*here).second.c->configure ("gdbmhome", cfgline[3]);
597 }
598 }
599 } else {
600 (*here).second.c->configure (key, cfgline);
601 }
602 }
603
604 ++here;
605 }
606}
607
608void collectset::getCollectionList (text_tarray &collist)
609{
610 collist.erase(collist.begin(),collist.end());
611
612 collectservermapclass::iterator here = cservers.begin();
613 collectservermapclass::iterator end = cservers.end();
614 while (here != end) {
615 assert ((*here).second.c != NULL);
616 if ((*here).second.c != NULL) {
617 collist.push_back ((*here).second.c->get_collection_name());
618 }
619 ++here;
620 }
621}
622
Note: See TracBrowser for help on using the repository browser.