source: gsdl/trunk/src/colservr/collectset.cpp@ 16312

Last change on this file since 16312 was 16312, checked in by davidb, 16 years ago

Introduction of 'collecthome' which parallels 'gsdlhome' to allow the toplevel collect folder to be outside of the gsdlhome area

  • Property svn:keywords set to Author Date Id Revision
File size: 13.8 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "queryfilter.h"
37#include "mgqueryfilter.h"
38#include "mgppqueryfilter.h"
39#include "mgsource.h"
40#include "lucenequeryfilter.h"
41#include "lucenesource.h"
42
43#include <assert.h>
44
45#ifdef USE_SQLITE
46#include "sqlitedbclass.h"
47#endif
48
49collectset::collectset (text_t& gsdlhome, text_t& collecthome)
50{
51 // gsdlhome and collecthome will be set as a result of calling this function
52 // collecthome will default to "<gsdlhome>/collect" if not explicitly
53 // specified in config file
54
55 text_tarray collections;
56
57 // get gsdlhome (if we fail the error will be picked up later -- in
58 // cgiwrapper)
59
60 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
61 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
62 if (read_dir (collecthome, collections)) {
63
64 text_tarray::const_iterator thiscol = collections.begin();
65 text_tarray::const_iterator endcol = collections.end();
66
67 while (thiscol != endcol) {
68 // ignore the modelcol
69 if (*thiscol == "modelcol") {
70 ++thiscol;
71 continue;
72 }
73
74 this->add_collection (*thiscol, gsdlhome, collecthome);
75
76 ++thiscol;
77 }
78
79 this->add_all_collection_groups(gsdlhome, collecthome);
80 }
81 }
82 }
83}
84
85collectset::~collectset () {
86 collectservermapclass::iterator here = cservers.begin();
87 collectservermapclass::iterator end = cservers.end();
88
89 while (here != end) {
90 if ((*here).second.c != NULL) {
91 delete (*here).second.c;
92 }
93 ++here;
94 }
95 cservers.clear();
96}
97
98bool collectset::init (ostream &logout) {
99 collectservermapclass::iterator here = cservers.begin();
100 collectservermapclass::iterator end = cservers.end();
101
102 while (here != end) {
103 assert ((*here).second.c != NULL);
104 if ((*here).second.c != NULL) {
105 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
106
107 // configure this collection server
108
109 // note that we read build.cfg before collect.cfg so that the indexmaps
110 // are available to decode defaultindex, defaultsubcollection, and
111 // defaultlanguage
112
113 bool failed_build_cfg = false;
114 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
115 configinfo.collecthome, configinfo.collection)) {
116 failed_build_cfg = true;
117
118 outconvertclass text_t2ascii;
119 logout << text_t2ascii
120 << "Warning: couldn't read build.cfg file for collection \""
121 << configinfo.collection << "\""
122 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
123 << " collecthome=\"" << configinfo.collecthome << "\"\n";
124 }
125
126 bool failed_collect_cfg = false;
127 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
128 configinfo.collecthome, configinfo.collection)) {
129 failed_collect_cfg = true;
130 outconvertclass text_t2ascii;
131 logout << text_t2ascii
132 << "Warning: couldn't read collect.cfg file for collection \""
133 << configinfo.collection << "\""
134 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
135 << " collecthome=\"" << configinfo.collecthome << "\"\n";
136 }
137
138
139 bool is_colgroup = (*here).second.c->is_collection_group();
140
141 if (failed_collect_cfg) {
142 ++here;
143 continue;
144 }
145
146 if (failed_build_cfg && (!is_colgroup)) {
147 ++here;
148 continue;
149 }
150 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
151
152 if (!(*here).second.c->init (logout)) return false;
153
154 (*here).second.c->configure("httpdomain",httpdomain);
155 (*here).second.c->configure("httpprefix",httpprefix);
156 }
157 ++here;
158 }
159
160 return true;
161}
162
163collectservermapclass collectset::servers()
164{ return cservers;
165}
166
167
168void collectset::add_all_collections(const text_t &gsdlhome,
169 const text_t& collecthome)
170{
171 text_tarray collections;
172
173 if (read_dir(collecthome, collections)) {
174
175 text_tarray::const_iterator thiscol = collections.begin();
176 text_tarray::const_iterator endcol = collections.end();
177
178 while (thiscol != endcol) {
179
180 // ignore the modelcol
181 if (*thiscol == "modelcol") {
182 ++thiscol;
183 continue;
184 }
185
186 // create collection server for this collection
187 this->add_collection (*thiscol, gsdlhome, collecthome);
188
189 ++thiscol;
190 }
191
192 this->add_all_collection_groups(gsdlhome,collecthome);
193 }
194}
195
196// add_collection sets up the collectionserver and calls
197// add_collectserver
198void collectset::add_collection (const text_t& collection,
199 const text_t& gsdlhome,
200 const text_t& collecthome)
201{
202
203 this->remove_collection(collection);
204
205 // read config file to see if built with mg, mgpp, or lucene
206 text_t buildtype = "mg"; // mg is default
207 text_t infodbtype = "gdbm"; // gdbm is default
208
209 text_tarray cfgline;
210 text_t key;
211
212 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
213 char *build_cfgc = build_cfg.getcstr();
214 ifstream confin(build_cfgc);
215
216 if (confin) {
217 while (read_cfg_line(confin, cfgline) >= 0) {
218 if (cfgline.size() == 2) {
219 key = cfgline[0];
220 cfgline.erase(cfgline.begin());
221 if (key == "buildtype") {
222 buildtype = cfgline[0];
223 }
224 if (key == "infodbtype") {
225 infodbtype = cfgline[0];
226 }
227 }
228 }
229 confin.close();
230 }
231 delete []build_cfgc;
232
233 collectserver *cserver = new collectserver();
234
235 // Create a dbclass of the correct type
236 dbclass *db_ptr = NULL;
237
238#ifdef USE_SQLITE
239 if (infodbtype == "sqlite")
240 {
241 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
242 db_ptr = sql_db_ptr;
243
244 // add a sql browse filter
245 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
246 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
247 cserver->add_filter (sqlbrowsefilter);
248 }
249#endif
250
251 // Use GDBM if the infodb type is empty or not one of the values above
252 if (db_ptr == NULL)
253 {
254 db_ptr = new gdbmclass();
255 }
256
257 // add a null filter
258 filterclass *filter = new filterclass ();
259 cserver->add_filter (filter);
260
261 // add a browse filter
262 browsefilterclass *browsefilter = new browsefilterclass();
263 browsefilter->set_db_ptr(db_ptr);
264 cserver->add_filter (browsefilter);
265
266 if (buildtype == "mg") {
267 mgsearch = new mgsearchclass();
268
269 // add a query filter
270 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
271 queryfilter->set_db_ptr(db_ptr);
272 queryfilter->set_textsearchptr (mgsearch);
273 cserver->add_filter (queryfilter);
274
275 // add a mg source
276 mgsourceclass *mgsource = new mgsourceclass ();
277 mgsource->set_db_ptr(db_ptr);
278 mgsource->set_textsearchptr (mgsearch);
279 cserver->add_source (mgsource);
280 }
281 else if (buildtype == "mgpp") {
282 mgppsearch = new mgppsearchclass();
283
284 // add a query filter
285 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
286 queryfilter->set_db_ptr(db_ptr);
287 queryfilter->set_textsearchptr (mgppsearch);
288 cserver->add_filter (queryfilter);
289
290 // add a mg source
291 mgsourceclass *mgsource = new mgsourceclass ();
292 mgsource->set_db_ptr(db_ptr);
293 mgsource->set_textsearchptr (mgppsearch);
294 cserver->add_source (mgsource);
295 }
296 else if (buildtype == "lucene") {
297 lucenesearch = new lucenesearchclass();
298 lucenesearch->set_gsdlhome(gsdlhome);
299
300 // add a query filter
301 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
302 queryfilter->set_db_ptr(db_ptr);
303 queryfilter->set_textsearchptr (lucenesearch);
304 cserver->add_filter (queryfilter);
305
306 // add a lucene source
307 lucenesourceclass *lucenesource = new lucenesourceclass ();
308 lucenesource->set_db_ptr(db_ptr);
309 lucenesource->set_textsearchptr (lucenesearch);
310 cserver->add_source (lucenesource);
311 }
312
313 // inform collection server and everything it contains about its
314 // collection name
315 cserver->configure ("collection", collection);
316 cserver->configure ("gsdlhome", gsdlhome);
317 cserver->configure ("collecthome", collecthome);
318 cservers.addcollectserver (cserver);
319}
320
321void collectset::remove_all_collections () {
322
323 // first unload any cached mg databases
324 if (mgsearch != NULL) {
325 mgsearch->unload_database();
326 }
327
328 // now delete the collection server objects
329 collectservermapclass::iterator here = cservers.begin();
330 collectservermapclass::iterator end = cservers.end();
331
332 while (here != end) {
333 if ((*here).second.c != NULL) {
334 delete (*here).second.c;
335 }
336 ++here;
337 }
338 cservers.clear();
339}
340
341void collectset::add_collection_group(const text_t& collection,
342 const text_t& gsdlhome,
343 const text_t& collecthome)
344{
345 text_tarray group;
346
347 text_t collect_group_dir = filename_cat (collecthome, collection);
348
349 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
350 text_t is_collect_group;
351 text_tarray cfgline;
352 text_t key;
353 text_t build_cfg = filename_cat(collect_group_dir, "etc", "collect.cfg");
354 char *collect_cfgc = build_cfg.getcstr();
355 ifstream confin(collect_cfgc);
356
357 if (confin) {
358 while (read_cfg_line(confin, cfgline) >= 0) {
359 if (cfgline.size() == 2) {
360 key = cfgline[0];
361 cfgline.erase(cfgline.begin());
362 if (key == "collectgroup") {
363 is_collect_group = cfgline[0];
364 break;
365 }
366 }
367 }
368 confin.close();
369 }
370 delete []collect_cfgc;
371
372 if (is_collect_group == "true") {
373
374 if (read_dir (collect_group_dir, group)) {
375
376 text_tarray::const_iterator thiscol = group.begin();
377 text_tarray::const_iterator endcol = group.end();
378
379 while (thiscol != endcol) {
380 // ignore the modelcol
381 if (*thiscol == "etc") {
382 ++thiscol;
383 continue;
384 }
385
386 text_t group_col = filename_cat(collection,*thiscol);
387 this->add_collection (group_col, gsdlhome, collecthome);
388
389 ++thiscol;
390 }
391 }
392 }
393}
394
395void collectset::add_all_collection_groups (const text_t& gsdlhome,
396 const text_t& collecthome)
397
398{
399 collectservermapclass::iterator here = cservers.begin();
400 collectservermapclass::iterator end = cservers.end();
401
402 while (here != end) {
403 text_t collection = (*here).second.c->get_collection_name();
404 this->add_collection_group(collection,gsdlhome,collecthome);
405
406 ++here;
407 }
408}
409
410
411// remove_collection deletes the collection server of collection.
412// This only needs to be called if a collectionserver is to be
413// removed while the library is running. The destructor function
414// cleans up all collectservers when the program exits.
415void collectset::remove_collection (const text_t &collection) {
416
417 // do nothing if no collection server exists for this collection
418 if (cservers.getcollectserver(collection) == NULL) return;
419
420 // first unload any cached mg databases - we may need to do something
421 // similar to this for mgpp and lucene too
422 if (mgsearch != NULL) {
423 mgsearch->unload_database();
424 }
425
426 // now delete the collection server object
427 collectservermapclass::iterator here = cservers.begin();
428 collectservermapclass::iterator end = cservers.end();
429
430 while (here != end) {
431 if ((*here).second.c != NULL && (*here).first == collection) {
432 delete (*here).second.c;
433 cservers.erase (here);
434 return;
435 }
436 ++here;
437 }
438}
439
440
441// remove_collection deletes the collection server of collection.
442// This only needs to be called if a collectionserver is to be
443// removed while the library is running. The destructor function
444// cleans up all collectservers when the program exits.
445void collectset::remove_collection (const text_t &collection, ostream &logout) {
446
447 remove_collection(collection);
448
449 outconvertclass text_t2ascii;
450 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
451 << collection << "\n";
452}
453
454void collectset::configure(const text_t &key, const text_tarray &cfgline)
455{
456 if ((key == "collection") || (key == "collectdir")) return;
457
458 collectservermapclass::iterator here = cservers.begin();
459 collectservermapclass::iterator end = cservers.end();
460
461 while (here != end) {
462 assert ((*here).second.c != NULL);
463 if ((*here).second.c != NULL) {
464 if (key == "collectinfo") {
465 if ((*here).first == cfgline[0]) {
466 if (cfgline.size()==3) {
467 (*here).second.c->configure ("gsdlhome", cfgline[1]);
468 (*here).second.c->configure ("gdbmhome", cfgline[2]);
469 }
470 else {
471 (*here).second.c->configure ("gsdlhome", cfgline[1]);
472 (*here).second.c->configure ("collecthome", cfgline[2]);
473 (*here).second.c->configure ("gdbmhome", cfgline[3]);
474 }
475 }
476 } else {
477 (*here).second.c->configure (key, cfgline);
478 }
479 }
480
481 ++here;
482 }
483}
484
485void collectset::getCollectionList (text_tarray &collist)
486{
487 collist.erase(collist.begin(),collist.end());
488
489 collectservermapclass::iterator here = cservers.begin();
490 collectservermapclass::iterator end = cservers.end();
491 while (here != end) {
492 assert ((*here).second.c != NULL);
493 if ((*here).second.c != NULL) {
494 collist.push_back ((*here).second.c->get_collection_name());
495 }
496 ++here;
497 }
498}
499
Note: See TracBrowser for help on using the repository browser.