source: gsdl/trunk/runtime-src/src/colservr/collectset.cpp@ 16895

Last change on this file since 16895 was 16895, checked in by davidb, 16 years ago

Runtime code can now support a GDBM database being provided as a gzipped txt file, which is platform independent. If the required .ldb (or .bdb) isn't there, then the runtime system executes txt2db to generate it. The rest of the code then works as before.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.9 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "gsdltools.h"
33#include "fileutil.h"
34#include "filter.h"
35#include "browsefilter.h"
36#include "sqlbrowsefilter.h"
37#include "queryfilter.h"
38#include "mgqueryfilter.h"
39#include "mgppqueryfilter.h"
40#include "mgsource.h"
41#include "lucenequeryfilter.h"
42#include "lucenesource.h"
43
44#include <assert.h>
45
46#ifdef USE_SQLITE
47#include "sqlitedbclass.h"
48#endif
49
50collectset::collectset (text_t& gsdlhome, text_t& collecthome)
51{
52 // gsdlhome and collecthome will be set as a result of calling this function
53 // collecthome will default to "<gsdlhome>/collect" if not explicitly
54 // specified in config file
55
56 text_tarray collections;
57
58 // get gsdlhome (if we fail the error will be picked up later -- in
59 // cgiwrapper)
60
61 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
62 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
63 if (read_dir (collecthome, collections)) {
64
65 text_tarray::const_iterator thiscol = collections.begin();
66 text_tarray::const_iterator endcol = collections.end();
67
68 while (thiscol != endcol) {
69 // ignore the modelcol
70 if (*thiscol == "modelcol") {
71 ++thiscol;
72 continue;
73 }
74
75 this->add_collection (*thiscol, gsdlhome, collecthome);
76
77 ++thiscol;
78 }
79
80 this->add_all_collection_groups(gsdlhome, collecthome);
81 }
82 }
83 }
84
85 set_gsdl_env_vars(gsdlhome);
86}
87
88collectset::~collectset () {
89 collectservermapclass::iterator here = cservers.begin();
90 collectservermapclass::iterator end = cservers.end();
91
92 while (here != end) {
93 if ((*here).second.c != NULL) {
94 delete (*here).second.c;
95 }
96 ++here;
97 }
98 cservers.clear();
99}
100
101bool collectset::init (ostream &logout) {
102 collectservermapclass::iterator here = cservers.begin();
103 collectservermapclass::iterator end = cservers.end();
104
105 while (here != end) {
106 assert ((*here).second.c != NULL);
107 if ((*here).second.c != NULL) {
108 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
109
110 // configure this collection server
111
112 // note that we read build.cfg before collect.cfg so that the indexmaps
113 // are available to decode defaultindex, defaultsubcollection, and
114 // defaultlanguage
115
116 bool failed_build_cfg = false;
117 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
118 configinfo.collecthome, configinfo.collection)) {
119 failed_build_cfg = true;
120
121 outconvertclass text_t2ascii;
122 logout << text_t2ascii
123 << "Warning: couldn't read build.cfg file for collection \""
124 << configinfo.collection << "\""
125 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
126 << " collecthome=\"" << configinfo.collecthome << "\"\n";
127 }
128
129 bool failed_collect_cfg = false;
130 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
131 configinfo.collecthome, configinfo.collection)) {
132 failed_collect_cfg = true;
133 outconvertclass text_t2ascii;
134 logout << text_t2ascii
135 << "Warning: couldn't read collect.cfg file for collection \""
136 << configinfo.collection << "\""
137 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
138 << " collecthome=\"" << configinfo.collecthome << "\"\n";
139 }
140
141
142 bool is_colgroup = (*here).second.c->is_collection_group();
143
144 if (failed_collect_cfg) {
145 ++here;
146 continue;
147 }
148
149 if (failed_build_cfg && (!is_colgroup)) {
150 ++here;
151 continue;
152 }
153 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
154
155 if (!(*here).second.c->init (logout)) return false;
156
157 (*here).second.c->configure("httpdomain",httpdomain);
158 (*here).second.c->configure("httpprefix",httpprefix);
159 }
160 ++here;
161 }
162
163 return true;
164}
165
166collectservermapclass collectset::servers()
167{ return cservers;
168}
169
170
171void collectset::add_all_collections(const text_t &gsdlhome,
172 const text_t& collecthome)
173{
174 text_tarray collections;
175
176 if (read_dir(collecthome, collections)) {
177
178 text_tarray::const_iterator thiscol = collections.begin();
179 text_tarray::const_iterator endcol = collections.end();
180
181 while (thiscol != endcol) {
182
183 // ignore the modelcol
184 if (*thiscol == "modelcol") {
185 ++thiscol;
186 continue;
187 }
188
189 // create collection server for this collection
190 this->add_collection (*thiscol, gsdlhome, collecthome);
191
192 ++thiscol;
193 }
194
195 this->add_all_collection_groups(gsdlhome,collecthome);
196 }
197}
198
199// add_collection sets up the collectionserver and calls
200// add_collectserver
201void collectset::add_collection (const text_t& collection,
202 const text_t& gsdlhome,
203 const text_t& collecthome)
204{
205
206 this->remove_collection(collection);
207
208 // read config file to see if built with mg, mgpp, or lucene
209 text_t buildtype = "mg"; // mg is default
210 text_t infodbtype = "gdbm"; // gdbm is default
211
212 text_tarray cfgline;
213 text_t key;
214
215 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
216 char *build_cfgc = build_cfg.getcstr();
217 ifstream confin(build_cfgc);
218
219 if (confin) {
220 while (read_cfg_line(confin, cfgline) >= 0) {
221 if (cfgline.size() == 2) {
222 key = cfgline[0];
223 cfgline.erase(cfgline.begin());
224 if (key == "buildtype") {
225 buildtype = cfgline[0];
226 }
227 if (key == "infodbtype") {
228 infodbtype = cfgline[0];
229 }
230 }
231 }
232 confin.close();
233 }
234 delete []build_cfgc;
235
236 collectserver *cserver = new collectserver();
237
238 // Create a dbclass of the correct type
239 dbclass *db_ptr = NULL;
240
241#ifdef USE_SQLITE
242 if (infodbtype == "sqlite")
243 {
244 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
245 db_ptr = sql_db_ptr;
246
247 // add a sql browse filter
248 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
249 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
250 cserver->add_filter (sqlbrowsefilter);
251 }
252#endif
253
254 // Use GDBM if the infodb type is empty or not one of the values above
255 if (db_ptr == NULL)
256 {
257 db_ptr = new gdbmclass();
258 }
259
260 // add a null filter
261 filterclass *filter = new filterclass ();
262 cserver->add_filter (filter);
263
264 // add a browse filter
265 browsefilterclass *browsefilter = new browsefilterclass();
266 browsefilter->set_db_ptr(db_ptr);
267 cserver->add_filter (browsefilter);
268
269 if (buildtype == "mg") {
270 mgsearch = new mgsearchclass();
271
272 // add a query filter
273 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
274 queryfilter->set_db_ptr(db_ptr);
275 queryfilter->set_textsearchptr (mgsearch);
276 cserver->add_filter (queryfilter);
277
278 // add a mg source
279 mgsourceclass *mgsource = new mgsourceclass ();
280 mgsource->set_db_ptr(db_ptr);
281 mgsource->set_textsearchptr (mgsearch);
282 cserver->add_source (mgsource);
283 }
284 else if (buildtype == "mgpp") {
285 mgppsearch = new mgppsearchclass();
286
287 // add a query filter
288 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
289 queryfilter->set_db_ptr(db_ptr);
290 queryfilter->set_textsearchptr (mgppsearch);
291 cserver->add_filter (queryfilter);
292
293 // add a mg source
294 mgsourceclass *mgsource = new mgsourceclass ();
295 mgsource->set_db_ptr(db_ptr);
296 mgsource->set_textsearchptr (mgppsearch);
297 cserver->add_source (mgsource);
298 }
299 else if (buildtype == "lucene") {
300 lucenesearch = new lucenesearchclass();
301 lucenesearch->set_gsdlhome(gsdlhome);
302
303 // add a query filter
304 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
305 queryfilter->set_db_ptr(db_ptr);
306 queryfilter->set_textsearchptr (lucenesearch);
307 cserver->add_filter (queryfilter);
308
309 // add a lucene source
310 lucenesourceclass *lucenesource = new lucenesourceclass ();
311 lucenesource->set_db_ptr(db_ptr);
312 lucenesource->set_textsearchptr (lucenesearch);
313 cserver->add_source (lucenesource);
314 }
315
316 // inform collection server and everything it contains about its
317 // collection name
318 cserver->configure ("collection", collection);
319 cserver->configure ("gsdlhome", gsdlhome);
320 cserver->configure ("collecthome", collecthome);
321 cservers.addcollectserver (cserver);
322}
323
324void collectset::remove_all_collections () {
325
326 // first unload any cached mg databases
327 if (mgsearch != NULL) {
328 mgsearch->unload_database();
329 }
330
331 // now delete the collection server objects
332 collectservermapclass::iterator here = cservers.begin();
333 collectservermapclass::iterator end = cservers.end();
334
335 while (here != end) {
336 if ((*here).second.c != NULL) {
337 delete (*here).second.c;
338 }
339 ++here;
340 }
341 cservers.clear();
342}
343
344void collectset::add_collection_group(const text_t& collection,
345 const text_t& gsdlhome,
346 const text_t& collecthome)
347{
348 text_tarray group;
349
350 text_t collect_group_dir = filename_cat (collecthome, collection);
351
352 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
353 text_t is_collect_group;
354 text_tarray cfgline;
355 text_t key;
356 text_t build_cfg = filename_cat(collect_group_dir, "etc", "collect.cfg");
357 char *collect_cfgc = build_cfg.getcstr();
358 ifstream confin(collect_cfgc);
359
360 if (confin) {
361 while (read_cfg_line(confin, cfgline) >= 0) {
362 if (cfgline.size() == 2) {
363 key = cfgline[0];
364 cfgline.erase(cfgline.begin());
365 if (key == "collectgroup") {
366 is_collect_group = cfgline[0];
367 break;
368 }
369 }
370 }
371 confin.close();
372 }
373 delete []collect_cfgc;
374
375 if (is_collect_group == "true") {
376
377 if (read_dir (collect_group_dir, group)) {
378
379 text_tarray::const_iterator thiscol = group.begin();
380 text_tarray::const_iterator endcol = group.end();
381
382 while (thiscol != endcol) {
383 // ignore the modelcol
384 if (*thiscol == "etc") {
385 ++thiscol;
386 continue;
387 }
388
389 text_t group_col = filename_cat(collection,*thiscol);
390 this->add_collection (group_col, gsdlhome, collecthome);
391
392 ++thiscol;
393 }
394 }
395 }
396}
397
398void collectset::add_all_collection_groups (const text_t& gsdlhome,
399 const text_t& collecthome)
400
401{
402 collectservermapclass::iterator here = cservers.begin();
403 collectservermapclass::iterator end = cservers.end();
404
405 while (here != end) {
406 text_t collection = (*here).second.c->get_collection_name();
407 this->add_collection_group(collection,gsdlhome,collecthome);
408
409 ++here;
410 }
411}
412
413
414// remove_collection deletes the collection server of collection.
415// This only needs to be called if a collectionserver is to be
416// removed while the library is running. The destructor function
417// cleans up all collectservers when the program exits.
418void collectset::remove_collection (const text_t &collection) {
419
420 // do nothing if no collection server exists for this collection
421 if (cservers.getcollectserver(collection) == NULL) return;
422
423 // first unload any cached mg databases - we may need to do something
424 // similar to this for mgpp and lucene too
425 if (mgsearch != NULL) {
426 mgsearch->unload_database();
427 }
428
429 // now delete the collection server object
430 collectservermapclass::iterator here = cservers.begin();
431 collectservermapclass::iterator end = cservers.end();
432
433 while (here != end) {
434 if ((*here).second.c != NULL && (*here).first == collection) {
435 delete (*here).second.c;
436 cservers.erase (here);
437 return;
438 }
439 ++here;
440 }
441}
442
443
444// remove_collection deletes the collection server of collection.
445// This only needs to be called if a collectionserver is to be
446// removed while the library is running. The destructor function
447// cleans up all collectservers when the program exits.
448void collectset::remove_collection (const text_t &collection, ostream &logout) {
449
450 remove_collection(collection);
451
452 outconvertclass text_t2ascii;
453 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
454 << collection << "\n";
455}
456
457void collectset::configure(const text_t &key, const text_tarray &cfgline)
458{
459 if ((key == "collection") || (key == "collectdir")) return;
460
461 collectservermapclass::iterator here = cservers.begin();
462 collectservermapclass::iterator end = cservers.end();
463
464 while (here != end) {
465 assert ((*here).second.c != NULL);
466 if ((*here).second.c != NULL) {
467 if (key == "collectinfo") {
468 if ((*here).first == cfgline[0]) {
469 if (cfgline.size()==3) {
470 (*here).second.c->configure ("gsdlhome", cfgline[1]);
471 (*here).second.c->configure ("gdbmhome", cfgline[2]);
472 }
473 else {
474 (*here).second.c->configure ("gsdlhome", cfgline[1]);
475 (*here).second.c->configure ("collecthome", cfgline[2]);
476 (*here).second.c->configure ("gdbmhome", cfgline[3]);
477 }
478 }
479 } else {
480 (*here).second.c->configure (key, cfgline);
481 }
482 }
483
484 ++here;
485 }
486}
487
488void collectset::getCollectionList (text_tarray &collist)
489{
490 collist.erase(collist.begin(),collist.end());
491
492 collectservermapclass::iterator here = cservers.begin();
493 collectservermapclass::iterator end = cservers.end();
494 while (here != end) {
495 assert ((*here).second.c != NULL);
496 if ((*here).second.c != NULL) {
497 collist.push_back ((*here).second.c->get_collection_name());
498 }
499 ++here;
500 }
501}
502
Note: See TracBrowser for help on using the repository browser.