source: trunk/gsdl/src/colservr/collectset.cpp@ 9937

Last change on this file since 9937 was 9937, checked in by kjdon, 19 years ago

Modified the filters/sources etc. so that if an indexstem is specified in the build.cfg file, it is used as the root of the index/gdbm filenames instead of the collection name. The collection name is still used by default. This means that we can rename a collection directory without rebuilding.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: collectset.cpp 9937 2005-05-24 02:21:02Z kjdon $
25 *
26 *********************************************************************/
27
28
29#include "collectserver.h"
30#include "filter.h"
31#include "browsefilter.h"
32#include "queryfilter.h"
33#include "infodbclass.h"
34#include "mgqueryfilter.h"
35#include "mgppqueryfilter.h"
36#include "mggdbmsource.h"
37#include "lucenequeryfilter.h"
38#include "lucenegdbmsource.h"
39
40#include "fileutil.h"
41#include <assert.h>
42
43#include "colservrconfig.h"
44#include "recptconfig.h"
45#include "fileutil.h"
46#include "collectset.h"
47
48collectset::collectset (text_t &gsdlhome) {
49
50 text_tarray collections;
51 text_t collectdir;
52
53 // get gsdlhome (if we fail the error will be picked up later -- in
54 // cgiwrapper)
55 if (site_cfg_read (gsdlhome, httpdomain, httpprefix)) {
56 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
57 collectdir = filename_cat (gsdlhome, "collect");
58 if (read_dir (collectdir, collections)) {
59
60 text_tarray::const_iterator thiscol = collections.begin();
61 text_tarray::const_iterator endcol = collections.end();
62
63 while (thiscol != endcol) {
64 // ignore the modelcol
65 if (*thiscol == "modelcol") {
66 ++thiscol;
67 continue;
68 }
69
70 // read config file to see if built with mg, mgpp, or lucene
71 text_t buildtype = "mg"; // mg is default
72 text_tarray cfgline;
73 text_t key;
74 text_t filename = filename_cat(collectdir, *thiscol, "index" , "build.cfg");
75 ifstream confin(filename.getcstr());
76
77 if (confin) {
78 while (read_cfg_line(confin, cfgline) >= 0) {
79 if (cfgline.size() ==2 ) {
80 key = cfgline[0];
81 cfgline.erase(cfgline.begin());
82 if (key =="buildtype") {
83 buildtype = cfgline[0];
84 break;
85 }
86 }
87 }
88 }
89
90 confin.close();
91
92 // this memory is created but never destroyed
93 // we're also not doing any error checking to make sure we didn't
94 // run out of memory
95 collectserver *cserver = new collectserver();
96 gdbmclass *gdbmhandler = new gdbmclass();
97
98 // add a null filter
99 filterclass *filter = new filterclass ();
100 cserver->add_filter (filter);
101
102 // add a browse filter
103 browsefilterclass *browsefilter = new browsefilterclass();
104 browsefilter->set_gdbmptr (gdbmhandler);
105 cserver->add_filter (browsefilter);
106
107 if (buildtype == "mg") {
108 mgsearch = new mgsearchclass();
109
110 // add a query filter
111 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
112 queryfilter->set_gdbmptr (gdbmhandler);
113 queryfilter->set_textsearchptr (mgsearch);
114 cserver->add_filter (queryfilter);
115
116 // add a mg and gdbm source
117 mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
118 mggdbmsource->set_gdbmptr (gdbmhandler);
119 mggdbmsource->set_textsearchptr (mgsearch);
120 cserver->add_source (mggdbmsource);
121
122 } else if (buildtype == "mgpp") {
123
124 mgppsearch = new mgppsearchclass();
125
126 // add a query filter
127 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
128 queryfilter->set_gdbmptr (gdbmhandler);
129 queryfilter->set_textsearchptr (mgppsearch);
130 cserver->add_filter (queryfilter);
131
132 // add a mg and gdbm source
133 mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
134 mggdbmsource->set_gdbmptr (gdbmhandler);
135 mggdbmsource->set_textsearchptr (mgppsearch);
136 cserver->add_source (mggdbmsource);
137 } else if (buildtype == "lucene") {
138 lucenesearch = new lucenesearchclass();
139
140 // add a query filter
141 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
142 queryfilter->set_gdbmptr (gdbmhandler);
143 queryfilter->set_textsearchptr (lucenesearch);
144 cserver->add_filter (queryfilter);
145
146 // add a lucene and gdbm source
147 lucenegdbmsourceclass *lucenegdbmsource = new lucenegdbmsourceclass ();
148 lucenegdbmsource->set_gdbmptr (gdbmhandler);
149 lucenegdbmsource->set_textsearchptr (lucenesearch);
150 cserver->add_source (lucenegdbmsource);
151 }
152
153
154 // inform collection server and everything it contains about its
155 // collection name
156 cserver->configure ("collection", *thiscol);
157 // AZIZ: added on 10/10/00
158 // the cserver object does not have a reference to gsdlhome
159 cserver->configure ("gsdlhome", gsdlhome);
160
161 // GRB: removed proto.add_collectserver (cserver);
162 // GRB: added to build our own cservers list
163 cservers.addcollectserver (cserver);
164
165 ++thiscol;
166 }
167 }
168 }
169 }
170}
171
172collectset::~collectset () {
173 collectservermapclass::iterator here = cservers.begin();
174 collectservermapclass::iterator end = cservers.end();
175
176 while (here != end) {
177 if ((*here).second.c != NULL) {
178 delete (*here).second.c;
179 }
180 ++here;
181 }
182 cservers.clear();
183}
184
185bool collectset::init (ostream &logout) {
186 collectservermapclass::iterator here = cservers.begin();
187 collectservermapclass::iterator end = cservers.end();
188
189 while (here != end) {
190 assert ((*here).second.c != NULL);
191 if ((*here).second.c != NULL) {
192 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
193
194 // configure this collection server
195
196 // note that we read build.cfg before collect.cfg so that the indexmaps
197 // are available to decode defaultindex, defaultsubcollection, and
198 // defaultlanguage
199 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
200 configinfo.collection)) {
201 outconvertclass text_t2ascii;
202 logout << text_t2ascii
203 << "Warning: couldn't read build.cfg file for collection \"" //****
204 << configinfo.collection << "\", gsdlhome=\""
205 << configinfo.gsdlhome << "\"\n";
206 ++here;
207 continue;
208 }
209
210 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
211 configinfo.collection)) {
212 outconvertclass text_t2ascii;
213 logout << text_t2ascii
214 << "Warning: couldn't read collect.cfg file for collection \""
215 << configinfo.collection << "\", gsdlhome=\""
216 << configinfo.gsdlhome << "\"\n";
217 ++here;
218 continue;
219 }
220
221 if (!(*here).second.c->init (logout)) return false;
222
223 (*here).second.c->configure("httpdomain",httpdomain);
224 (*here).second.c->configure("httpprefix",httpprefix);
225 }
226 ++here;
227 }
228
229 return true;
230}
231
232collectservermapclass collectset::servers()
233{ return cservers;
234}
235
236
237void collectset::add_all_collections(const text_t &gsdlhome) {
238
239 text_tarray collections;
240 text_t collectdir = filename_cat(gsdlhome, "collect");
241 if (read_dir(collectdir, collections)) {
242
243 text_tarray::const_iterator thiscol = collections.begin();
244 text_tarray::const_iterator endcol = collections.end();
245
246 while (thiscol != endcol) {
247
248 // ignore the modelcol
249 if (*thiscol == "modelcol") {
250 ++thiscol;
251 continue;
252 }
253
254 // create collection server and add to null protocol
255 this->add_collection (*thiscol, gsdlhome);
256
257 ++thiscol;
258 }
259 }
260}
261
262// add_collection sets up the collectionserver and calls
263// add_collectserver
264void collectset::add_collection (const text_t &collection,
265 const text_t &gsdlhome) {
266
267 // if an old collection server exists for this collection we should
268 // delete it first
269 collectservermapclass::iterator here = cservers.begin();
270 collectservermapclass::iterator end = cservers.end();
271 while (here != end) {
272 if ((*here).second.c != NULL && (*here).first == collection) {
273 delete (*here).second.c;
274 cservers.erase (here);
275 break;
276 }
277 ++here;
278 }
279
280 // read config file to see if built with mg, mgpp, or lucene
281 // -- we can rely on the collection (and therefore the build.cfg)
282 // being here since this is the null protocol - a nicer way to
283 // do this would be preferable though - Stefan.
284 text_t buildtype = "mg"; // mg is default
285
286 text_tarray cfgline;
287 text_t key;
288 text_t build_cfg = filename_cat(gsdlhome, "collect", collection, "index", "build.cfg");
289 char *build_cfgc = build_cfg.getcstr();
290 ifstream confin(build_cfgc);
291
292 if (confin) {
293 while (read_cfg_line(confin, cfgline) >= 0) {
294 if (cfgline.size() == 2) {
295 key = cfgline[0];
296 cfgline.erase(cfgline.begin());
297 if (key == "buildtype") {
298 buildtype = cfgline[0];
299 break;
300 }
301 }
302 }
303 confin.close();
304 }
305 delete []build_cfgc;
306
307 collectserver *cserver = new collectserver();
308 gdbmclass *gdbmhandler = new gdbmclass();
309
310 // add a null filter
311 filterclass *filter = new filterclass ();
312 cserver->add_filter (filter);
313
314 // add a browse filter
315 browsefilterclass *browsefilter = new browsefilterclass();
316 browsefilter->set_gdbmptr (gdbmhandler);
317
318 cserver->add_filter (browsefilter);
319
320 if (buildtype == "mg") {
321 mgsearch = new mgsearchclass();
322
323 // add a query filter
324 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
325 queryfilter->set_gdbmptr (gdbmhandler);
326 queryfilter->set_textsearchptr (mgsearch);
327 cserver->add_filter (queryfilter);
328
329 // add a mg and gdbm source
330 mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
331 mggdbmsource->set_gdbmptr (gdbmhandler);
332 mggdbmsource->set_textsearchptr (mgsearch);
333 cserver->add_source (mggdbmsource);
334
335 } else if (buildtype == "mgpp") {
336
337 mgppsearch = new mgppsearchclass();
338
339 // add a query filter
340 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
341 queryfilter->set_gdbmptr (gdbmhandler);
342 queryfilter->set_textsearchptr (mgppsearch);
343 cserver->add_filter (queryfilter);
344
345 // add a mg and gdbm source
346 mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
347 mggdbmsource->set_gdbmptr (gdbmhandler);
348 mggdbmsource->set_textsearchptr (mgppsearch);
349 cserver->add_source (mggdbmsource);
350
351 } else if (buildtype == "lucene") {
352 lucenesearch = new lucenesearchclass();
353
354 // add a query filter
355 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
356 queryfilter->set_gdbmptr (gdbmhandler);
357 queryfilter->set_textsearchptr (lucenesearch);
358 cserver->add_filter (queryfilter);
359
360 // add a lucene and gdbm source
361 lucenegdbmsourceclass *lucenegdbmsource = new lucenegdbmsourceclass ();
362 lucenegdbmsource->set_gdbmptr (gdbmhandler);
363 lucenegdbmsource->set_textsearchptr (lucenesearch);
364 cserver->add_source (lucenegdbmsource);
365 }
366
367 // inform collection server and everything it contains about its
368 // collection name
369 cserver->configure ("collection", collection);
370 cserver->configure ("gsdlhome", gsdlhome);
371
372 /* Removed from add_collection 24/11/2000; already done elsewhere in collectset.
373 // configure receptionist's collectinfo structure
374 text_tarray colinfo;
375 colinfo.push_back (collection);
376 colinfo.push_back (gsdlhome);
377 colinfo.push_back (gdbmhome);
378 */
379 cservers.addcollectserver (cserver);
380}
381
382void collectset::remove_all_collections () {
383
384 // first unload any cached mg databases
385 if (mgsearch != NULL) {
386 mgsearch->unload_database();
387 }
388
389 // now delete the collection server objects
390 collectservermapclass::iterator here = cservers.begin();
391 collectservermapclass::iterator end = cservers.end();
392
393 while (here != end) {
394 if ((*here).second.c != NULL) {
395 delete (*here).second.c;
396 }
397 ++here;
398 }
399 cservers.clear();
400}
401
402
403// remove_collection deletes the collection server of collection.
404// This only needs to be called if a collectionserver is to be
405// removed while the library is running. The destructor function
406// cleans up all collectservers when the program exits.
407void collectset::remove_collection (const text_t &collection, ostream &logout) {
408
409 // do nothing if no collection server exists for this collection
410 if (cservers.getcollectserver(collection) == NULL) return;
411
412 // first unload any cached mg databases - we may need to do something
413 // similar to this for mgpp and lucene too
414 if (mgsearch != NULL) {
415 mgsearch->unload_database();
416 }
417
418 // now delete the collection server object
419 collectservermapclass::iterator here = cservers.begin();
420 collectservermapclass::iterator end = cservers.end();
421
422 while (here != end) {
423 if ((*here).second.c != NULL && (*here).first == collection) {
424 delete (*here).second.c;
425 cservers.erase (here);
426 return;
427 }
428 ++here;
429 }
430 outconvertclass text_t2ascii;
431 logout << text_t2ascii << "nullproto::remove_collection: failed to remove collectserver for "
432 << collection << "\n";
433}
434
435void collectset::configure(const text_t &key, const text_tarray &cfgline)
436{
437 if (key == "collection" || key == "collectdir") return;
438
439 collectservermapclass::iterator here = cservers.begin();
440 collectservermapclass::iterator end = cservers.end();
441
442 while (here != end) {
443 assert ((*here).second.c != NULL);
444 if ((*here).second.c != NULL) {
445 if (key == "collectinfo") {
446 if ((*here).first == cfgline[0]) {
447 (*here).second.c->configure ("gsdlhome", cfgline[1]);
448 (*here).second.c->configure ("gdbmhome", cfgline[2]);
449 }
450 } else {
451 (*here).second.c->configure (key, cfgline);
452 }
453 }
454
455 ++here;
456 }
457}
458
459void collectset::getCollectionList (text_tarray &collist)
460{
461 collist.erase(collist.begin(),collist.end());
462
463 collectservermapclass::iterator here = cservers.begin();
464 collectservermapclass::iterator end = cservers.end();
465 while (here != end) {
466 assert ((*here).second.c != NULL);
467 if ((*here).second.c != NULL) {
468 collist.push_back ((*here).second.c->get_collection_name());
469 }
470 ++here;
471 }
472}
473
474void collectset::setReceptionistServers(receptionist &recpt, text_t &gsdlhome)
475{
476 collectservermapclass::iterator here = cservers.begin();
477 collectservermapclass::iterator end = cservers.end();
478 while (here != end) {
479 assert ((*here).second.c != NULL);
480
481 text_tarray colinfo;
482 colinfo.push_back((*here).second.c->get_collection_name());
483 colinfo.push_back(gsdlhome);
484 colinfo.push_back(gsdlhome);
485 recpt.configure("collectinfo", colinfo);
486
487 ++here;
488 }
489}
Note: See TracBrowser for help on using the repository browser.