source: gsdl/trunk/runtime-src/src/colservr/collectset.cpp@ 19806

Last change on this file since 19806 was 19806, checked in by davidb, 15 years ago

Dr Bainbridge's fix for getting collectgroup to work again, which follows on from Katherine's fix to get collectgroup working on Windows again. It now adds in only built collections by checking for the build.cfg in the index folder of a collection, and if that does not exist, by looking for a collect.cfg file in the etc folder to see if its collectgroup field is set to true. This additional change continues to ensure that the mgsearch object is not created when there is actually no collection built with mg (which previously caused a server crash on Windows).

  • Property svn:keywords set to Author Date Id Revision
File size: 16.0 KB
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gdbmclass.h"
32#include "gsdltools.h"
33#include "fileutil.h"
34#include "filter.h"
35#include "browsefilter.h"
36#include "sqlbrowsefilter.h"
37#include "queryfilter.h"
38#include "mgqueryfilter.h"
39#include "mgppqueryfilter.h"
40#include "mgsource.h"
41#include "lucenequeryfilter.h"
42#include "lucenesource.h"
43
44#include <assert.h>
45
46#ifdef USE_SQLITE
47#include "sqlitedbclass.h"
48#endif
49
50#ifdef USE_MSSQL
51#include "mssqldbclass.h"
52#endif
53
54
55collectset::collectset (text_t& gsdlhome, text_t& collecthome)
56{
57 // gsdlhome and collecthome will be set as a result of calling this function
58 // collecthome will default to "<gsdlhome>/collect" if not explicitly
59 // specified in config file
60
61 text_tarray collections;
62
63 mgsearch = NULL;
64 mgppsearch = NULL;
65 lucenesearch = NULL;
66
67 // get gsdlhome (if we fail the error will be picked up later -- in
68 // cgiwrapper)
69
70 if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
71 if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
72 if (read_dir (collecthome, collections)) {
73
74 text_tarray::const_iterator thiscol = collections.begin();
75 text_tarray::const_iterator endcol = collections.end();
76
77 while (thiscol != endcol) {
78 // ignore the modelcol
79 if (*thiscol == "modelcol") {
80 ++thiscol;
81 continue;
82 }
83
84 this->add_collection (*thiscol, gsdlhome, collecthome);
85
86 ++thiscol;
87 }
88
89 this->add_all_collection_groups(gsdlhome, collecthome);
90 }
91 }
92 }
93
94 set_gsdl_env_vars(gsdlhome);
95}
96
97
98collectset::collectset (text_t& httpprefix_arg)
99{
100 httpprefix = httpprefix_arg;
101
102 mgsearch = NULL;
103 mgppsearch = NULL;
104 lucenesearch = NULL;
105
106}
107
108collectset::collectset ()
109{
110 mgsearch = NULL;
111 mgppsearch = NULL;
112 lucenesearch = NULL;
113}
114
115collectset::~collectset () {
116 collectservermapclass::iterator here = cservers.begin();
117 collectservermapclass::iterator end = cservers.end();
118
119 while (here != end) {
120 if ((*here).second.c != NULL) {
121 delete (*here).second.c;
122 }
123 ++here;
124 }
125 cservers.clear();
126}
127
128bool collectset::init (ostream &logout) {
129 collectservermapclass::iterator here = cservers.begin();
130 collectservermapclass::iterator end = cservers.end();
131
132 while (here != end) {
133 assert ((*here).second.c != NULL);
134 if ((*here).second.c != NULL) {
135 const colservrconf &configinfo = (*here).second.c->get_configinfo ();
136
137 // configure this collection server
138
139 // note that we read build.cfg before collect.cfg so that the indexmaps
140 // are available to decode defaultindex, defaultsubcollection, and
141 // defaultlanguage
142
143 bool failed_build_cfg = false;
144 if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
145 configinfo.collecthome, configinfo.collection)) {
146 failed_build_cfg = true;
147
148 outconvertclass text_t2ascii;
149 logout << text_t2ascii
150 << "Warning: couldn't read build.cfg file for collection \""
151 << configinfo.collection << "\""
152 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
153 << " collecthome=\"" << configinfo.collecthome << "\"\n";
154 }
155
156 bool failed_collect_cfg = false;
157 if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
158 configinfo.collecthome, configinfo.collection)) {
159 failed_collect_cfg = true;
160 outconvertclass text_t2ascii;
161 logout << text_t2ascii
162 << "Warning: couldn't read collect.cfg file for collection \""
163 << configinfo.collection << "\""
164 << " gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
165 << " collecthome=\"" << configinfo.collecthome << "\"\n";
166 }
167
168
169 bool is_colgroup = (*here).second.c->is_collection_group();
170
171 if (failed_collect_cfg) {
172 ++here;
173 continue;
174 }
175
176 if (failed_build_cfg && (!is_colgroup)) {
177 ++here;
178 continue;
179 }
180 // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
181
182 if (!(*here).second.c->init (logout)) return false;
183
184 (*here).second.c->configure("httpdomain",httpdomain);
185 (*here).second.c->configure("httpprefix",httpprefix);
186 }
187 ++here;
188 }
189
190 return true;
191}
192
193collectservermapclass collectset::servers()
194{ return cservers;
195}
196
197
198void collectset::add_all_collections(const text_t &gsdlhome,
199 const text_t& collecthome)
200{
201 text_tarray collections;
202
203 if (read_dir(collecthome, collections)) {
204
205 text_tarray::const_iterator thiscol = collections.begin();
206 text_tarray::const_iterator endcol = collections.end();
207
208 while (thiscol != endcol) {
209
210 // ignore the modelcol
211 if (*thiscol == "modelcol") {
212 ++thiscol;
213 continue;
214 }
215
216 // create collection server for this collection
217 this->add_collection (*thiscol, gsdlhome, collecthome);
218
219 ++thiscol;
220 }
221
222 this->add_all_collection_groups(gsdlhome,collecthome);
223 }
224}
225
226// add_collection sets up the collectionserver and calls
227// add_collectserver
228void collectset::add_collection (const text_t& collection,
229 const text_t& gsdlhome,
230 const text_t& collecthome)
231{
232
233 this->remove_collection(collection);
234
235 // read config file to see if built with mg, mgpp, or lucene
236 text_t buildtype = "mg"; // mg is default
237 text_t infodbtype = "gdbm"; // gdbm is default
238
239 collectserver *cserver = NULL;
240
241 text_tarray cfgline;
242 text_t key;
243
244 text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
245 if (file_exists (build_cfg)) {
246 char *build_cfgc = build_cfg.getcstr();
247 ifstream confin(build_cfgc);
248
249 if (confin) {
250 while (read_cfg_line(confin, cfgline) >= 0) {
251 if (cfgline.size() == 2) {
252 key = cfgline[0];
253 cfgline.erase(cfgline.begin());
254 if (key == "buildtype") {
255 buildtype = cfgline[0];
256 }
257 if (key == "infodbtype") {
258 infodbtype = cfgline[0];
259 }
260 }
261 }
262 confin.close();
263 }
264 delete []build_cfgc;
265
266 cserver = new collectserver();
267
268 // Create a dbclass of the correct type
269 dbclass *db_ptr = NULL;
270
271#ifdef USE_SQLITE
272 if (infodbtype == "sqlite")
273 {
274 sqlitedbclass *sql_db_ptr = new sqlitedbclass();
275 db_ptr = sql_db_ptr;
276
277 // add a sql browse filter
278 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
279 sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
280 cserver->add_filter (sqlbrowsefilter);
281 }
282#endif
283
284#ifdef USE_MSSQL
285 if (infodbtype == "mssql")
286 {
287 mssqldbclass *mssql_db_ptr = new mssqldbclass();
288 db_ptr = mssql_db_ptr;
289
290 // add a sql browse filter
291 sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
292 sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
293 cserver->add_filter (sqlbrowsefilter);
294 }
295#endif
296
297 // Use GDBM if the infodb type is empty or not one of the values above
298 if (db_ptr == NULL) {
299 db_ptr = new gdbmclass();
300 }
301
302 // add a null filter
303 filterclass *filter = new filterclass ();
304 cserver->add_filter (filter);
305
306 // add a browse filter
307 browsefilterclass *browsefilter = new browsefilterclass();
308 browsefilter->set_db_ptr(db_ptr);
309 cserver->add_filter (browsefilter);
310
311 if (buildtype == "mg") {
312 mgsearch = new mgsearchclass();
313
314 // add a query filter
315 mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
316 queryfilter->set_db_ptr(db_ptr);
317 queryfilter->set_textsearchptr (mgsearch);
318 cserver->add_filter (queryfilter);
319
320 // add a mg source
321 mgsourceclass *mgsource = new mgsourceclass ();
322 mgsource->set_db_ptr(db_ptr);
323 mgsource->set_textsearchptr (mgsearch);
324 cserver->add_source (mgsource);
325 }
326 else if (buildtype == "mgpp") {
327 mgppsearch = new mgppsearchclass();
328
329 // add a query filter
330 mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
331 queryfilter->set_db_ptr(db_ptr);
332 queryfilter->set_textsearchptr (mgppsearch);
333 cserver->add_filter (queryfilter);
334
335 // add a mg source
336 mgsourceclass *mgsource = new mgsourceclass ();
337 mgsource->set_db_ptr(db_ptr);
338 mgsource->set_textsearchptr (mgppsearch);
339 cserver->add_source (mgsource);
340 }
341 else if (buildtype == "lucene") {
342 lucenesearch = new lucenesearchclass();
343 lucenesearch->set_gsdlhome(gsdlhome);
344
345 // add a query filter
346 lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
347 queryfilter->set_db_ptr(db_ptr);
348 queryfilter->set_textsearchptr (lucenesearch);
349 cserver->add_filter (queryfilter);
350
351 // add a lucene source
352 lucenesourceclass *lucenesource = new lucenesourceclass ();
353 lucenesource->set_db_ptr(db_ptr);
354 lucenesource->set_textsearchptr (lucenesearch);
355 cserver->add_source (lucenesource);
356 }
357 else {
358 cerr << "Warning: unrecognized buildtype" << buildtype << endl;
359 }
360
361 }
362 else {
363 // see if it is a collectgroup col
364
365
366 // This routine essentially exists twice in the code now (see add_collection_group)
367 // factor out as support routine
368
369 text_t collect_cfg = filename_cat(collecthome, collection, "etc", "collect.cfg");
370 if (file_exists (collect_cfg)) {
371 char *collect_cfgc = collect_cfg.getcstr();
372 ifstream confin(collect_cfgc);
373
374 text_t is_collect_group = "false";
375
376 if (confin) {
377 while (read_cfg_line(confin, cfgline) >= 0) {
378 if (cfgline.size() == 2) {
379 key = cfgline[0];
380
381 cfgline.erase(cfgline.begin());
382 if (key == "collectgroup") {
383 is_collect_group = cfgline[0];
384 }
385 }
386 }
387 confin.close();
388 }
389 delete []collect_cfgc;
390
391 if (is_collect_group != "true") {
392 // an unbuilt leaf collection
393 return;
394 }
395
396 // by this point we know we will need a cserver
397 cserver = new collectserver();
398
399 }
400 else {
401 // no collect.cfg => filter it out from list of collections added
402 return;
403 }
404 }
405
406 // inform collection server and everything it contains about its
407 // collection name
408 cserver->configure ("collection", collection);
409 cserver->configure ("gsdlhome", gsdlhome);
410 cserver->configure ("collecthome", collecthome);
411 cservers.addcollectserver (cserver);
412}
413
414void collectset::remove_all_collections () {
415
416 // first unload any cached mg databases
417 if (mgsearch != NULL) {
418 mgsearch->unload_database();
419 }
420
421 // now delete the collection server objects
422 collectservermapclass::iterator here = cservers.begin();
423 collectservermapclass::iterator end = cservers.end();
424
425 while (here != end) {
426 if ((*here).second.c != NULL) {
427 delete (*here).second.c;
428 }
429 ++here;
430 }
431 cservers.clear();
432}
433
434void collectset::add_collection_group(const text_t& collection,
435 const text_t& gsdlhome,
436 const text_t& collecthome)
437{
438 text_tarray group;
439
440 text_t collect_group_dir = filename_cat (collecthome, collection);
441
442 // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
443 text_t is_collect_group;
444 text_tarray cfgline;
445 text_t key;
446 text_t collect_cfg = filename_cat(collect_group_dir, "etc", "collect.cfg");
447 char *collect_cfgc = collect_cfg.getcstr();
448 ifstream confin(collect_cfgc);
449
450 if (confin) {
451 while (read_cfg_line(confin, cfgline) >= 0) {
452 if (cfgline.size() == 2) {
453 key = cfgline[0];
454 cfgline.erase(cfgline.begin());
455 if (key == "collectgroup") {
456 is_collect_group = cfgline[0];
457 break;
458 }
459 }
460 }
461 confin.close();
462 }
463 delete []collect_cfgc;
464
465 if (is_collect_group == "true") {
466 if (read_dir (collect_group_dir, group)) {
467
468 text_tarray::const_iterator thiscol = group.begin();
469 text_tarray::const_iterator endcol = group.end();
470
471 while (thiscol != endcol) {
472 // ignore the etc directory
473 if (*thiscol == "etc") {
474 ++thiscol;
475 continue;
476 }
477
478 //text_t group_col = filename_cat(collection,*thiscol);
479 // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted to \ on windows
480 text_t group_col = collection + "/" + *thiscol;
481 this->add_collection (group_col, gsdlhome, collecthome);
482
483 ++thiscol;
484 }
485 }
486 }
487}
488
489void collectset::add_all_collection_groups (const text_t& gsdlhome,
490 const text_t& collecthome)
491
492{
493 collectservermapclass::iterator here = cservers.begin();
494 collectservermapclass::iterator end = cservers.end();
495
496 while (here != end) {
497 text_t collection = (*here).second.c->get_collection_name();
498 this->add_collection_group(collection,gsdlhome,collecthome);
499
500 ++here;
501 }
502}
503
504
505// remove_collection deletes the collection server of collection.
506// This only needs to be called if a collectionserver is to be
507// removed while the library is running. The destructor function
508// cleans up all collectservers when the program exits.
509void collectset::remove_collection (const text_t &collection) {
510
511 // do nothing if no collection server exists for this collection
512 if (cservers.getcollectserver(collection) == NULL) return;
513
514 // first unload any cached mg databases - we may need to do something
515 // similar to this for mgpp and lucene too
516 if (mgsearch != NULL) {
517 mgsearch->unload_database();
518 }
519
520 // now delete the collection server object
521 collectservermapclass::iterator here = cservers.begin();
522 collectservermapclass::iterator end = cservers.end();
523
524 while (here != end) {
525 if ((*here).second.c != NULL && (*here).first == collection) {
526 delete (*here).second.c;
527 cservers.erase (here);
528 return;
529 }
530 ++here;
531 }
532}
533
534
535// remove_collection deletes the collection server of collection.
536// This only needs to be called if a collectionserver is to be
537// removed while the library is running. The destructor function
538// cleans up all collectservers when the program exits.
539void collectset::remove_collection (const text_t &collection, ostream &logout) {
540
541 remove_collection(collection);
542
543 outconvertclass text_t2ascii;
544 logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
545 << collection << "\n";
546}
547
548void collectset::configure(const text_t &key, const text_tarray &cfgline)
549{
550 if ((key == "collection") || (key == "collectdir")) return;
551
552 collectservermapclass::iterator here = cservers.begin();
553 collectservermapclass::iterator end = cservers.end();
554
555 while (here != end) {
556 assert ((*here).second.c != NULL);
557 if ((*here).second.c != NULL) {
558 if (key == "collectinfo") {
559 if ((*here).first == cfgline[0]) {
560 if (cfgline.size()==3) {
561 (*here).second.c->configure ("gsdlhome", cfgline[1]);
562 (*here).second.c->configure ("gdbmhome", cfgline[2]);
563 }
564 else {
565 (*here).second.c->configure ("gsdlhome", cfgline[1]);
566 (*here).second.c->configure ("collecthome", cfgline[2]);
567 (*here).second.c->configure ("gdbmhome", cfgline[3]);
568 }
569 }
570 } else {
571 (*here).second.c->configure (key, cfgline);
572 }
573 }
574
575 ++here;
576 }
577}
578
579void collectset::getCollectionList (text_tarray &collist)
580{
581 collist.erase(collist.begin(),collist.end());
582
583 collectservermapclass::iterator here = cservers.begin();
584 collectservermapclass::iterator end = cservers.end();
585 while (here != end) {
586 assert ((*here).second.c != NULL);
587 if ((*here).second.c != NULL) {
588 collist.push_back ((*here).second.c->get_collection_name());
589 }
590 ++here;
591 }
592}
593
Note: See TracBrowser for help on using the repository browser.