root/main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp @ 21472

Revision 21472, 16.6 KB (checked in by mdewsnip, 10 years ago)

Created USE_GDBM and USE_JDBM defines that work similarly to USE_SQLITE and USE_MSSQL for controlling which infodb types are compiled in. Currently these are not configurable through the configure scripts and are always on.

  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "queryfilter.h"
37
38#ifdef ENABLE_MG
39#include "mgqueryfilter.h"
40#include "mgsource.h"
41#endif
42#ifdef ENABLE_MGPP
43#include "mgppqueryfilter.h"
44#include "mgppsource.h"
45#endif
46#ifdef ENABLE_LUCENE
47#include "lucenequeryfilter.h"
48#include "lucenesource.h"
49#endif
50
51#include <assert.h>
52
53#ifdef USE_GDBM
54#include "gdbmclass.h"
55#endif
56
57#ifdef USE_JDBM
58#include "jdbmnaiveclass.h"
59#endif
60
61#ifdef USE_SQLITE
62#include "sqlitedbclass.h"
63#endif
64
65#ifdef USE_MSSQL
66#include "mssqldbclass.h"
67#endif
68
69
70collectset::collectset (text_t& gsdlhome, text_t& collecthome)
71{
72  // gsdlhome and collecthome will be set as a result of calling this function
73  // collecthome will default to "<gsdlhome>/collect" if not explicitly
74  // specified in config file
75
76  text_tarray collections;
77
78#ifdef ENABLE_MG
79  mgsearch = NULL;
80#endif
81#ifdef ENABLE_MGPP
82  mgppsearch = NULL;
83#endif
84#ifdef ENABLE_LUCENE
85  lucenesearch = NULL;
86#endif
87
88  // get gsdlhome (if we fail the error will be picked up later -- in
89  // cgiwrapper)
90
91  if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
92    if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
93      if (read_dir (collecthome, collections)) {
94   
95    text_tarray::const_iterator thiscol = collections.begin();
96    text_tarray::const_iterator endcol = collections.end();
97
98    while (thiscol != endcol) {
99      // ignore the modelcol
100      if (*thiscol == "modelcol") {
101        ++thiscol;
102        continue;
103      }
104
105      this->add_collection (*thiscol, gsdlhome, collecthome);
106
107      ++thiscol;
108    }
109
110    this->add_all_collection_groups(gsdlhome, collecthome);
111      }
112    }
113  }
114
115  set_gsdl_env_vars(gsdlhome);
116}
117
118
119collectset::collectset (text_t& httpprefix_arg)
120{
121  httpprefix = httpprefix_arg;
122
123#ifdef ENABLE_MG
124  mgsearch = NULL;
125#endif
126#ifdef ENABLE_MGPP
127  mgppsearch = NULL;
128#endif
129#ifdef ENABLE_LUCENE
130  lucenesearch = NULL;
131#endif
132
133}
134
135collectset::collectset ()
136{
137#ifdef ENABLE_MG
138  mgsearch = NULL;
139#endif
140#ifdef ENABLE_MGPP
141  mgppsearch = NULL;
142#endif
143#ifdef ENABLE_LUCENE
144  lucenesearch = NULL;
145#endif
146}
147
148collectset::~collectset () {
149  collectservermapclass::iterator here = cservers.begin();
150  collectservermapclass::iterator end = cservers.end();
151
152  while (here != end) {
153    if ((*here).second.c != NULL) {
154      delete (*here).second.c;
155    }
156    ++here;
157  }
158  cservers.clear();
159}
160
161bool collectset::init (ostream &logout) {
162  collectservermapclass::iterator here = cservers.begin();
163  collectservermapclass::iterator end = cservers.end();
164
165  while (here != end) {
166    assert ((*here).second.c != NULL);
167    if ((*here).second.c != NULL) {
168      const colservrconf &configinfo = (*here).second.c->get_configinfo ();
169
170      // configure this collection server
171
172      // note that we read build.cfg before collect.cfg so that the indexmaps
173      // are available to decode defaultindex, defaultsubcollection, and
174      // defaultlanguage
175
176      bool failed_build_cfg = false;
177      if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
178               configinfo.collecthome, configinfo.collection)) {
179    failed_build_cfg = true;
180
181    outconvertclass text_t2ascii;
182    logout << text_t2ascii
183           << "Warning: couldn't read build.cfg file for collection \""
184           << configinfo.collection << "\""
185           << "  gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
186           << "  collecthome=\"" << configinfo.collecthome << "\"\n";
187      }
188
189      bool failed_collect_cfg = false;
190      if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
191                 configinfo.collecthome, configinfo.collection)) {
192    failed_collect_cfg = true;
193    outconvertclass text_t2ascii;
194    logout << text_t2ascii
195           << "Warning: couldn't read collect.cfg file for collection \""
196           << configinfo.collection << "\""
197           << "  gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
198           << "  collecthome=\"" << configinfo.collecthome << "\"\n";
199      }
200
201     
202      bool is_colgroup = (*here).second.c->is_collection_group();
203
204      if (failed_collect_cfg) {
205    ++here;
206    continue;
207      }
208
209      if (failed_build_cfg && (!is_colgroup)) {
210    ++here;
211    continue;
212      }
213      // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
214
215      if (!(*here).second.c->init (logout)) return false;
216
217      (*here).second.c->configure("httpdomain",httpdomain);
218      (*here).second.c->configure("httpprefix",httpprefix);
219    }
220    ++here;
221  }
222
223  return true;
224}
225
226collectservermapclass collectset::servers()
227{ return cservers;
228}
229
230
231void collectset::add_all_collections(const text_t &gsdlhome,
232                     const text_t& collecthome)
233{
234  text_tarray collections;
235
236  if (read_dir(collecthome, collections)) {
237
238    text_tarray::const_iterator thiscol = collections.begin();
239    text_tarray::const_iterator endcol = collections.end();
240
241    while (thiscol != endcol) {
242   
243      // ignore the modelcol
244      if (*thiscol == "modelcol") {
245    ++thiscol;
246    continue;
247      }
248   
249      // create collection server for this collection
250      this->add_collection (*thiscol, gsdlhome, collecthome);
251   
252      ++thiscol;
253    }
254
255    this->add_all_collection_groups(gsdlhome,collecthome);
256  }
257}
258
259bool collectset::collection_is_collect_group (const text_t& collect_dir)
260{
261  text_t is_collect_group_str = "false";
262  text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
263
264  if (file_exists(collect_cfg)) {
265    char *collect_cfgc = collect_cfg.getcstr();
266    ifstream confin(collect_cfgc);
267
268    if (confin) {
269      text_tarray cfgline;
270
271      while (read_cfg_line(confin, cfgline) >= 0) {
272    if (cfgline.size() == 2) {
273      text_t key = cfgline[0];
274      cfgline.erase(cfgline.begin());
275      if (key == "collectgroup") {
276        is_collect_group_str = cfgline[0];
277        break;
278      }
279    }
280      }
281
282      confin.close();
283    }
284
285    delete []collect_cfgc;
286  }
287
288  bool is_collect_group = (is_collect_group_str == "true") ? true : false;
289
290  return is_collect_group;
291}
292
293
294// add_collection sets up the collectionserver and calls
295// add_collectserver
296void collectset::add_collection (const text_t& collection,
297                 const text_t& gsdlhome,
298                 const text_t& collecthome)
299{
300  // read config file to see if built with mg, mgpp, or lucene
301  text_t buildtype = "mg"; // mg is default
302  text_t infodbtype = "gdbm"; // gdbm is default
303
304  this->remove_collection(collection);
305 
306  collectserver *cserver = NULL;
307
308  text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
309  if (file_exists (build_cfg)) {
310    char *build_cfgc = build_cfg.getcstr();
311    ifstream confin(build_cfgc);
312   
313    if (confin) {
314      text_tarray cfgline;
315
316      while (read_cfg_line(confin, cfgline) >= 0) {
317    if (cfgline.size() == 2) {
318      text_t key = cfgline[0];
319      cfgline.erase(cfgline.begin());
320      if (key == "buildtype") {
321        buildtype = cfgline[0];
322      }
323      if (key == "infodbtype") {
324        infodbtype = cfgline[0];
325      }
326    }
327      }
328      confin.close();
329    }
330    delete []build_cfgc;
331
332    cserver = new collectserver();
333
334    // Create a dbclass of the correct type
335    dbclass *db_ptr = NULL;
336
337#ifdef USE_SQLITE
338    if (infodbtype == "sqlite")
339      {
340    sqlitedbclass *sql_db_ptr = new sqlitedbclass();
341    db_ptr = sql_db_ptr;
342
343    // add a sql browse filter
344    sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
345    sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
346    cserver->add_filter (sqlbrowsefilter); 
347      }
348#endif
349 
350#ifdef USE_MSSQL
351    if (infodbtype == "mssql")
352      {
353    mssqldbclass *mssql_db_ptr = new mssqldbclass();
354    db_ptr = mssql_db_ptr;
355
356    // add a sql browse filter
357    sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
358    sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
359    cserver->add_filter (sqlbrowsefilter); 
360      }
361#endif
362
363#ifdef USE_JDBM
364    if (infodbtype == "jdbm") {
365
366    jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
367    db_ptr = jdbm_db_ptr;
368    }
369#endif
370
371    // Use GDBM if the infodb type is empty or not one of the values above
372    if (db_ptr == NULL) {
373#ifdef USE_GDBM
374      db_ptr = new gdbmclass();
375#endif
376    }
377
378    // add a null filter
379    filterclass *filter = new filterclass ();
380    cserver->add_filter (filter);
381 
382    // add a browse filter
383    browsefilterclass *browsefilter = new browsefilterclass();
384    browsefilter->set_db_ptr(db_ptr);
385    cserver->add_filter (browsefilter); 
386
387    if (buildtype == "mg") {
388#ifdef ENABLE_MG
389      mgsearch = new mgsearchclass();
390 
391      // add a query filter
392      mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
393      queryfilter->set_db_ptr(db_ptr);
394      queryfilter->set_textsearchptr (mgsearch);
395      cserver->add_filter (queryfilter);
396   
397      // add a mg source
398      mgsourceclass *mgsource = new mgsourceclass ();
399      mgsource->set_db_ptr(db_ptr);
400      mgsource->set_textsearchptr (mgsearch);
401      cserver->add_source (mgsource);
402#else
403      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
404#endif
405    }
406    else if (buildtype == "mgpp") {
407#ifdef ENABLE_MGPP
408      mgppsearch = new mgppsearchclass();
409
410      // add a query filter
411      mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
412      queryfilter->set_db_ptr(db_ptr);
413      queryfilter->set_textsearchptr (mgppsearch);
414      cserver->add_filter (queryfilter);
415     
416      // add a mgpp source
417      mgppsourceclass *mgppsource = new mgppsourceclass ();
418      mgppsource->set_db_ptr(db_ptr);
419      mgppsource->set_textsearchptr (mgppsearch);
420      cserver->add_source (mgppsource);
421#else
422      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
423#endif
424    }
425    else if (buildtype == "lucene") {
426#ifdef ENABLE_LUCENE
427      lucenesearch = new lucenesearchclass();
428      lucenesearch->set_gsdlhome(gsdlhome);
429
430      // add a query filter
431      lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
432      queryfilter->set_db_ptr(db_ptr);
433      queryfilter->set_textsearchptr (lucenesearch);
434      cserver->add_filter (queryfilter);
435     
436      // add a lucene source
437      lucenesourceclass *lucenesource = new lucenesourceclass ();
438      lucenesource->set_db_ptr(db_ptr);
439      lucenesource->set_textsearchptr (lucenesearch);
440      cserver->add_source (lucenesource);
441#else
442      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
443#endif
444    }
445    else {
446      cerr << "Warning: unrecognized buildtype " << buildtype << endl;
447    }
448
449  }
450  else {
451    // see if it is a collectgroup col
452    text_t this_collect_dir = filename_cat(collecthome, collection);
453    if (collection_is_collect_group(this_collect_dir)) {
454    // by this point we know we will need a cserver
455    cserver = new collectserver();
456    }
457    // else not a collect group, or there was no collect.cfg
458    // => leave cserver as NULL so it will not be added into cservers
459  }
460
461  if (cserver != NULL) {
462    // inform collection server and everything it contains about its
463    // collection name
464    cserver->configure ("collection", collection);
465    cserver->configure ("gsdlhome", gsdlhome);
466    cserver->configure ("collecthome", collecthome);
467    cservers.addcollectserver (cserver);
468  }
469}
470
471void collectset::remove_all_collections () {
472
473#ifdef ENABLE_MG
474  // first unload any cached mg databases
475  if (mgsearch != NULL) {
476    mgsearch->unload_database();
477  }
478#endif
479
480  // now delete the collection server objects
481  collectservermapclass::iterator here = cservers.begin();
482  collectservermapclass::iterator end = cservers.end();
483
484  while (here != end) {
485    if ((*here).second.c != NULL) {
486      delete (*here).second.c;
487    }
488    ++here;
489  }
490  cservers.clear();
491}
492
493void collectset::add_collection_group(const text_t& collection,
494                      const text_t& gsdlhome,
495                      const text_t& collecthome)
496{
497  text_tarray group;
498
499  text_t collect_group_dir = filename_cat (collecthome, collection);
500
501  // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
502  if (collection_is_collect_group(collect_group_dir)) {
503    if (read_dir (collect_group_dir, group)) {
504     
505      text_tarray::const_iterator thiscol = group.begin();
506      text_tarray::const_iterator endcol = group.end();
507     
508      while (thiscol != endcol) {
509    // ignore the etc directory
510    if (*thiscol == "etc") {
511      ++thiscol;
512      continue;
513    }
514   
515    //text_t group_col = filename_cat(collection,*thiscol);
516    // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted  to \ on windows
517    text_t group_col = collection + "/" + *thiscol;
518    this->add_collection (group_col, gsdlhome, collecthome);
519   
520    ++thiscol;
521      }
522    }
523  }
524}
525
526void collectset::add_all_collection_groups (const text_t& gsdlhome,
527                        const text_t& collecthome)
528
529{
530  collectservermapclass::iterator here = cservers.begin();
531  collectservermapclass::iterator end = cservers.end();
532 
533  while (here != end) {
534    text_t collection = (*here).second.c->get_collection_name();
535    this->add_collection_group(collection,gsdlhome,collecthome);
536
537    ++here;
538  }
539}
540
541
542// remove_collection deletes the collection server of collection.
543// This only needs to be called if a collectionserver is to be
544// removed while the library is running. The destructor function
545// cleans up all collectservers when the program exits.
546void collectset::remove_collection (const text_t &collection) {
547
548  // do nothing if no collection server exists for this collection
549  if (cservers.getcollectserver(collection) == NULL) return;
550
551#ifdef ENABLE_MG
552  // first unload any cached mg databases - we may need to do something
553  // similar to this for mgpp and lucene too
554  if (mgsearch != NULL) {
555    mgsearch->unload_database();
556  }
557#endif
558
559  // now delete the collection server object
560  collectservermapclass::iterator here = cservers.begin();
561  collectservermapclass::iterator end = cservers.end();
562
563  while (here != end) {
564    if ((*here).second.c != NULL && (*here).first == collection) {
565      delete (*here).second.c;
566      cservers.erase (here);
567      return;
568    }
569    ++here;
570  }
571}
572
573
574// remove_collection deletes the collection server of collection.
575// This only needs to be called if a collectionserver is to be
576// removed while the library is running. The destructor function
577// cleans up all collectservers when the program exits.
578void collectset::remove_collection (const text_t &collection, ostream &logout) {
579
580  remove_collection(collection);
581
582  outconvertclass text_t2ascii;
583  logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
584     << collection << "\n";
585}
586
587void collectset::configure(const text_t &key, const text_tarray &cfgline)
588{
589  if ((key == "collection") || (key == "collectdir")) return;
590
591  collectservermapclass::iterator here = cservers.begin();
592  collectservermapclass::iterator end = cservers.end();
593
594  while (here != end) {
595    assert ((*here).second.c != NULL);
596    if ((*here).second.c != NULL) {
597      if (key == "collectinfo") {
598    if ((*here).first == cfgline[0]) {
599      if (cfgline.size()==3) {
600        (*here).second.c->configure ("gsdlhome", cfgline[1]);
601        (*here).second.c->configure ("gdbmhome", cfgline[2]);
602      }
603      else {
604        (*here).second.c->configure ("gsdlhome", cfgline[1]);
605        (*here).second.c->configure ("collecthome", cfgline[2]);
606        (*here).second.c->configure ("gdbmhome", cfgline[3]);
607      }
608    }
609      } else {
610    (*here).second.c->configure (key, cfgline);
611      }
612    }
613
614    ++here;
615  }
616}
617
618void collectset::getCollectionList (text_tarray &collist)
619{
620  collist.erase(collist.begin(),collist.end());
621
622  collectservermapclass::iterator here = cservers.begin();
623  collectservermapclass::iterator end = cservers.end();
624  while (here != end) {
625    assert ((*here).second.c != NULL);
626    if ((*here).second.c != NULL) {
627      collist.push_back ((*here).second.c->get_collection_name());
628    }
629    ++here;
630  }
631}
632
Note: See TracBrowser for help on using the browser.