root/main/trunk/greenstone2/runtime-src/src/colservr/collectset.cpp @ 22050

Revision 22050, 17.3 KB (checked in by davidb, 10 years ago)

Updating of code to support sql-query filter

  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * collectset.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "collectset.h"
28#include "collectserver.h"
29#include "colservrconfig.h"
30#include "gsdlsitecfg.h"
31#include "gsdltools.h"
32#include "fileutil.h"
33#include "filter.h"
34#include "browsefilter.h"
35#include "sqlbrowsefilter.h"
36#include "sqlqueryfilter.h"
37#include "queryfilter.h"
38
39#ifdef ENABLE_MG
40#include "mgqueryfilter.h"
41#include "mgsource.h"
42#endif
43#ifdef ENABLE_MGPP
44#include "mgppqueryfilter.h"
45#include "mgppsource.h"
46#endif
47#ifdef ENABLE_LUCENE
48#include "lucenequeryfilter.h"
49#include "lucenesource.h"
50#endif
51
52#include <assert.h>
53
54#ifdef USE_GDBM
55#include "gdbmclass.h"
56#endif
57
58#ifdef USE_JDBM
59#include "jdbmnaiveclass.h"
60#endif
61
62#ifdef USE_SQLITE
63#include "sqlitedbclass.h"
64#endif
65
66#ifdef USE_MSSQL
67#include "mssqldbclass.h"
68#endif
69
70
71collectset::collectset (text_t& gsdlhome, text_t& collecthome)
72{
73  // gsdlhome and collecthome will be set as a result of calling this function
74  // collecthome will default to "<gsdlhome>/collect" if not explicitly
75  // specified in config file
76
77  text_tarray collections;
78
79#ifdef ENABLE_MG
80  mgsearch = NULL;
81#endif
82#ifdef ENABLE_MGPP
83  mgppsearch = NULL;
84#endif
85#ifdef ENABLE_LUCENE
86  lucenesearch = NULL;
87#endif
88
89  // get gsdlhome (if we fail the error will be picked up later -- in
90  // cgiwrapper)
91
92  if (site_cfg_read (gsdlhome, collecthome, httpdomain, httpprefix)) {
93    if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
94      if (read_dir (collecthome, collections)) {
95   
96    text_tarray::const_iterator thiscol = collections.begin();
97    text_tarray::const_iterator endcol = collections.end();
98
99    while (thiscol != endcol) {
100      // ignore the modelcol
101      if (*thiscol == "modelcol") {
102        ++thiscol;
103        continue;
104      }
105
106      this->add_collection (*thiscol, gsdlhome, collecthome);
107
108      ++thiscol;
109    }
110
111    this->add_all_collection_groups(gsdlhome, collecthome);
112      }
113    }
114  }
115
116  set_gsdl_env_vars(gsdlhome);
117}
118
119
120collectset::collectset (text_t& httpprefix_arg)
121{
122  httpprefix = httpprefix_arg;
123
124#ifdef ENABLE_MG
125  mgsearch = NULL;
126#endif
127#ifdef ENABLE_MGPP
128  mgppsearch = NULL;
129#endif
130#ifdef ENABLE_LUCENE
131  lucenesearch = NULL;
132#endif
133
134}
135
136collectset::collectset ()
137{
138#ifdef ENABLE_MG
139  mgsearch = NULL;
140#endif
141#ifdef ENABLE_MGPP
142  mgppsearch = NULL;
143#endif
144#ifdef ENABLE_LUCENE
145  lucenesearch = NULL;
146#endif
147}
148
149collectset::~collectset () {
150  collectservermapclass::iterator here = cservers.begin();
151  collectservermapclass::iterator end = cservers.end();
152
153  while (here != end) {
154    if ((*here).second.c != NULL) {
155      delete (*here).second.c;
156    }
157    ++here;
158  }
159  cservers.clear();
160}
161
162bool collectset::init (ostream &logout) {
163  collectservermapclass::iterator here = cservers.begin();
164  collectservermapclass::iterator end = cservers.end();
165
166  while (here != end) {
167    assert ((*here).second.c != NULL);
168    if ((*here).second.c != NULL) {
169      const colservrconf &configinfo = (*here).second.c->get_configinfo ();
170
171      // configure this collection server
172
173      // note that we read build.cfg before collect.cfg so that the indexmaps
174      // are available to decode defaultindex, defaultsubcollection, and
175      // defaultlanguage
176
177      bool failed_build_cfg = false;
178      if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
179               configinfo.collecthome, configinfo.collection)) {
180    failed_build_cfg = true;
181
182    outconvertclass text_t2ascii;
183    logout << text_t2ascii
184           << "Warning: couldn't read build.cfg file for collection \""
185           << configinfo.collection << "\""
186           << "  gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
187           << "  collecthome=\"" << configinfo.collecthome << "\"\n";
188      }
189
190      bool failed_collect_cfg = false;
191      if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
192                 configinfo.collecthome, configinfo.collection)) {
193    failed_collect_cfg = true;
194    outconvertclass text_t2ascii;
195    logout << text_t2ascii
196           << "Warning: couldn't read collect.cfg file for collection \""
197           << configinfo.collection << "\""
198           << "  gsdlhome=\"" << configinfo.gsdlhome << "\"\n"
199           << "  collecthome=\"" << configinfo.collecthome << "\"\n";
200      }
201
202     
203      bool is_colgroup = (*here).second.c->is_collection_group();
204
205      if (failed_collect_cfg) {
206    ++here;
207    continue;
208      }
209
210      if (failed_build_cfg && (!is_colgroup)) {
211    ++here;
212    continue;
213      }
214      // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
215
216      if (!(*here).second.c->init (logout)) return false;
217
218      (*here).second.c->configure("httpdomain",httpdomain);
219      (*here).second.c->configure("httpprefix",httpprefix);
220    }
221    ++here;
222  }
223
224  return true;
225}
226
227collectservermapclass collectset::servers()
228{ return cservers;
229}
230
231
232void collectset::add_all_collections(const text_t &gsdlhome,
233                     const text_t& collecthome)
234{
235  text_tarray collections;
236
237  if (read_dir(collecthome, collections)) {
238
239    text_tarray::const_iterator thiscol = collections.begin();
240    text_tarray::const_iterator endcol = collections.end();
241
242    while (thiscol != endcol) {
243   
244      // ignore the modelcol
245      if (*thiscol == "modelcol") {
246    ++thiscol;
247    continue;
248      }
249   
250      // create collection server for this collection
251      this->add_collection (*thiscol, gsdlhome, collecthome);
252   
253      ++thiscol;
254    }
255
256    this->add_all_collection_groups(gsdlhome,collecthome);
257  }
258}
259
260bool collectset::collection_is_collect_group (const text_t& collect_dir)
261{
262  text_t is_collect_group_str = "false";
263  text_t collect_cfg = filename_cat(collect_dir, "etc", "collect.cfg");
264
265  if (file_exists(collect_cfg)) {
266    char *collect_cfgc = collect_cfg.getcstr();
267    ifstream confin(collect_cfgc);
268
269    if (confin) {
270      text_tarray cfgline;
271
272      while (read_cfg_line(confin, cfgline) >= 0) {
273    if (cfgline.size() == 2) {
274      text_t key = cfgline[0];
275      cfgline.erase(cfgline.begin());
276      if (key == "collectgroup") {
277        is_collect_group_str = cfgline[0];
278        break;
279      }
280    }
281      }
282
283      confin.close();
284    }
285
286    delete []collect_cfgc;
287  }
288
289  bool is_collect_group = (is_collect_group_str == "true") ? true : false;
290
291  return is_collect_group;
292}
293
294
295// add_collection sets up the collectionserver and calls
296// add_collectserver
297void collectset::add_collection (const text_t& collection,
298                 const text_t& gsdlhome,
299                 const text_t& collecthome)
300{
301  // read config file to see if built with mg, mgpp, or lucene
302  text_t buildtype = "mg"; // mg is default
303  text_t infodbtype = "gdbm"; // gdbm is default
304
305  this->remove_collection(collection);
306 
307  collectserver *cserver = NULL;
308
309  text_t build_cfg = filename_cat(collecthome, collection, "index", "build.cfg");
310  if (file_exists (build_cfg)) {
311    char *build_cfgc = build_cfg.getcstr();
312    ifstream confin(build_cfgc);
313   
314    if (confin) {
315      text_tarray cfgline;
316
317      while (read_cfg_line(confin, cfgline) >= 0) {
318    if (cfgline.size() == 2) {
319      text_t key = cfgline[0];
320      cfgline.erase(cfgline.begin());
321      if (key == "buildtype") {
322        buildtype = cfgline[0];
323      }
324      if (key == "infodbtype") {
325        infodbtype = cfgline[0];
326      }
327    }
328      }
329      confin.close();
330    }
331    delete []build_cfgc;
332
333    cserver = new collectserver();
334
335    // Create a dbclass of the correct type
336    dbclass *db_ptr = NULL;
337
338    if (infodbtype == "sqlite")
339      {
340#ifdef USE_SQLITE
341    sqlitedbclass *sql_db_ptr = new sqlitedbclass();
342    db_ptr = sql_db_ptr;
343
344    // add a sql browse filter
345    sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
346    sqlbrowsefilter->set_sql_db_ptr(sql_db_ptr);
347    cserver->add_filter (sqlbrowsefilter); 
348
349    // add a sql query filter
350    sqlqueryfilterclass *sqlqueryfilter = new sqlqueryfilterclass();
351    sqlqueryfilter->set_sql_db_ptr(sql_db_ptr);
352    cserver->add_filter (sqlqueryfilter); 
353
354#else
355    cerr << "Warning: infodbtype of 'sqlite' was not compiled in to " << endl;
356    cerr << "         this installation of Greenstone";
357#endif
358      }
359 
360    if (infodbtype == "mssql")
361      {
362#ifdef USE_MSSQL
363    mssqldbclass *mssql_db_ptr = new mssqldbclass();
364    db_ptr = mssql_db_ptr;
365
366    // add a sql browse filter
367    sqlbrowsefilterclass *sqlbrowsefilter = new sqlbrowsefilterclass();
368    sqlbrowsefilter->set_sql_db_ptr(mssql_db_ptr);
369    cserver->add_filter (sqlbrowsefilter); 
370#else
371    cerr << "Warning: infodbtype of 'mssql' was not compiled in to " << endl;
372    cerr << "         this installation of Greenstone";
373#endif
374      }
375
376    if (infodbtype == "jdbm") {
377
378#ifdef USE_JDBM
379    jdbmnaiveclass *jdbm_db_ptr = new jdbmnaiveclass(gsdlhome);
380    db_ptr = jdbm_db_ptr;
381#else
382    cerr << "Warning: infodbtype of 'jdbm' was not compiled in to " << endl;
383    cerr << "         this installation of Greenstone";
384#endif
385    }
386
387    // Use GDBM if the infodb type is empty or not one of the values above
388    if (db_ptr == NULL) {
389#ifdef USE_GDBM
390      db_ptr = new gdbmclass();
391#else
392    cerr << "Warning: infodbtype of 'gdbm' was not compiled in to " << endl;
393    cerr << "         this installation of Greenstone";
394#endif
395    }
396
397    // add a null filter
398    filterclass *filter = new filterclass ();
399    cserver->add_filter (filter);
400 
401    // add a browse filter
402    browsefilterclass *browsefilter = new browsefilterclass();
403    browsefilter->set_db_ptr(db_ptr);
404    cserver->add_filter (browsefilter); 
405
406    if (buildtype == "mg") {
407#ifdef ENABLE_MG
408      mgsearch = new mgsearchclass();
409 
410      // add a query filter
411      mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
412      queryfilter->set_db_ptr(db_ptr);
413      queryfilter->set_textsearchptr (mgsearch);
414      cserver->add_filter (queryfilter);
415   
416      // add a mg source
417      mgsourceclass *mgsource = new mgsourceclass ();
418      mgsource->set_db_ptr(db_ptr);
419      mgsource->set_textsearchptr (mgsearch);
420      cserver->add_source (mgsource);
421#else
422      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
423#endif
424    }
425    else if (buildtype == "mgpp") {
426#ifdef ENABLE_MGPP
427      mgppsearch = new mgppsearchclass();
428
429      // add a query filter
430      mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
431      queryfilter->set_db_ptr(db_ptr);
432      queryfilter->set_textsearchptr (mgppsearch);
433      cserver->add_filter (queryfilter);
434     
435      // add a mgpp source
436      mgppsourceclass *mgppsource = new mgppsourceclass ();
437      mgppsource->set_db_ptr(db_ptr);
438      mgppsource->set_textsearchptr (mgppsearch);
439      cserver->add_source (mgppsource);
440#else
441      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
442#endif
443    }
444    else if (buildtype == "lucene") {
445#ifdef ENABLE_LUCENE
446      lucenesearch = new lucenesearchclass();
447      lucenesearch->set_gsdlhome(gsdlhome);
448
449      // add a query filter
450      lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
451      queryfilter->set_db_ptr(db_ptr);
452      queryfilter->set_textsearchptr (lucenesearch);
453      cserver->add_filter (queryfilter);
454     
455      // add a lucene source
456      lucenesourceclass *lucenesource = new lucenesourceclass ();
457      lucenesource->set_db_ptr(db_ptr);
458      lucenesource->set_textsearchptr (lucenesearch);
459      cserver->add_source (lucenesource);
460#else
461      cerr << "Error: buildtype " << buildtype << " not enabled." << endl;
462#endif
463    }
464    else {
465      cerr << "Warning: unrecognized buildtype " << buildtype << endl;
466    }
467
468  }
469  else {
470    // see if it is a collectgroup col
471    text_t this_collect_dir = filename_cat(collecthome, collection);
472    if (collection_is_collect_group(this_collect_dir)) {
473    // by this point we know we will need a cserver
474    cserver = new collectserver();
475    }
476    // else not a collect group, or there was no collect.cfg
477    // => leave cserver as NULL so it will not be added into cservers
478  }
479
480  if (cserver != NULL) {
481    // inform collection server and everything it contains about its
482    // collection name
483    cserver->configure ("collection", collection);
484    cserver->configure ("gsdlhome", gsdlhome);
485    cserver->configure ("collecthome", collecthome);
486    cservers.addcollectserver (cserver);
487  }
488}
489
490void collectset::remove_all_collections () {
491
492#ifdef ENABLE_MG
493  // first unload any cached mg databases
494  if (mgsearch != NULL) {
495    mgsearch->unload_database();
496  }
497#endif
498
499  // now delete the collection server objects
500  collectservermapclass::iterator here = cservers.begin();
501  collectservermapclass::iterator end = cservers.end();
502
503  while (here != end) {
504    if ((*here).second.c != NULL) {
505      delete (*here).second.c;
506    }
507    ++here;
508  }
509  cservers.clear();
510}
511
512void collectset::add_collection_group(const text_t& collection,
513                      const text_t& gsdlhome,
514                      const text_t& collecthome)
515{
516  text_tarray group;
517
518  text_t collect_group_dir = filename_cat (collecthome, collection);
519
520  // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
521  if (collection_is_collect_group(collect_group_dir)) {
522    if (read_dir (collect_group_dir, group)) {
523     
524      text_tarray::const_iterator thiscol = group.begin();
525      text_tarray::const_iterator endcol = group.end();
526     
527      while (thiscol != endcol) {
528    // ignore the etc directory
529    if (*thiscol == "etc") {
530      ++thiscol;
531      continue;
532    }
533   
534    //text_t group_col = filename_cat(collection,*thiscol);
535    // later we check for / in the name. When this is used in a path (via fileanme_cat) the / will be converted  to \ on windows
536    text_t group_col = collection + "/" + *thiscol;
537    this->add_collection (group_col, gsdlhome, collecthome);
538   
539    ++thiscol;
540      }
541    }
542  }
543}
544
545void collectset::add_all_collection_groups (const text_t& gsdlhome,
546                        const text_t& collecthome)
547
548{
549  collectservermapclass::iterator here = cservers.begin();
550  collectservermapclass::iterator end = cservers.end();
551 
552  while (here != end) {
553    text_t collection = (*here).second.c->get_collection_name();
554    this->add_collection_group(collection,gsdlhome,collecthome);
555
556    ++here;
557  }
558}
559
560
561// remove_collection deletes the collection server of collection.
562// This only needs to be called if a collectionserver is to be
563// removed while the library is running. The destructor function
564// cleans up all collectservers when the program exits.
565void collectset::remove_collection (const text_t &collection) {
566
567  // do nothing if no collection server exists for this collection
568  if (cservers.getcollectserver(collection) == NULL) return;
569
570#ifdef ENABLE_MG
571  // first unload any cached mg databases - we may need to do something
572  // similar to this for mgpp and lucene too
573  if (mgsearch != NULL) {
574    mgsearch->unload_database();
575  }
576#endif
577
578  // now delete the collection server object
579  collectservermapclass::iterator here = cservers.begin();
580  collectservermapclass::iterator end = cservers.end();
581
582  while (here != end) {
583    if ((*here).second.c != NULL && (*here).first == collection) {
584      delete (*here).second.c;
585      cservers.erase (here);
586      return;
587    }
588    ++here;
589  }
590}
591
592
593// remove_collection deletes the collection server of collection.
594// This only needs to be called if a collectionserver is to be
595// removed while the library is running. The destructor function
596// cleans up all collectservers when the program exits.
597void collectset::remove_collection (const text_t &collection, ostream &logout) {
598
599  remove_collection(collection);
600
601  outconvertclass text_t2ascii;
602  logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
603     << collection << "\n";
604}
605
606void collectset::configure(const text_t &key, const text_tarray &cfgline)
607{
608  if ((key == "collection") || (key == "collectdir")) return;
609
610  collectservermapclass::iterator here = cservers.begin();
611  collectservermapclass::iterator end = cservers.end();
612
613  while (here != end) {
614    assert ((*here).second.c != NULL);
615    if ((*here).second.c != NULL) {
616      if (key == "collectinfo") {
617    if ((*here).first == cfgline[0]) {
618      if (cfgline.size()==3) {
619        (*here).second.c->configure ("gsdlhome", cfgline[1]);
620        (*here).second.c->configure ("gdbmhome", cfgline[2]);
621      }
622      else {
623        (*here).second.c->configure ("gsdlhome", cfgline[1]);
624        (*here).second.c->configure ("collecthome", cfgline[2]);
625        (*here).second.c->configure ("gdbmhome", cfgline[3]);
626      }
627    }
628      } else {
629    (*here).second.c->configure (key, cfgline);
630      }
631    }
632
633    ++here;
634  }
635}
636
637void collectset::getCollectionList (text_tarray &collist)
638{
639  collist.erase(collist.begin(),collist.end());
640
641  collectservermapclass::iterator here = cservers.begin();
642  collectservermapclass::iterator end = cservers.end();
643  while (here != end) {
644    assert ((*here).second.c != NULL);
645    if ((*here).second.c != NULL) {
646      collist.push_back ((*here).second.c->get_collection_name());
647    }
648    ++here;
649  }
650}
651
Note: See TracBrowser for help on using the browser.