[1860] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * collectset.cpp --
|
---|
| 4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
| 5 | *
|
---|
| 6 | * A component of the Greenstone digital library software
|
---|
| 7 | * from the New Zealand Digital Library Project at the
|
---|
| 8 | * University of Waikato, New Zealand.
|
---|
| 9 | *
|
---|
| 10 | * This program is free software; you can redistribute it and/or modify
|
---|
| 11 | * it under the terms of the GNU General Public License as published by
|
---|
| 12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | * (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This program is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | * GNU General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU General Public License
|
---|
| 21 | * along with this program; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | *
|
---|
| 24 | *********************************************************************/
|
---|
| 25 |
|
---|
| 26 |
|
---|
[15402] | 27 | #include "collectset.h"
|
---|
[1860] | 28 | #include "collectserver.h"
|
---|
[15402] | 29 | #include "colservrconfig.h"
|
---|
| 30 | #include "gsdlsitecfg.h"
|
---|
[15584] | 31 | #include "gdbmclass.h"
|
---|
[1860] | 32 | #include "filter.h"
|
---|
| 33 | #include "browsefilter.h"
|
---|
| 34 | #include "queryfilter.h"
|
---|
| 35 | #include "mgqueryfilter.h"
|
---|
| 36 | #include "mgppqueryfilter.h"
|
---|
[8032] | 37 | #include "mggdbmsource.h"
|
---|
[8028] | 38 | #include "lucenequeryfilter.h"
|
---|
| 39 | #include "lucenegdbmsource.h"
|
---|
[8032] | 40 |
|
---|
[1860] | 41 | #include "fileutil.h"
|
---|
| 42 | #include <assert.h>
|
---|
| 43 |
|
---|
| 44 |
|
---|
| 45 | collectset::collectset (text_t &gsdlhome) {
|
---|
| 46 |
|
---|
| 47 | text_tarray collections;
|
---|
| 48 | text_t collectdir;
|
---|
| 49 |
|
---|
| 50 | // get gsdlhome (if we fail the error will be picked up later -- in
|
---|
| 51 | // cgiwrapper)
|
---|
[15002] | 52 |
|
---|
[1860] | 53 | if (site_cfg_read (gsdlhome, httpdomain, httpprefix)) {
|
---|
[7302] | 54 | if (!gsdlhome.empty() && directory_exists(gsdlhome)) {
|
---|
[2344] | 55 | collectdir = filename_cat (gsdlhome, "collect");
|
---|
| 56 | if (read_dir (collectdir, collections)) {
|
---|
[1860] | 57 |
|
---|
[2344] | 58 | text_tarray::const_iterator thiscol = collections.begin();
|
---|
| 59 | text_tarray::const_iterator endcol = collections.end();
|
---|
[1860] | 60 |
|
---|
[2344] | 61 | while (thiscol != endcol) {
|
---|
| 62 | // ignore the modelcol
|
---|
| 63 | if (*thiscol == "modelcol") {
|
---|
[9620] | 64 | ++thiscol;
|
---|
[2344] | 65 | continue;
|
---|
| 66 | }
|
---|
[1860] | 67 |
|
---|
[15002] | 68 | this->add_collection (*thiscol, gsdlhome);
|
---|
[1860] | 69 |
|
---|
[9620] | 70 | ++thiscol;
|
---|
[2344] | 71 | }
|
---|
[15002] | 72 |
|
---|
| 73 | this->add_all_collection_groups(gsdlhome);
|
---|
[2344] | 74 | }
|
---|
| 75 | }
|
---|
[1860] | 76 | }
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | collectset::~collectset () {
|
---|
| 80 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 81 | collectservermapclass::iterator end = cservers.end();
|
---|
| 82 |
|
---|
| 83 | while (here != end) {
|
---|
| 84 | if ((*here).second.c != NULL) {
|
---|
| 85 | delete (*here).second.c;
|
---|
| 86 | }
|
---|
[9620] | 87 | ++here;
|
---|
[1860] | 88 | }
|
---|
| 89 | cservers.clear();
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | bool collectset::init (ostream &logout) {
|
---|
| 93 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 94 | collectservermapclass::iterator end = cservers.end();
|
---|
| 95 |
|
---|
| 96 | while (here != end) {
|
---|
| 97 | assert ((*here).second.c != NULL);
|
---|
| 98 | if ((*here).second.c != NULL) {
|
---|
| 99 | const colservrconf &configinfo = (*here).second.c->get_configinfo ();
|
---|
| 100 |
|
---|
| 101 | // configure this collection server
|
---|
| 102 |
|
---|
| 103 | // note that we read build.cfg before collect.cfg so that the indexmaps
|
---|
| 104 | // are available to decode defaultindex, defaultsubcollection, and
|
---|
| 105 | // defaultlanguage
|
---|
[15002] | 106 |
|
---|
| 107 | bool failed_build_cfg = false;
|
---|
[1860] | 108 | if (!build_cfg_read (*((*here).second.c), configinfo.gsdlhome,
|
---|
| 109 | configinfo.collection)) {
|
---|
[15002] | 110 | failed_build_cfg = true;
|
---|
| 111 |
|
---|
[1860] | 112 | outconvertclass text_t2ascii;
|
---|
| 113 | logout << text_t2ascii
|
---|
| 114 | << "Warning: couldn't read build.cfg file for collection \"" //****
|
---|
| 115 | << configinfo.collection << "\", gsdlhome=\""
|
---|
| 116 | << configinfo.gsdlhome << "\"\n";
|
---|
| 117 | }
|
---|
| 118 |
|
---|
[15002] | 119 | bool failed_collect_cfg = false;
|
---|
[1860] | 120 | if (!collect_cfg_read (*((*here).second.c), configinfo.gsdlhome,
|
---|
| 121 | configinfo.collection)) {
|
---|
[15002] | 122 | failed_collect_cfg = true;
|
---|
[1860] | 123 | outconvertclass text_t2ascii;
|
---|
| 124 | logout << text_t2ascii
|
---|
| 125 | << "Warning: couldn't read collect.cfg file for collection \""
|
---|
| 126 | << configinfo.collection << "\", gsdlhome=\""
|
---|
| 127 | << configinfo.gsdlhome << "\"\n";
|
---|
[15002] | 128 | }
|
---|
| 129 |
|
---|
| 130 |
|
---|
| 131 | bool is_colgroup = (*here).second.c->is_collection_group();
|
---|
| 132 |
|
---|
| 133 | if (failed_collect_cfg) {
|
---|
[9620] | 134 | ++here;
|
---|
[1860] | 135 | continue;
|
---|
| 136 | }
|
---|
| 137 |
|
---|
[15002] | 138 | if (failed_build_cfg && (!is_colgroup)) {
|
---|
| 139 | ++here;
|
---|
| 140 | continue;
|
---|
| 141 | }
|
---|
| 142 | // let a failed build.cfg through if its 'collect.cfg' marks it as 'collectgroup true'
|
---|
| 143 |
|
---|
[1860] | 144 | if (!(*here).second.c->init (logout)) return false;
|
---|
| 145 |
|
---|
| 146 | (*here).second.c->configure("httpdomain",httpdomain);
|
---|
| 147 | (*here).second.c->configure("httpprefix",httpprefix);
|
---|
| 148 | }
|
---|
[9620] | 149 | ++here;
|
---|
[1860] | 150 | }
|
---|
| 151 |
|
---|
| 152 | return true;
|
---|
| 153 | }
|
---|
| 154 |
|
---|
| 155 | collectservermapclass collectset::servers()
|
---|
| 156 | { return cservers;
|
---|
| 157 | }
|
---|
| 158 |
|
---|
[9030] | 159 |
|
---|
| 160 | void collectset::add_all_collections(const text_t &gsdlhome) {
|
---|
| 161 |
|
---|
| 162 | text_tarray collections;
|
---|
| 163 | text_t collectdir = filename_cat(gsdlhome, "collect");
|
---|
| 164 | if (read_dir(collectdir, collections)) {
|
---|
| 165 |
|
---|
| 166 | text_tarray::const_iterator thiscol = collections.begin();
|
---|
| 167 | text_tarray::const_iterator endcol = collections.end();
|
---|
| 168 |
|
---|
| 169 | while (thiscol != endcol) {
|
---|
| 170 |
|
---|
| 171 | // ignore the modelcol
|
---|
| 172 | if (*thiscol == "modelcol") {
|
---|
[9620] | 173 | ++thiscol;
|
---|
[9030] | 174 | continue;
|
---|
| 175 | }
|
---|
| 176 |
|
---|
[15421] | 177 | // create collection server for this collection
|
---|
[9030] | 178 | this->add_collection (*thiscol, gsdlhome);
|
---|
| 179 |
|
---|
[9620] | 180 | ++thiscol;
|
---|
[9030] | 181 | }
|
---|
[15002] | 182 |
|
---|
| 183 | this->add_all_collection_groups(gsdlhome);
|
---|
[9030] | 184 | }
|
---|
| 185 | }
|
---|
| 186 |
|
---|
[1860] | 187 | // add_collection sets up the collectionserver and calls
|
---|
| 188 | // add_collectserver
|
---|
[9030] | 189 | void collectset::add_collection (const text_t &collection,
|
---|
| 190 | const text_t &gsdlhome) {
|
---|
[4340] | 191 |
|
---|
[15002] | 192 | this->remove_collection(collection);
|
---|
| 193 |
|
---|
[8028] | 194 | // read config file to see if built with mg, mgpp, or lucene
|
---|
[1860] | 195 | text_t buildtype = "mg"; // mg is default
|
---|
[2545] | 196 |
|
---|
[1860] | 197 | text_tarray cfgline;
|
---|
| 198 | text_t key;
|
---|
| 199 | text_t build_cfg = filename_cat(gsdlhome, "collect", collection, "index", "build.cfg");
|
---|
| 200 | char *build_cfgc = build_cfg.getcstr();
|
---|
| 201 | ifstream confin(build_cfgc);
|
---|
| 202 |
|
---|
| 203 | if (confin) {
|
---|
| 204 | while (read_cfg_line(confin, cfgline) >= 0) {
|
---|
| 205 | if (cfgline.size() == 2) {
|
---|
| 206 | key = cfgline[0];
|
---|
| 207 | cfgline.erase(cfgline.begin());
|
---|
| 208 | if (key == "buildtype") {
|
---|
| 209 | buildtype = cfgline[0];
|
---|
| 210 | break;
|
---|
| 211 | }
|
---|
| 212 | }
|
---|
| 213 | }
|
---|
| 214 | confin.close();
|
---|
| 215 | }
|
---|
[9631] | 216 | delete []build_cfgc;
|
---|
[1860] | 217 |
|
---|
| 218 | collectserver *cserver = new collectserver();
|
---|
| 219 | gdbmclass *gdbmhandler = new gdbmclass();
|
---|
| 220 |
|
---|
| 221 | // add a null filter
|
---|
| 222 | filterclass *filter = new filterclass ();
|
---|
| 223 | cserver->add_filter (filter);
|
---|
| 224 |
|
---|
| 225 | // add a browse filter
|
---|
| 226 | browsefilterclass *browsefilter = new browsefilterclass();
|
---|
[15558] | 227 | browsefilter->set_db_ptr(gdbmhandler);
|
---|
[1860] | 228 |
|
---|
| 229 | cserver->add_filter (browsefilter);
|
---|
| 230 |
|
---|
| 231 | if (buildtype == "mg") {
|
---|
[2545] | 232 | mgsearch = new mgsearchclass();
|
---|
[1860] | 233 |
|
---|
| 234 | // add a query filter
|
---|
| 235 | mgqueryfilterclass *queryfilter = new mgqueryfilterclass();
|
---|
[15558] | 236 | queryfilter->set_db_ptr(gdbmhandler);
|
---|
[8028] | 237 | queryfilter->set_textsearchptr (mgsearch);
|
---|
[1860] | 238 | cserver->add_filter (queryfilter);
|
---|
| 239 |
|
---|
| 240 | // add a mg and gdbm source
|
---|
| 241 | mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
|
---|
[15584] | 242 | mggdbmsource->set_db_ptr (gdbmhandler);
|
---|
[8028] | 243 | mggdbmsource->set_textsearchptr (mgsearch);
|
---|
[1860] | 244 | cserver->add_source (mggdbmsource);
|
---|
[12246] | 245 | }
|
---|
| 246 | else if (buildtype == "mgpp") {
|
---|
[2545] | 247 | mgppsearch = new mgppsearchclass();
|
---|
[1860] | 248 |
|
---|
| 249 | // add a query filter
|
---|
| 250 | mgppqueryfilterclass *queryfilter = new mgppqueryfilterclass();
|
---|
[15558] | 251 | queryfilter->set_db_ptr(gdbmhandler);
|
---|
[8028] | 252 | queryfilter->set_textsearchptr (mgppsearch);
|
---|
[1860] | 253 | cserver->add_filter (queryfilter);
|
---|
| 254 |
|
---|
| 255 | // add a mg and gdbm source
|
---|
| 256 | mggdbmsourceclass *mggdbmsource = new mggdbmsourceclass ();
|
---|
[15584] | 257 | mggdbmsource->set_db_ptr (gdbmhandler);
|
---|
[8028] | 258 | mggdbmsource->set_textsearchptr (mgppsearch);
|
---|
[1860] | 259 | cserver->add_source (mggdbmsource);
|
---|
[12246] | 260 | }
|
---|
| 261 | else if (buildtype == "lucene") {
|
---|
[8028] | 262 | lucenesearch = new lucenesearchclass();
|
---|
[12246] | 263 | lucenesearch->set_gsdlhome(gsdlhome);
|
---|
[8028] | 264 |
|
---|
| 265 | // add a query filter
|
---|
| 266 | lucenequeryfilterclass *queryfilter = new lucenequeryfilterclass();
|
---|
[15558] | 267 | queryfilter->set_db_ptr(gdbmhandler);
|
---|
[8028] | 268 | queryfilter->set_textsearchptr (lucenesearch);
|
---|
| 269 | cserver->add_filter (queryfilter);
|
---|
| 270 |
|
---|
| 271 | // add a lucene and gdbm source
|
---|
| 272 | lucenegdbmsourceclass *lucenegdbmsource = new lucenegdbmsourceclass ();
|
---|
[15584] | 273 | lucenegdbmsource->set_db_ptr (gdbmhandler);
|
---|
[8028] | 274 | lucenegdbmsource->set_textsearchptr (lucenesearch);
|
---|
| 275 | cserver->add_source (lucenegdbmsource);
|
---|
[1860] | 276 | }
|
---|
| 277 |
|
---|
| 278 | // inform collection server and everything it contains about its
|
---|
| 279 | // collection name
|
---|
| 280 | cserver->configure ("collection", collection);
|
---|
[4372] | 281 | cserver->configure ("gsdlhome", gsdlhome);
|
---|
[1860] | 282 |
|
---|
| 283 | cservers.addcollectserver (cserver);
|
---|
| 284 | }
|
---|
| 285 |
|
---|
[9030] | 286 | void collectset::remove_all_collections () {
|
---|
| 287 |
|
---|
| 288 | // first unload any cached mg databases
|
---|
| 289 | if (mgsearch != NULL) {
|
---|
| 290 | mgsearch->unload_database();
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | // now delete the collection server objects
|
---|
| 294 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 295 | collectservermapclass::iterator end = cservers.end();
|
---|
| 296 |
|
---|
| 297 | while (here != end) {
|
---|
| 298 | if ((*here).second.c != NULL) {
|
---|
| 299 | delete (*here).second.c;
|
---|
| 300 | }
|
---|
[9620] | 301 | ++here;
|
---|
[9030] | 302 | }
|
---|
| 303 | cservers.clear();
|
---|
| 304 | }
|
---|
| 305 |
|
---|
[15002] | 306 | void collectset::add_collection_group(const text_t& collection,
|
---|
| 307 | const text_t& gsdlhome)
|
---|
| 308 | {
|
---|
| 309 | text_tarray group;
|
---|
| 310 | text_t collect_group_dir = filename_cat (gsdlhome, "collect", collection);
|
---|
[9030] | 311 |
|
---|
[15002] | 312 | // need to read collect.cfg for 'collectgroup' as class hasn't been initialised through 'init' yet
|
---|
| 313 | text_t is_collect_group;
|
---|
| 314 | text_tarray cfgline;
|
---|
| 315 | text_t key;
|
---|
| 316 | text_t build_cfg = filename_cat(gsdlhome, "collect", collection, "etc", "collect.cfg");
|
---|
| 317 | char *collect_cfgc = build_cfg.getcstr();
|
---|
| 318 | ifstream confin(collect_cfgc);
|
---|
| 319 |
|
---|
| 320 | if (confin) {
|
---|
| 321 | while (read_cfg_line(confin, cfgline) >= 0) {
|
---|
| 322 | if (cfgline.size() == 2) {
|
---|
| 323 | key = cfgline[0];
|
---|
| 324 | cfgline.erase(cfgline.begin());
|
---|
| 325 | if (key == "collectgroup") {
|
---|
| 326 | is_collect_group = cfgline[0];
|
---|
| 327 | break;
|
---|
| 328 | }
|
---|
| 329 | }
|
---|
| 330 | }
|
---|
| 331 | confin.close();
|
---|
| 332 | }
|
---|
| 333 | delete []collect_cfgc;
|
---|
| 334 |
|
---|
| 335 | if (is_collect_group == "true") {
|
---|
| 336 |
|
---|
| 337 | if (read_dir (collect_group_dir, group)) {
|
---|
| 338 |
|
---|
| 339 | text_tarray::const_iterator thiscol = group.begin();
|
---|
| 340 | text_tarray::const_iterator endcol = group.end();
|
---|
| 341 |
|
---|
| 342 | while (thiscol != endcol) {
|
---|
| 343 | // ignore the modelcol
|
---|
| 344 | if (*thiscol == "etc") {
|
---|
| 345 | ++thiscol;
|
---|
| 346 | continue;
|
---|
| 347 | }
|
---|
| 348 |
|
---|
| 349 | this->add_collection (collection + "/" + *thiscol, gsdlhome);
|
---|
| 350 |
|
---|
| 351 | ++thiscol;
|
---|
| 352 | }
|
---|
| 353 | }
|
---|
| 354 | }
|
---|
| 355 | }
|
---|
| 356 |
|
---|
| 357 | void collectset::add_all_collection_groups (const text_t& gsdlhome)
|
---|
| 358 |
|
---|
| 359 | {
|
---|
| 360 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 361 | collectservermapclass::iterator end = cservers.end();
|
---|
| 362 |
|
---|
| 363 | while (here != end) {
|
---|
| 364 | text_t collection = (*here).second.c->get_collection_name();
|
---|
| 365 | this->add_collection_group(collection,gsdlhome);
|
---|
| 366 |
|
---|
| 367 | ++here;
|
---|
| 368 | }
|
---|
| 369 | }
|
---|
| 370 |
|
---|
| 371 |
|
---|
[1860] | 372 | // remove_collection deletes the collection server of collection.
|
---|
| 373 | // This only needs to be called if a collectionserver is to be
|
---|
| 374 | // removed while the library is running. The destructor function
|
---|
| 375 | // cleans up all collectservers when the program exits.
|
---|
[15002] | 376 | void collectset::remove_collection (const text_t &collection) {
|
---|
[2545] | 377 |
|
---|
[4340] | 378 | // do nothing if no collection server exists for this collection
|
---|
| 379 | if (cservers.getcollectserver(collection) == NULL) return;
|
---|
| 380 |
|
---|
[2545] | 381 | // first unload any cached mg databases - we may need to do something
|
---|
[8028] | 382 | // similar to this for mgpp and lucene too
|
---|
[2545] | 383 | if (mgsearch != NULL) {
|
---|
| 384 | mgsearch->unload_database();
|
---|
| 385 | }
|
---|
| 386 |
|
---|
| 387 | // now delete the collection server object
|
---|
[1860] | 388 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 389 | collectservermapclass::iterator end = cservers.end();
|
---|
| 390 |
|
---|
| 391 | while (here != end) {
|
---|
| 392 | if ((*here).second.c != NULL && (*here).first == collection) {
|
---|
| 393 | delete (*here).second.c;
|
---|
| 394 | cservers.erase (here);
|
---|
| 395 | return;
|
---|
| 396 | }
|
---|
[9620] | 397 | ++here;
|
---|
[1860] | 398 | }
|
---|
[15002] | 399 | }
|
---|
| 400 |
|
---|
| 401 |
|
---|
| 402 | // remove_collection deletes the collection server of collection.
|
---|
| 403 | // This only needs to be called if a collectionserver is to be
|
---|
| 404 | // removed while the library is running. The destructor function
|
---|
| 405 | // cleans up all collectservers when the program exits.
|
---|
| 406 | void collectset::remove_collection (const text_t &collection, ostream &logout) {
|
---|
| 407 |
|
---|
| 408 | remove_collection(collection);
|
---|
| 409 |
|
---|
[1860] | 410 | outconvertclass text_t2ascii;
|
---|
[15421] | 411 | logout << text_t2ascii << "collectset::remove_collection: failed to remove collectserver for "
|
---|
[1860] | 412 | << collection << "\n";
|
---|
| 413 | }
|
---|
| 414 |
|
---|
| 415 | void collectset::configure(const text_t &key, const text_tarray &cfgline)
|
---|
| 416 | {
|
---|
| 417 | if (key == "collection" || key == "collectdir") return;
|
---|
| 418 |
|
---|
| 419 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 420 | collectservermapclass::iterator end = cservers.end();
|
---|
| 421 |
|
---|
| 422 | while (here != end) {
|
---|
| 423 | assert ((*here).second.c != NULL);
|
---|
| 424 | if ((*here).second.c != NULL) {
|
---|
| 425 | if (key == "collectinfo") {
|
---|
| 426 | if ((*here).first == cfgline[0]) {
|
---|
| 427 | (*here).second.c->configure ("gsdlhome", cfgline[1]);
|
---|
| 428 | (*here).second.c->configure ("gdbmhome", cfgline[2]);
|
---|
| 429 | }
|
---|
| 430 | } else {
|
---|
| 431 | (*here).second.c->configure (key, cfgline);
|
---|
| 432 | }
|
---|
| 433 | }
|
---|
| 434 |
|
---|
[9620] | 435 | ++here;
|
---|
[1860] | 436 | }
|
---|
| 437 | }
|
---|
| 438 |
|
---|
| 439 | void collectset::getCollectionList (text_tarray &collist)
|
---|
| 440 | {
|
---|
| 441 | collist.erase(collist.begin(),collist.end());
|
---|
| 442 |
|
---|
| 443 | collectservermapclass::iterator here = cservers.begin();
|
---|
| 444 | collectservermapclass::iterator end = cservers.end();
|
---|
| 445 | while (here != end) {
|
---|
| 446 | assert ((*here).second.c != NULL);
|
---|
| 447 | if ((*here).second.c != NULL) {
|
---|
| 448 | collist.push_back ((*here).second.c->get_collection_name());
|
---|
| 449 | }
|
---|
[9620] | 450 | ++here;
|
---|
[1860] | 451 | }
|
---|
| 452 | }
|
---|