root/main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp @ 21324

Revision 21324, 18.3 KB (checked in by ak19, 11 years ago)

Changes to makefiles, configure files, and source code to work with the new configure flags that allow indexers to be individually compiled up by setting each indexer to be enabled or disabled (enable-mg, enable-mgpp, enable-lucene)

  • Property svn:keywords set to Author Date Id Revision
Line 
1 
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999  The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34  if (buildtype=="mg") {
35#ifndef ENABLE_MG
36    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38  }
39
40  else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44  }
45
46  else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50  }
51
52  else {
53    cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54  }
55
56}
57
58
59
60collectserver::collectserver ()
61  : collectinfo()
62{
63  configinfo.collection = "null";
64}
65
66collectserver::~collectserver () {
67
68  // clean up the sources
69  sourcelistclass::iterator source_here = sources.begin();
70  sourcelistclass::iterator source_end = sources.end();
71  while (source_here != source_end) {
72    if ((*source_here).s != NULL)
73      delete (*source_here).s;
74    ++source_here;
75  }
76  sources.clear();
77
78  // clean up the filters
79  filtermapclass::iterator filter_here = filters.begin();
80  filtermapclass::iterator filter_end = filters.end();
81  while (filter_here != filter_end) {
82    if ((*filter_here).second.f != NULL)
83      delete (*filter_here).second.f;
84    ++filter_here;
85  }
86  filters.clear();
87}
88
89// configure should be called for each line in the
90// configuration files to configure the collection server and everything
91// it contains. The configuration should take place just before initialisation
92void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
93  if (cfgline.size() >= 1) {
94    const text_t &value = cfgline[0];
95    if (key == "plugin")
96    {
97        //get the plugin name
98    const text_t &name = cfgline[0];
99   
100    if (name == "HTMLPlugin")
101    {
102        for (int hI = 1; hI < cfgline.size(); hI++)
103        {
104            const text_t &plugOption = cfgline[hI];
105           
106            if (plugOption == "-use_realistic_book")
107            {
108                collectinfo.useBook = true;
109                break;
110            }
111        }
112    }
113    }
114    else if (key == "gsdlhome") configinfo.gsdlhome = value;
115    else if (key == "gdbmhome") configinfo.dbhome = value;
116    else if (key == "collecthome") configinfo.collecthome = value;
117    else if (key == "collection") {
118      configinfo.collection = value;
119      collectinfo.shortInfo.name = value;
120    }
121    else if (key == "collectdir") configinfo.collectdir = value;
122    else if (key == "host") collectinfo.shortInfo.host = value;
123    else if (key == "port") collectinfo.shortInfo.port = value.getint();
124    else if (key == "public") {
125      if (value == "true") collectinfo.isPublic = true;
126      else collectinfo.isPublic = false;
127    } else if (key == "beta") {
128      if (value == "true") collectinfo.isBeta = true;
129      else collectinfo.isBeta = false;
130    } else if (key == "collectgroup") {
131      if (value == "true") collectinfo.isCollectGroup = true;
132      else collectinfo.isCollectGroup = false;
133    } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
134    else if (key == "supercollectionoptions") {
135      text_tarray::const_iterator begin = cfgline.begin();
136      text_tarray::const_iterator end = cfgline.end();
137      while(begin != end) {
138   
139    if (*begin == "uniform_search_results_formatting") {
140      collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
141    }
142    begin++;
143      }
144    }
145    else if (key == "builddate") collectinfo.buildDate = value.getint();
146    else if (key == "languages") collectinfo.languages = cfgline;
147    else if (key == "numdocs") collectinfo.numDocs = value.getint();
148    else if (key == "numsections") collectinfo.numSections = value.getint();
149    else if (key == "numwords") collectinfo.numWords = value.getint();
150    else if (key == "numbytes") collectinfo.numBytes = value.getint();
151    else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
152    else if (key == "collectionmeta") {
153      // genuine collmeta get added as collectionmeta and collection_macros
154      // .collmeta just get added as collection_macros
155      text_t params;
156      if (cfgline.size() == 3) {
157    // get the params for later
158    text_t::const_iterator first=cfgline[1].begin()+1;
159    text_t::const_iterator last=cfgline[1].end()-1;
160    params=substr(first, last);
161      }
162     
163      text_t meta_name = cfgline[0];
164      if (*(meta_name.begin())=='.') {
165    // a .xxx collectionmeta. strip off the . and
166    // look it up in the indexmap to get the actual value
167   
168    text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
169    text_t new_name;
170    if (indexmap.from2to(name, new_name)) {
171      meta_name = new_name;
172    }
173      } else {
174    // add them to collectionmeta
175    text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
176    if (cfgline.size() == 2) {
177      lang_map[g_EmptyText] = cfgline[1];
178    } else if (cfgline.size() == 3 ) {
179      // get the lang out of params
180      paramhashtype params_hash;
181      splitparams(params, params_hash);
182     
183      text_t lang = params_hash["l"];
184      lang_map[lang] = cfgline[2];
185      if (lang_map[g_EmptyText].empty()) {
186        // want the first one as the default if no default specified
187        lang_map[g_EmptyText] = cfgline[2];
188      }
189    }
190    collectinfo.collectionmeta[cfgline[0]] = lang_map;
191   
192      }
193     
194      // add all collectionmeta to macro list
195      text_tmap params_map = collectinfo.collection_macros[meta_name];
196     
197      if (cfgline.size() == 2) {// no params for this macro
198    params_map[g_EmptyText] = cfgline[1];
199      }
200      else if (cfgline.size() == 3) {// has params
201    params_map[params] = cfgline[2];
202    if (params_map[g_EmptyText].empty()) {
203      params_map[g_EmptyText] = cfgline[2];
204    }
205      }
206      collectinfo.collection_macros[meta_name] = params_map;
207    }
208    else if (key == "collectionmacro") {
209      text_t nobrackets;
210      text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
211      // add all to macro list
212      if (cfgline.size() == 2) { // no params for this macro
213    params_map[g_EmptyText] = cfgline[1];
214      }
215      else if (cfgline.size() == 3) {// has params
216    // strip [ ] brackets from params
217    text_t::const_iterator first=cfgline[1].begin()+1;
218    text_t::const_iterator last=cfgline[1].end()-1;
219    nobrackets=substr(first, last);
220    params_map[nobrackets] = cfgline[2];
221      }
222      collectinfo.collection_macros[cfgline[0]] = params_map;
223     
224    } else if (key == "format" && cfgline.size() == 2)
225      collectinfo.format[cfgline[0]] = cfgline[1];
226    else if (key == "building" && cfgline.size() == 2)
227      collectinfo.building[cfgline[0]] = cfgline[1];
228    else if (key == "httpdomain") collectinfo.httpdomain = value;
229    else if (key == "httpprefix") collectinfo.httpprefix = value;
230    else if (key == "receptionist") collectinfo.receptionist = value;
231    else if (key == "buildtype") {
232      check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
233      collectinfo.buildType = value;
234    }
235    // backwards compatibility - searchytpes is now a format statement
236    else if (key == "searchtype") { // means buildtype is mgpp
237      if (collectinfo.buildType.empty()) {
238    check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
239    collectinfo.buildType = "mgpp";
240      }
241      joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
242      //collectinfo.searchTypes = cfgline;
243    }
244    else if (key == "separate_cjk") {
245      if (value == "true") collectinfo.isSegmented = true;
246      else collectinfo.isSegmented = false;
247    }
248    // What have we set in our collect.cfg file :  document or collection ?
249    else if (key == "authenticate") collectinfo.authenticate = value;
250
251    // What have we set for our group list
252    else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
253
254    // store all the mappings for use when collection meta is read later
255    // (build.cfg read before collect.cfg)
256    else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
257      indexmap.importmap (cfgline, true);
258     
259    }
260    // In the map the key-value pair contain the same
261    // data i.e key == data, if key is 2 then data is 2
262   
263    // What have we set for our public_documents ACL
264    else if (key == "public_documents")
265       {
266      text_tarray::const_iterator begin = cfgline.begin();
267      text_tarray::const_iterator end = cfgline.end();
268      while(begin != end)
269         {
270        // key = data i.e if key is 2 then data is 2
271        // collectinfo.public_documents[*begin] is the key
272        // *begin is the data value
273
274        collectinfo.public_documents[*begin] = *begin;
275        ++begin;
276         }
277       }
278   
279    // What have we set for our private_documents ACL
280    else if (key == "private_documents")
281       {
282      text_tarray::const_iterator begin = cfgline.begin();
283      text_tarray::const_iterator end = cfgline.end();
284      while(begin != end)
285         {
286        // key = data i.e if key is 2 then data is 2
287        // collectinfo.public_documents[*begin] is the key
288        // *begin is the data value
289       
290        collectinfo.private_documents[*begin] = *begin;
291        ++begin;
292         }
293       }
294
295    // dynamic_classifier <UniqueID> "<Options>"
296    else if (key == "dynamic_classifier")
297    {
298      collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
299    }
300  }
301 
302  // configure the filters
303  filtermapclass::iterator filter_here = filters.begin();
304  filtermapclass::iterator filter_end = filters.end();
305  while (filter_here != filter_end) {
306    assert ((*filter_here).second.f != NULL);
307    if ((*filter_here).second.f != NULL)
308      (*filter_here).second.f->configure(key, cfgline);
309
310    ++filter_here;
311  }
312
313  // configure the sources
314  sourcelistclass::iterator source_here = sources.begin();
315  sourcelistclass::iterator source_end = sources.end();
316  while (source_here != source_end) {
317    assert ((*source_here).s != NULL);
318    if ((*source_here).s != NULL)
319      (*source_here).s->configure(key, cfgline);
320   
321    ++source_here;
322  }
323}
324
325
326void collectserver::configure (const text_t &key, const text_t &value) {
327  text_tarray cfgline;
328  cfgline.push_back (value);
329  configure(key, cfgline);
330}
331
332void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
333  // if we've not been properly configured, then it is a foregone
334  // conclusion that we cannot be active
335  if (this->configinfo.collection == "null")
336    {
337      wasSuccess = false;
338    }
339  // if no build date exists, then the collection was probably not built;
340  // ditto if the number of documents is zero, then something is pretty
341  // wrong
342  else if (this->collectinfo.buildDate == 0 ||
343      this->collectinfo.numDocs == 0)
344    {
345      wasSuccess =  false;
346    }
347  // it is probably okay
348  else
349    wasSuccess = true;
350}
351
352
353bool collectserver::init (ostream &logout) {
354  // delete the indexmap
355  indexmap.clear();
356
357  // init the filters
358  filtermapclass::iterator filter_here = filters.begin();
359  filtermapclass::iterator filter_end = filters.end();
360  while (filter_here != filter_end) {
361    assert ((*filter_here).second.f != NULL);
362    if (((*filter_here).second.f != NULL) &&
363    !(*filter_here).second.f->init(logout)) return false;
364   
365    ++filter_here;
366  }
367
368  // init the sources
369  sourcelistclass::iterator source_here = sources.begin();
370  sourcelistclass::iterator source_end = sources.end();
371  while (source_here != source_end) {
372    assert ((*source_here).s != NULL);
373    if (((*source_here).s != NULL) &&
374    !(*source_here).s->init(logout)) return false;
375   
376    ++source_here;
377  }
378
379  return true;
380}
381
382
383void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
384              comerror_t &err, ostream &/*logout*/) {
385  reponse = collectinfo;
386  err = noError;
387}
388
389void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
390                    comerror_t &err, ostream &/*logout*/) {
391  response.clear ();
392
393  // get a list of filter names
394  filtermapclass::iterator filter_here = filters.begin();
395  filtermapclass::iterator filter_end = filters.end();
396  while (filter_here != filter_end) {
397    response.filterNames.insert ((*filter_here).first);
398    ++filter_here;
399  }
400
401  err = noError;
402}
403
404void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
405                       InfoFilterOptionsResponse_t &response,
406                       comerror_t &err, ostream &logout) {
407  outconvertclass text_t2ascii;
408
409  filterclass *thisfilter = filters.getfilter(request.filterName);
410  if (thisfilter != NULL) {
411    thisfilter->get_filteroptions (response, err, logout);
412  } else {
413    response.clear ();
414    err = protocolError;
415    logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
416       << "filter \"" << request.filterName << "\".\n\n";
417  }
418}
419
420void collectserver::filter (FilterRequest_t &request,
421                FilterResponse_t &response,
422                comerror_t &err, ostream &logout) {
423  outconvertclass text_t2ascii;
424
425  // translate any ".fc", ".pr" etc. stuff in the docSet
426  text_t translatedOID;
427  text_tarray translatedOIDs;
428  text_tarray::iterator doc_here = request.docSet.begin();
429  text_tarray::iterator doc_end = request.docSet.end();
430  while (doc_here != doc_end) {
431    if (needs_translating (*doc_here)) {
432      sourcelistclass::iterator source_here = sources.begin();
433      sourcelistclass::iterator source_end = sources.end();
434      while (source_here != source_end) {
435    assert ((*source_here).s != NULL);
436    if (((*source_here).s != NULL) &&
437        ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
438      if (err != noError) return;
439      break;
440    }
441    ++source_here;
442      }
443      translatedOIDs.push_back (translatedOID);
444    } else {
445      translatedOIDs.push_back (*doc_here);
446    }
447    ++doc_here;
448  }
449  request.docSet = translatedOIDs;
450
451  response.clear();
452
453  filterclass *thisfilter = filters.getfilter(request.filterName);
454  if (thisfilter != NULL) {
455    // filter the data
456    thisfilter->filter (request, response, err, logout);
457    if (err != noError) return;
458    // fill in the metadata for each of the OIDs (if it is requested)
459    if (request.filterResultOptions & FRmetadata) {
460      bool processed = false;
461      ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
462      ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
463      while (resultdoc_here != resultdoc_end) {
464    // try each of the sources in turn
465    sourcelistclass::iterator source_here = sources.begin();
466    sourcelistclass::iterator source_end = sources.end();
467    while (source_here != source_end) {
468      assert ((*source_here).s != NULL);
469      if (((*source_here).s != NULL) &&
470          ((*source_here).s->get_metadata(request.requestParams, request.refParams,
471                          request.getParents, request.fields,
472                          (*resultdoc_here).OID, (*resultdoc_here).metadata,
473                          err, logout))) {
474        if (err != noError) return;
475        processed = true;
476        break;
477      }
478      ++source_here;
479    }
480    if (!processed) {
481
482      logout << text_t2ascii << "Protocol Error: nothing processed for "
483       << "filter \"" << request.filterName << "\".\n\n";
484
485      err = protocolError;
486      return;
487    }
488    ++resultdoc_here;
489      }
490    } 
491 
492    err = noError;
493  }
494  else
495  {
496    response.clear ();
497    err = protocolError;
498    logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
499       << "filter \"" << request.filterName << "\".\n\n";
500  }
501}
502
503void collectserver::get_document (const DocumentRequest_t &request,
504                  DocumentResponse_t &response,
505                  comerror_t &err, ostream &logout) {
506
507  sourcelistclass::iterator source_here = sources.begin();
508  sourcelistclass::iterator source_end = sources.end();
509  while (source_here != source_end) {
510    assert ((*source_here).s != NULL);
511    if (((*source_here).s != NULL) &&
512    ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
513      if (err != noError) return;
514      break;
515    }
516    ++source_here;
517  }
518}
519
520void collectserver::is_searchable (bool &issearchable, comerror_t &err,
521                   ostream &logout) {
522
523  sourcelistclass::iterator source_here = sources.begin();
524  sourcelistclass::iterator source_end = sources.end();
525  while (source_here != source_end) {
526    assert ((*source_here).s != NULL);
527    if (((*source_here).s != NULL) &&
528    ((*source_here).s->is_searchable (issearchable, err, logout))) {
529      if (err != noError) return;
530      break;
531    }
532    ++source_here;
533  }
534}
535
536
537bool operator==(const collectserverptr &x, const collectserverptr &y) {
538  return (x.c == y.c);
539}
540
541bool operator<(const collectserverptr &x, const collectserverptr &y) {
542  return (x.c < y.c);
543}
544
545
546// thecollectserver remains the property of the calling code but
547// should not be deleted until it is removed from this list.
548void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
549  // can't add a null collection server
550  assert (thecollectserver != NULL);
551  if (thecollectserver == NULL) return;
552 
553  // can't add an collection server with no collection name
554  assert (!(thecollectserver->get_collection_name()).empty());
555  if ((thecollectserver->get_collection_name()).empty()) return;
556
557  collectserverptr cptr;
558  cptr.c = thecollectserver;
559  collectserverptrs[thecollectserver->get_collection_name()] = cptr;
560}
561
562// getcollectserver will return NULL if the collectserver could not be found
563collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
564  // can't find a collection with no name
565  if (collection.empty()) return NULL;
566
567  iterator here = collectserverptrs.find (collection);
568  if (here == collectserverptrs.end()) return NULL;
569 
570  return (*here).second.c;
571}
Note: See TracBrowser for help on using the browser.