root/main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp @ 24305

Revision 24305, 20.2 KB (checked in by ak19, 9 years ago)

The collectionmeta field related to titles in modelcol's collect.cfg now contains ex.dc.Title. So that its entry in the indexes drop-down of the search page maps to the title's display name, the lookup that runtime-src's collectserver performs needed to be adjusted.

  • Property svn:keywords set to Author Date Id Revision
Line 
1 
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999  The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34  if (buildtype=="mg") {
35#ifndef ENABLE_MG
36    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38  }
39
40  else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44  }
45
46  else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50  }
51
52  else {
53    cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54  }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61  if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65  }
66  else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70  }
71  else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75  }
76  else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80  }
81  else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85  }
86
87  else {
88    cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89  }
90
91}
92
93
94
95collectserver::collectserver ()
96  : collectinfo()
97{
98  configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103  // clean up the sources
104  sourcelistclass::iterator source_here = sources.begin();
105  sourcelistclass::iterator source_end = sources.end();
106  while (source_here != source_end) {
107    if ((*source_here).s != NULL)
108      delete (*source_here).s;
109    ++source_here;
110  }
111  sources.clear();
112
113  // clean up the filters
114  filtermapclass::iterator filter_here = filters.begin();
115  filtermapclass::iterator filter_end = filters.end();
116  while (filter_here != filter_end) {
117    if ((*filter_here).second.f != NULL)
118      delete (*filter_here).second.f;
119    ++filter_here;
120  }
121  filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128  if (cfgline.size() >= 1) {
129    const text_t &value = cfgline[0];
130    if (key == "plugin")
131    {
132        //get the plugin name
133    const text_t &name = cfgline[0];
134   
135    if (name == "HTMLPlugin")
136    {
137        for (int hI = 1; hI < cfgline.size(); hI++)
138        {
139            const text_t &plugOption = cfgline[hI];
140           
141            if (plugOption == "-use_realistic_book")
142            {
143                collectinfo.useBook = true;
144                break;
145            }
146        }
147    }
148    }
149    else if (key == "gsdlhome") configinfo.gsdlhome = value;
150    else if (key == "gdbmhome") configinfo.dbhome = value;
151    else if (key == "collecthome") configinfo.collecthome = value;
152    else if (key == "collection") {
153      configinfo.collection = value;
154      collectinfo.shortInfo.name = value;
155    }
156    else if (key == "collectdir") configinfo.collectdir = value;
157    else if (key == "host") collectinfo.shortInfo.host = value;
158    else if (key == "port") collectinfo.shortInfo.port = value.getint();
159    else if (key == "public") {
160      if (value == "true") collectinfo.isPublic = true;
161      else collectinfo.isPublic = false;
162    } else if (key == "beta") {
163      if (value == "true") collectinfo.isBeta = true;
164      else collectinfo.isBeta = false;
165    } else if (key == "collectgroup") {
166      if (value == "true") collectinfo.isCollectGroup = true;
167      else collectinfo.isCollectGroup = false;
168    } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169    else if (key == "supercollectionoptions") {
170      text_tarray::const_iterator begin = cfgline.begin();
171      text_tarray::const_iterator end = cfgline.end();
172      while(begin != end) {
173   
174    if (*begin == "uniform_search_results_formatting") {
175      collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176    }
177    begin++;
178      }
179    }
180    else if (key == "builddate") collectinfo.buildDate = value.getint();
181    else if (key == "languages") collectinfo.languages = cfgline;
182    else if (key == "numdocs") collectinfo.numDocs = value.getint();
183    else if (key == "numsections") collectinfo.numSections = value.getint();
184    else if (key == "numwords") collectinfo.numWords = value.getint();
185    else if (key == "numbytes") collectinfo.numBytes = value.getint();
186    else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187    else if (key == "collectionmeta") {
188      // genuine collmeta get added as collectionmeta and collection_macros
189      // .collmeta just get added as collection_macros
190      text_t params;
191      if (cfgline.size() == 3) {
192    // get the params for later
193    text_t::const_iterator first=cfgline[1].begin()+1;
194    text_t::const_iterator last=cfgline[1].end()-1;
195    params=substr(first, last);
196      }
197     
198      text_t meta_name = cfgline[0];
199      if (*(meta_name.begin())=='.') {
200    // a .xxx collectionmeta. strip off the . and
201    // look it up in the indexmap to get the actual value
202   
203    text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204    text_t new_name;
205   
206    // make sure that any ex.XXX metadata in the string is turned into XXX for matching
207    name.replace("ex.","");
208
209    if (indexmap.from2to(name, new_name)) {
210      meta_name = new_name;
211    }
212      } else {
213    // add them to collectionmeta
214    text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
215    if (cfgline.size() == 2) {
216      lang_map[g_EmptyText] = cfgline[1];
217    } else if (cfgline.size() == 3 ) {
218      // get the lang out of params
219      paramhashtype params_hash;
220      splitparams(params, params_hash);
221     
222      text_t lang = params_hash["l"];
223      lang_map[lang] = cfgline[2];
224      if (lang_map[g_EmptyText].empty()) {
225        // want the first one as the default if no default specified
226        lang_map[g_EmptyText] = cfgline[2];
227      }
228    }
229    collectinfo.collectionmeta[cfgline[0]] = lang_map;
230   
231      }
232     
233      // add all collectionmeta to macro list
234      text_tmap params_map = collectinfo.collection_macros[meta_name];
235     
236      if (cfgline.size() == 2) {// no params for this macro
237    params_map[g_EmptyText] = cfgline[1];
238      }
239      else if (cfgline.size() == 3) {// has params
240    params_map[params] = cfgline[2];
241    if (params_map[g_EmptyText].empty()) {
242      params_map[g_EmptyText] = cfgline[2];
243    }
244      }
245      collectinfo.collection_macros[meta_name] = params_map;
246    }
247    else if (key == "collectionmacro") {
248      text_t nobrackets;
249      text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
250      // add all to macro list
251      if (cfgline.size() == 2) { // no params for this macro
252    params_map[g_EmptyText] = cfgline[1];
253      }
254      else if (cfgline.size() == 3) {// has params
255    // strip [ ] brackets from params
256    text_t::const_iterator first=cfgline[1].begin()+1;
257    text_t::const_iterator last=cfgline[1].end()-1;
258    nobrackets=substr(first, last);
259    params_map[nobrackets] = cfgline[2];
260      }
261      collectinfo.collection_macros[cfgline[0]] = params_map;
262     
263    } else if (key == "format" && cfgline.size() == 2)
264      collectinfo.format[cfgline[0]] = cfgline[1];
265    else if (key == "building" && cfgline.size() == 2)
266      collectinfo.building[cfgline[0]] = cfgline[1];
267    else if (key == "httpdomain") collectinfo.httpdomain = value;
268    else if (key == "httpprefix") collectinfo.httpprefix = value;
269    else if (key == "receptionist") collectinfo.receptionist = value;
270    else if (key == "buildtype") {
271      check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
272      collectinfo.buildType = value;
273    }
274    // backwards compatibility - searchytpes is now a format statement
275    else if (key == "searchtype") { // means buildtype is mgpp
276      if (collectinfo.buildType.empty()) {
277    check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
278    collectinfo.buildType = "mgpp";
279      }
280      joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
281      //collectinfo.searchTypes = cfgline;
282    }
283    else if (key == "infodbtype") {
284      check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
285      collectinfo.infodbType = value;
286    }
287    else if (key == "separate_cjk") {
288      if (value == "true") collectinfo.isSegmented = true;
289      else collectinfo.isSegmented = false;
290    }
291    // What have we set in our collect.cfg file :  document or collection ?
292    else if (key == "authenticate") collectinfo.authenticate = value;
293
294    // What have we set for our group list
295    else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
296
297    // build.cfg, earliestDatestamp of this collection needed for
298    // OAIServer to work out earliestDatestamp of this repository
299    else if (key == "earliestdatestamp") {
300        collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
301    }
302   
303    // store all the mappings for use when collection meta is read later
304    // (build.cfg read before collect.cfg)
305    else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
306      indexmap.importmap (cfgline, true);
307     
308    }
309    // In the map the key-value pair contain the same
310    // data i.e key == data, if key is 2 then data is 2
311   
312    // What have we set for our public_documents ACL
313    else if (key == "public_documents")
314       {
315      text_tarray::const_iterator begin = cfgline.begin();
316      text_tarray::const_iterator end = cfgline.end();
317      while(begin != end)
318         {
319        // key = data i.e if key is 2 then data is 2
320        // collectinfo.public_documents[*begin] is the key
321        // *begin is the data value
322
323        collectinfo.public_documents[*begin] = *begin;
324        ++begin;
325         }
326       }
327   
328    // What have we set for our private_documents ACL
329    else if (key == "private_documents")
330       {
331      text_tarray::const_iterator begin = cfgline.begin();
332      text_tarray::const_iterator end = cfgline.end();
333      while(begin != end)
334         {
335        // key = data i.e if key is 2 then data is 2
336        // collectinfo.public_documents[*begin] is the key
337        // *begin is the data value
338       
339        collectinfo.private_documents[*begin] = *begin;
340        ++begin;
341         }
342       }
343
344    // dynamic_classifier <UniqueID> "<Options>"
345    else if (key == "dynamic_classifier")
346    {
347      collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
348    }
349  }
350 
351  // configure the filters
352  filtermapclass::iterator filter_here = filters.begin();
353  filtermapclass::iterator filter_end = filters.end();
354  while (filter_here != filter_end) {
355    assert ((*filter_here).second.f != NULL);
356    if ((*filter_here).second.f != NULL)
357      (*filter_here).second.f->configure(key, cfgline);
358
359    ++filter_here;
360  }
361
362  // configure the sources
363  sourcelistclass::iterator source_here = sources.begin();
364  sourcelistclass::iterator source_end = sources.end();
365  while (source_here != source_end) {
366    assert ((*source_here).s != NULL);
367    if ((*source_here).s != NULL)
368      (*source_here).s->configure(key, cfgline);
369   
370    ++source_here;
371  }
372}
373
374
375void collectserver::configure (const text_t &key, const text_t &value) {
376  text_tarray cfgline;
377  cfgline.push_back (value);
378  configure(key, cfgline);
379}
380
381void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
382  // if we've not been properly configured, then it is a foregone
383  // conclusion that we cannot be active
384  if (this->configinfo.collection == "null")
385    {
386      wasSuccess = false;
387    }
388  // if no build date exists, then the collection was probably not built;
389  // ditto if the number of documents is zero, then something is pretty
390  // wrong
391  else if (this->collectinfo.buildDate == 0 ||
392      this->collectinfo.numDocs == 0)
393    {
394      wasSuccess =  false;
395    }
396  // it is probably okay
397  else
398    wasSuccess = true;
399}
400
401
402bool collectserver::init (ostream &logout) {
403  // delete the indexmap
404  indexmap.clear();
405
406  // init the filters
407  filtermapclass::iterator filter_here = filters.begin();
408  filtermapclass::iterator filter_end = filters.end();
409  while (filter_here != filter_end) {
410    assert ((*filter_here).second.f != NULL);
411    if (((*filter_here).second.f != NULL) &&
412    !(*filter_here).second.f->init(logout)) return false;
413   
414    ++filter_here;
415  }
416
417  // init the sources
418  sourcelistclass::iterator source_here = sources.begin();
419  sourcelistclass::iterator source_end = sources.end();
420  while (source_here != source_end) {
421    assert ((*source_here).s != NULL);
422    if (((*source_here).s != NULL) &&
423    !(*source_here).s->init(logout)) return false;
424   
425    ++source_here;
426  }
427
428  return true;
429}
430
431
432void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
433              comerror_t &err, ostream &/*logout*/) {
434  reponse = collectinfo;
435  err = noError;
436}
437
438void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
439                    comerror_t &err, ostream &/*logout*/) {
440  response.clear ();
441
442  // get a list of filter names
443  filtermapclass::iterator filter_here = filters.begin();
444  filtermapclass::iterator filter_end = filters.end();
445  while (filter_here != filter_end) {
446    response.filterNames.insert ((*filter_here).first);
447    ++filter_here;
448  }
449
450  err = noError;
451}
452
453void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
454                       InfoFilterOptionsResponse_t &response,
455                       comerror_t &err, ostream &logout) {
456  outconvertclass text_t2ascii;
457
458  filterclass *thisfilter = filters.getfilter(request.filterName);
459  if (thisfilter != NULL) {
460    thisfilter->get_filteroptions (response, err, logout);
461  } else {
462    response.clear ();
463    err = protocolError;
464    text_t& infodbtype = collectinfo.infodbType;
465
466    // Don't print out the warning if were's asking about SQLQueryFilter
467    // when we know the infodbtype is something other than .*sql.*
468
469    if ((request.filterName != "SQLQueryFilter")
470    || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
471      logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
472         << "filter \"" << request.filterName << "\".\n\n";
473    }
474  }
475}
476
477void collectserver::filter (FilterRequest_t &request,
478                FilterResponse_t &response,
479                comerror_t &err, ostream &logout) {
480  outconvertclass text_t2ascii;
481
482  // translate any ".fc", ".pr" etc. stuff in the docSet
483  text_t translatedOID;
484  text_tarray translatedOIDs;
485  text_tarray::iterator doc_here = request.docSet.begin();
486  text_tarray::iterator doc_end = request.docSet.end();
487  while (doc_here != doc_end) {
488    if (needs_translating (*doc_here)) {
489      sourcelistclass::iterator source_here = sources.begin();
490      sourcelistclass::iterator source_end = sources.end();
491      while (source_here != source_end) {
492    assert ((*source_here).s != NULL);
493    if (((*source_here).s != NULL) &&
494        ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
495      if (err != noError) return;
496      break;
497    }
498    ++source_here;
499      }
500      translatedOIDs.push_back (translatedOID);
501    } else {
502      translatedOIDs.push_back (*doc_here);
503    }
504    ++doc_here;
505  }
506  request.docSet = translatedOIDs;
507
508  response.clear();
509
510  filterclass *thisfilter = filters.getfilter(request.filterName);
511  if (thisfilter != NULL) {
512    // filter the data
513    thisfilter->filter (request, response, err, logout);
514    if (err != noError) return;
515    // fill in the metadata for each of the OIDs (if it is requested)
516    if (request.filterResultOptions & FRmetadata) {
517      bool processed = false;
518      ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
519      ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
520      while (resultdoc_here != resultdoc_end) {
521    // try each of the sources in turn
522    sourcelistclass::iterator source_here = sources.begin();
523    sourcelistclass::iterator source_end = sources.end();
524    while (source_here != source_end) {
525      assert ((*source_here).s != NULL);
526      if (((*source_here).s != NULL) &&
527          ((*source_here).s->get_metadata(request.requestParams, request.refParams,
528                          request.getParents, request.fields,
529                          (*resultdoc_here).OID, (*resultdoc_here).metadata,
530                          err, logout))) {
531        if (err != noError) return;
532        processed = true;
533        break;
534      }
535      ++source_here;
536    }
537    if (!processed) {
538
539      logout << text_t2ascii << "Protocol Error: nothing processed for "
540       << "filter \"" << request.filterName << "\".\n\n";
541
542      err = protocolError;
543      return;
544    }
545    ++resultdoc_here;
546      }
547    } 
548 
549    err = noError;
550  }
551  else
552  {
553    response.clear ();
554    err = protocolError;
555    logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
556       << "filter \"" << request.filterName << "\".\n\n";
557  }
558}
559
560void collectserver::get_document (const DocumentRequest_t &request,
561                  DocumentResponse_t &response,
562                  comerror_t &err, ostream &logout) {
563
564  sourcelistclass::iterator source_here = sources.begin();
565  sourcelistclass::iterator source_end = sources.end();
566  while (source_here != source_end) {
567    assert ((*source_here).s != NULL);
568    if (((*source_here).s != NULL) &&
569    ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
570      if (err != noError) return;
571      break;
572    }
573    ++source_here;
574  }
575}
576
577void collectserver::is_searchable (bool &issearchable, comerror_t &err,
578                   ostream &logout) {
579
580  sourcelistclass::iterator source_here = sources.begin();
581  sourcelistclass::iterator source_end = sources.end();
582  while (source_here != source_end) {
583    assert ((*source_here).s != NULL);
584    if (((*source_here).s != NULL) &&
585    ((*source_here).s->is_searchable (issearchable, err, logout))) {
586      if (err != noError) return;
587      break;
588    }
589    ++source_here;
590  }
591}
592
593
594bool operator==(const collectserverptr &x, const collectserverptr &y) {
595  return (x.c == y.c);
596}
597
598bool operator<(const collectserverptr &x, const collectserverptr &y) {
599  return (x.c < y.c);
600}
601
602
603// thecollectserver remains the property of the calling code but
604// should not be deleted until it is removed from this list.
605void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
606  // can't add a null collection server
607  assert (thecollectserver != NULL);
608  if (thecollectserver == NULL) return;
609 
610  // can't add an collection server with no collection name
611  assert (!(thecollectserver->get_collection_name()).empty());
612  if ((thecollectserver->get_collection_name()).empty()) return;
613
614  collectserverptr cptr;
615  cptr.c = thecollectserver;
616  collectserverptrs[thecollectserver->get_collection_name()] = cptr;
617}
618
619// getcollectserver will return NULL if the collectserver could not be found
620collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
621  // can't find a collection with no name
622  if (collection.empty()) return NULL;
623
624  iterator here = collectserverptrs.find (collection);
625  if (here == collectserverptrs.end()) return NULL;
626 
627  return (*here).second.c;
628}
Note: See TracBrowser for help on using the browser.