root/main/trunk/greenstone2/runtime-src/src/colservr/collectserver.cpp @ 24411

Revision 24411, 20.5 KB (checked in by ak19, 8 years ago)

Katherine's commit for modelcol's collect.cfg explained that its collectionmeta section should not contain ex.* prefixes for GS extracted metadata. (It can and does contain ex. prefixes for extracted embedded metadata, since otherwise we may have multiple occurrences of dc.Title in there, when one of them is meant to refer to ex.dc.Title.) This resulted in correcting collect.cfg to not refer to ex.Title anymore in its collectionmeta section. And GLI is processing all ex. prefixes now, so undoing the previous commit in runtime-source where all ex. prefixes were removed from the collectionmeta section: this is no longer applicable (since there will be no GS extracted meta like ex.Title in the collectionmeta section) AND we don't want ex. prefixes removed from embedded ex.* metadata (like ex.dc.*).

  • Property svn:keywords set to Author Date Id Revision
Line 
1 
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999  The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32void check_if_valid_buildtype(const text_t& buildtype)
33{
34  if (buildtype=="mg") {
35#ifndef ENABLE_MG
36    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
37#endif
38  }
39
40  else if (buildtype=="mgpp") {
41#ifndef ENABLE_MGPP
42    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
43#endif
44  }
45
46  else if (buildtype=="lucene") {
47#ifndef ENABLE_LUCENE
48    cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
49#endif
50  }
51
52  else {
53    cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
54  }
55
56}
57
58
59void check_if_valid_infodbtype(const text_t& infodbtype)
60{
61  if (infodbtype=="gdbm") {
62#ifndef USE_GDBM
63    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
64#endif
65  }
66  else if (infodbtype=="gdbm-txtgz") {
67#ifndef USE_GDBM
68    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
69#endif
70  }
71  else if (infodbtype=="jdbm") {
72#ifndef USE_JDBM
73    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
74#endif
75  }
76  else if (infodbtype=="sqlite") {
77#ifndef USE_SQLITE
78    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
79#endif
80  }
81  else if (infodbtype=="mssql") {
82#ifndef USE_MSSQL
83    cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
84#endif
85  }
86
87  else {
88    cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
89  }
90
91}
92
93
94
95collectserver::collectserver ()
96  : collectinfo()
97{
98  configinfo.collection = "null";
99}
100
101collectserver::~collectserver () {
102
103  // clean up the sources
104  sourcelistclass::iterator source_here = sources.begin();
105  sourcelistclass::iterator source_end = sources.end();
106  while (source_here != source_end) {
107    if ((*source_here).s != NULL)
108      delete (*source_here).s;
109    ++source_here;
110  }
111  sources.clear();
112
113  // clean up the filters
114  filtermapclass::iterator filter_here = filters.begin();
115  filtermapclass::iterator filter_end = filters.end();
116  while (filter_here != filter_end) {
117    if ((*filter_here).second.f != NULL)
118      delete (*filter_here).second.f;
119    ++filter_here;
120  }
121  filters.clear();
122}
123
124// configure should be called for each line in the
125// configuration files to configure the collection server and everything
126// it contains. The configuration should take place just before initialisation
127void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
128  if (cfgline.size() >= 1) {
129    const text_t &value = cfgline[0];
130    if (key == "plugin")
131    {
132        //get the plugin name
133    const text_t &name = cfgline[0];
134   
135    if (name == "HTMLPlugin")
136    {
137        for (int hI = 1; hI < cfgline.size(); hI++)
138        {
139            const text_t &plugOption = cfgline[hI];
140           
141            if (plugOption == "-use_realistic_book")
142            {
143                collectinfo.useBook = true;
144                break;
145            }
146        }
147    }
148    }
149    else if (key == "gsdlhome") configinfo.gsdlhome = value;
150    else if (key == "gdbmhome") configinfo.dbhome = value;
151    else if (key == "collecthome") configinfo.collecthome = value;
152    else if (key == "collection") {
153      configinfo.collection = value;
154      collectinfo.shortInfo.name = value;
155    }
156    else if (key == "collectdir") configinfo.collectdir = value;
157    else if (key == "host") collectinfo.shortInfo.host = value;
158    else if (key == "port") collectinfo.shortInfo.port = value.getint();
159    else if (key == "public") {
160      if (value == "true") collectinfo.isPublic = true;
161      else collectinfo.isPublic = false;
162    } else if (key == "beta") {
163      if (value == "true") collectinfo.isBeta = true;
164      else collectinfo.isBeta = false;
165    } else if (key == "collectgroup") {
166      if (value == "true") collectinfo.isCollectGroup = true;
167      else collectinfo.isCollectGroup = false;
168    } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
169    else if (key == "supercollectionoptions") {
170      text_tarray::const_iterator begin = cfgline.begin();
171      text_tarray::const_iterator end = cfgline.end();
172      while(begin != end) {
173   
174    if (*begin == "uniform_search_results_formatting") {
175      collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
176    }
177    begin++;
178      }
179    }
180    else if (key == "builddate") collectinfo.buildDate = value.getint();
181    else if (key == "languages") collectinfo.languages = cfgline;
182    else if (key == "numdocs") collectinfo.numDocs = value.getint();
183    else if (key == "numsections") collectinfo.numSections = value.getint();
184    else if (key == "numwords") collectinfo.numWords = value.getint();
185    else if (key == "numbytes") collectinfo.numBytes = value.getint();
186    else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
187    else if (key == "collectionmeta") {
188      // genuine collmeta get added as collectionmeta and collection_macros
189      // .collmeta just get added as collection_macros
190      text_t params;
191      if (cfgline.size() == 3) {
192    // get the params for later
193    text_t::const_iterator first=cfgline[1].begin()+1;
194    text_t::const_iterator last=cfgline[1].end()-1;
195    params=substr(first, last);
196      }
197     
198      text_t meta_name = cfgline[0];
199      if (*(meta_name.begin())=='.') {
200    // a .xxx collectionmeta. strip off the . and
201    // look it up in the indexmap to get the actual value
202   
203    text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
204    text_t new_name;
205   
206    // Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
207    // Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here.
208    // So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
209    // contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.
210
211    if (indexmap.from2to(name, new_name)) {
212      meta_name = new_name;
213    }
214      } else {
215    // add them to collectionmeta
216    text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
217    if (cfgline.size() == 2) {
218      lang_map[g_EmptyText] = cfgline[1];
219    } else if (cfgline.size() == 3 ) {
220      // get the lang out of params
221      paramhashtype params_hash;
222      splitparams(params, params_hash);
223     
224      text_t lang = params_hash["l"];
225      lang_map[lang] = cfgline[2];
226      if (lang_map[g_EmptyText].empty()) {
227        // want the first one as the default if no default specified
228        lang_map[g_EmptyText] = cfgline[2];
229      }
230    }
231    collectinfo.collectionmeta[cfgline[0]] = lang_map;
232   
233      }
234     
235      // add all collectionmeta to macro list
236      text_tmap params_map = collectinfo.collection_macros[meta_name];
237     
238      if (cfgline.size() == 2) {// no params for this macro
239    params_map[g_EmptyText] = cfgline[1];
240      }
241      else if (cfgline.size() == 3) {// has params
242    params_map[params] = cfgline[2];
243    if (params_map[g_EmptyText].empty()) {
244      params_map[g_EmptyText] = cfgline[2];
245    }
246      }
247      collectinfo.collection_macros[meta_name] = params_map;
248    }
249    else if (key == "collectionmacro") {
250      text_t nobrackets;
251      text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
252      // add all to macro list
253      if (cfgline.size() == 2) { // no params for this macro
254    params_map[g_EmptyText] = cfgline[1];
255      }
256      else if (cfgline.size() == 3) {// has params
257    // strip [ ] brackets from params
258    text_t::const_iterator first=cfgline[1].begin()+1;
259    text_t::const_iterator last=cfgline[1].end()-1;
260    nobrackets=substr(first, last);
261    params_map[nobrackets] = cfgline[2];
262      }
263      collectinfo.collection_macros[cfgline[0]] = params_map;
264     
265    } else if (key == "format" && cfgline.size() == 2)
266      collectinfo.format[cfgline[0]] = cfgline[1];
267    else if (key == "building" && cfgline.size() == 2)
268      collectinfo.building[cfgline[0]] = cfgline[1];
269    else if (key == "httpdomain") collectinfo.httpdomain = value;
270    else if (key == "httpprefix") collectinfo.httpprefix = value;
271    else if (key == "receptionist") collectinfo.receptionist = value;
272    else if (key == "buildtype") {
273      check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
274      collectinfo.buildType = value;
275    }
276    // backwards compatibility - searchytpes is now a format statement
277    else if (key == "searchtype") { // means buildtype is mgpp
278      if (collectinfo.buildType.empty()) {
279    check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
280    collectinfo.buildType = "mgpp";
281      }
282      joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
283      //collectinfo.searchTypes = cfgline;
284    }
285    else if (key == "infodbtype") {
286      check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
287      collectinfo.infodbType = value;
288    }
289    else if (key == "separate_cjk") {
290      if (value == "true") collectinfo.isSegmented = true;
291      else collectinfo.isSegmented = false;
292    }
293    // What have we set in our collect.cfg file :  document or collection ?
294    else if (key == "authenticate") collectinfo.authenticate = value;
295
296    // What have we set for our group list
297    else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
298
299    // build.cfg, earliestDatestamp of this collection needed for
300    // OAIServer to work out earliestDatestamp of this repository
301    else if (key == "earliestdatestamp") {
302        collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
303    }
304   
305    // store all the mappings for use when collection meta is read later
306    // (build.cfg read before collect.cfg)
307    else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
308      indexmap.importmap (cfgline, true);
309     
310    }
311    // In the map the key-value pair contain the same
312    // data i.e key == data, if key is 2 then data is 2
313   
314    // What have we set for our public_documents ACL
315    else if (key == "public_documents")
316       {
317      text_tarray::const_iterator begin = cfgline.begin();
318      text_tarray::const_iterator end = cfgline.end();
319      while(begin != end)
320         {
321        // key = data i.e if key is 2 then data is 2
322        // collectinfo.public_documents[*begin] is the key
323        // *begin is the data value
324
325        collectinfo.public_documents[*begin] = *begin;
326        ++begin;
327         }
328       }
329   
330    // What have we set for our private_documents ACL
331    else if (key == "private_documents")
332       {
333      text_tarray::const_iterator begin = cfgline.begin();
334      text_tarray::const_iterator end = cfgline.end();
335      while(begin != end)
336         {
337        // key = data i.e if key is 2 then data is 2
338        // collectinfo.public_documents[*begin] is the key
339        // *begin is the data value
340       
341        collectinfo.private_documents[*begin] = *begin;
342        ++begin;
343         }
344       }
345
346    // dynamic_classifier <UniqueID> "<Options>"
347    else if (key == "dynamic_classifier")
348    {
349      collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
350    }
351  }
352 
353  // configure the filters
354  filtermapclass::iterator filter_here = filters.begin();
355  filtermapclass::iterator filter_end = filters.end();
356  while (filter_here != filter_end) {
357    assert ((*filter_here).second.f != NULL);
358    if ((*filter_here).second.f != NULL)
359      (*filter_here).second.f->configure(key, cfgline);
360
361    ++filter_here;
362  }
363
364  // configure the sources
365  sourcelistclass::iterator source_here = sources.begin();
366  sourcelistclass::iterator source_end = sources.end();
367  while (source_here != source_end) {
368    assert ((*source_here).s != NULL);
369    if ((*source_here).s != NULL)
370      (*source_here).s->configure(key, cfgline);
371   
372    ++source_here;
373  }
374}
375
376
377void collectserver::configure (const text_t &key, const text_t &value) {
378  text_tarray cfgline;
379  cfgline.push_back (value);
380  configure(key, cfgline);
381}
382
383void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
384  // if we've not been properly configured, then it is a foregone
385  // conclusion that we cannot be active
386  if (this->configinfo.collection == "null")
387    {
388      wasSuccess = false;
389    }
390  // if no build date exists, then the collection was probably not built;
391  // ditto if the number of documents is zero, then something is pretty
392  // wrong
393  else if (this->collectinfo.buildDate == 0 ||
394      this->collectinfo.numDocs == 0)
395    {
396      wasSuccess =  false;
397    }
398  // it is probably okay
399  else
400    wasSuccess = true;
401}
402
403
404bool collectserver::init (ostream &logout) {
405  // delete the indexmap
406  indexmap.clear();
407
408  // init the filters
409  filtermapclass::iterator filter_here = filters.begin();
410  filtermapclass::iterator filter_end = filters.end();
411  while (filter_here != filter_end) {
412    assert ((*filter_here).second.f != NULL);
413    if (((*filter_here).second.f != NULL) &&
414    !(*filter_here).second.f->init(logout)) return false;
415   
416    ++filter_here;
417  }
418
419  // init the sources
420  sourcelistclass::iterator source_here = sources.begin();
421  sourcelistclass::iterator source_end = sources.end();
422  while (source_here != source_end) {
423    assert ((*source_here).s != NULL);
424    if (((*source_here).s != NULL) &&
425    !(*source_here).s->init(logout)) return false;
426   
427    ++source_here;
428  }
429
430  return true;
431}
432
433
434void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
435              comerror_t &err, ostream &/*logout*/) {
436  reponse = collectinfo;
437  err = noError;
438}
439
440void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
441                    comerror_t &err, ostream &/*logout*/) {
442  response.clear ();
443
444  // get a list of filter names
445  filtermapclass::iterator filter_here = filters.begin();
446  filtermapclass::iterator filter_end = filters.end();
447  while (filter_here != filter_end) {
448    response.filterNames.insert ((*filter_here).first);
449    ++filter_here;
450  }
451
452  err = noError;
453}
454
455void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
456                       InfoFilterOptionsResponse_t &response,
457                       comerror_t &err, ostream &logout) {
458  outconvertclass text_t2ascii;
459
460  filterclass *thisfilter = filters.getfilter(request.filterName);
461  if (thisfilter != NULL) {
462    thisfilter->get_filteroptions (response, err, logout);
463  } else {
464    response.clear ();
465    err = protocolError;
466    text_t& infodbtype = collectinfo.infodbType;
467
468    // Don't print out the warning if were's asking about SQLQueryFilter
469    // when we know the infodbtype is something other than .*sql.*
470
471    if ((request.filterName != "SQLQueryFilter")
472    || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
473      logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
474         << "filter \"" << request.filterName << "\".\n\n";
475    }
476  }
477}
478
479void collectserver::filter (FilterRequest_t &request,
480                FilterResponse_t &response,
481                comerror_t &err, ostream &logout) {
482  outconvertclass text_t2ascii;
483
484  // translate any ".fc", ".pr" etc. stuff in the docSet
485  text_t translatedOID;
486  text_tarray translatedOIDs;
487  text_tarray::iterator doc_here = request.docSet.begin();
488  text_tarray::iterator doc_end = request.docSet.end();
489  while (doc_here != doc_end) {
490    if (needs_translating (*doc_here)) {
491      sourcelistclass::iterator source_here = sources.begin();
492      sourcelistclass::iterator source_end = sources.end();
493      while (source_here != source_end) {
494    assert ((*source_here).s != NULL);
495    if (((*source_here).s != NULL) &&
496        ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
497      if (err != noError) return;
498      break;
499    }
500    ++source_here;
501      }
502      translatedOIDs.push_back (translatedOID);
503    } else {
504      translatedOIDs.push_back (*doc_here);
505    }
506    ++doc_here;
507  }
508  request.docSet = translatedOIDs;
509
510  response.clear();
511
512  filterclass *thisfilter = filters.getfilter(request.filterName);
513  if (thisfilter != NULL) {
514    // filter the data
515    thisfilter->filter (request, response, err, logout);
516    if (err != noError) return;
517    // fill in the metadata for each of the OIDs (if it is requested)
518    if (request.filterResultOptions & FRmetadata) {
519      bool processed = false;
520      ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
521      ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
522      while (resultdoc_here != resultdoc_end) {
523    // try each of the sources in turn
524    sourcelistclass::iterator source_here = sources.begin();
525    sourcelistclass::iterator source_end = sources.end();
526    while (source_here != source_end) {
527      assert ((*source_here).s != NULL);
528      if (((*source_here).s != NULL) &&
529          ((*source_here).s->get_metadata(request.requestParams, request.refParams,
530                          request.getParents, request.fields,
531                          (*resultdoc_here).OID, (*resultdoc_here).metadata,
532                          err, logout))) {
533        if (err != noError) return;
534        processed = true;
535        break;
536      }
537      ++source_here;
538    }
539    if (!processed) {
540
541      logout << text_t2ascii << "Protocol Error: nothing processed for "
542       << "filter \"" << request.filterName << "\".\n\n";
543
544      err = protocolError;
545      return;
546    }
547    ++resultdoc_here;
548      }
549    } 
550 
551    err = noError;
552  }
553  else
554  {
555    response.clear ();
556    err = protocolError;
557    logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
558       << "filter \"" << request.filterName << "\".\n\n";
559  }
560}
561
562void collectserver::get_document (const DocumentRequest_t &request,
563                  DocumentResponse_t &response,
564                  comerror_t &err, ostream &logout) {
565
566  sourcelistclass::iterator source_here = sources.begin();
567  sourcelistclass::iterator source_end = sources.end();
568  while (source_here != source_end) {
569    assert ((*source_here).s != NULL);
570    if (((*source_here).s != NULL) &&
571    ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
572      if (err != noError) return;
573      break;
574    }
575    ++source_here;
576  }
577}
578
579void collectserver::is_searchable (bool &issearchable, comerror_t &err,
580                   ostream &logout) {
581
582  sourcelistclass::iterator source_here = sources.begin();
583  sourcelistclass::iterator source_end = sources.end();
584  while (source_here != source_end) {
585    assert ((*source_here).s != NULL);
586    if (((*source_here).s != NULL) &&
587    ((*source_here).s->is_searchable (issearchable, err, logout))) {
588      if (err != noError) return;
589      break;
590    }
591    ++source_here;
592  }
593}
594
595
596bool operator==(const collectserverptr &x, const collectserverptr &y) {
597  return (x.c == y.c);
598}
599
600bool operator<(const collectserverptr &x, const collectserverptr &y) {
601  return (x.c < y.c);
602}
603
604
605// thecollectserver remains the property of the calling code but
606// should not be deleted until it is removed from this list.
607void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
608  // can't add a null collection server
609  assert (thecollectserver != NULL);
610  if (thecollectserver == NULL) return;
611 
612  // can't add an collection server with no collection name
613  assert (!(thecollectserver->get_collection_name()).empty());
614  if ((thecollectserver->get_collection_name()).empty()) return;
615
616  collectserverptr cptr;
617  cptr.c = thecollectserver;
618  collectserverptrs[thecollectserver->get_collection_name()] = cptr;
619}
620
621// getcollectserver will return NULL if the collectserver could not be found
622collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
623  // can't find a collection with no name
624  if (collection.empty()) return NULL;
625
626  iterator here = collectserverptrs.find (collection);
627  if (here == collectserverptrs.end()) return NULL;
628 
629  return (*here).second.c;
630}
Note: See TracBrowser for help on using the browser.