root/gsdl/trunk/src/colservr/source.cpp @ 16310

Revision 16310, 13.4 KB (checked in by davidb, 12 years ago)

Introduction of 'collecthome' which parallels 'gsdlhome' to allow the toplevel collect folder to be outside of the gsdlhome area

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include <assert.h>
30
31
32sourceclass::sourceclass ()
33{
34  db_ptr = NULL;
35  textsearchptr = NULL;
36  classname = "source";
37}
38
39sourceclass::~sourceclass ()
40{
41  if (db_ptr != NULL) delete db_ptr;
42  if (textsearchptr != NULL) delete textsearchptr;
43}
44
45// configure should be called once for each configuration line
46void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
47{
48  if (cfgline.size() >= 1) {
49    const text_t &value = cfgline[0];
50
51    if (key == "collection") collection = value;
52    else if (key == "collectdir") collectdir = value;
53    else if (key == "gsdlhome") gsdlhome = value;
54    else if (key == "collecthome") collecthome = value;
55    else if (key == "gdbmhome") dbhome = value;
56  }
57
58  if (key == "indexmap") {
59    indexmap.importmap (cfgline);
60
61  } else if (key == "defaultindex") {
62    indexmap.from2to (cfgline[0], defaultindex);
63
64  } else if (key == "subcollectionmap") {
65    subcollectionmap.importmap (cfgline);
66
67  } else if (key == "defaultsubcollection") {
68    subcollectionmap.from2to (cfgline[0], defaultsubcollection);
69
70  } else if (key == "languagemap") {
71    languagemap.importmap (cfgline);
72
73  } else if (key == "defaultlanguage") {
74    languagemap.from2to (cfgline[0], defaultlanguage);
75  } else if (key == "indexstem") {
76    indexstem = cfgline[0];
77  }
78}
79
80text_t sourceclass::resolve_db_filename(const text_t& idx,
81                    const text_t& file_ext)
82{
83  // This is an exact copy of the method (of the same name) in filterclass
84  // Makes sense to merge them, in which either gsdlhome, collecthome,
85  // dbhome, and collection need to also be passed in as parameters,
86  // or else there is some notion of a shared base class that both
87  // filter and source inherit from
88
89  // NB: there is an even greater opportunity to share more code in this
90  // function if sql_db_ptr/db_ptr and db_filename and sql_db_filename
91  // are also drawn from one object
92
93  text_t resolved_filename;
94
95  if (gsdlhome==dbhome) {
96    // dbhome has defaulted to gsdlhome which we take to means the
97    // database has been specifically moved out of gsdlhome area.
98    // => it should be whereever collecthome is set to
99
100    resolved_filename = filename_cat(collecthome, collection, "index", "text", idx);
101  }
102  else {
103    // dbhome is explicitly set to something other than gsdlhome
104    // => use dbhome
105    resolved_filename = filename_cat(dbhome, "collect", collection, "index", "text", idx);
106  }
107
108  resolved_filename += file_ext;
109
110  return resolved_filename;
111}
112
113
114
115// init should be called after all the configuration is done but
116// before any other methods are called
117bool sourceclass::init (ostream &logout)
118{
119  outconvertclass text_t2ascii;
120 
121  if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
122  if (dbhome.empty()) dbhome = gsdlhome;
123
124  if (defaultindex.empty()) {
125    // use first index in map as default if no default is set explicitly
126    text_tarray toarray;
127    indexmap.gettoarray(toarray);
128    if (toarray.size()) {
129      defaultindex = toarray[0];
130    }
131  }
132
133  if (defaultsubcollection.empty()) {
134    // use first subcollection in map as default if no default is set explicitly
135    text_tarray toarray;
136    subcollectionmap.gettoarray(toarray);
137    if (toarray.size()) {
138      defaultsubcollection = toarray[0];
139    }
140  }
141
142  if (defaultlanguage.empty()) {
143    // use first language in map as default if no default is set explicitly
144    text_tarray toarray;
145    languagemap.gettoarray(toarray);
146    if (toarray.size()) {
147      defaultlanguage = toarray[0];
148    }
149  }
150 
151  // get the collection directory name
152  if (collectdir.empty()) {
153    collectdir = filename_cat (collecthome, collection);
154  }
155
156  if (db_ptr == NULL) {
157    // most likely a configuration problem
158    logout << text_t2ascii
159       << "configuration error: queryfilter contains a null dbclass\n\n";
160    return false;
161  }
162
163  // get the filename for the database and make sure it exists
164  if (indexstem.empty()) {
165    indexstem = collection;
166  }
167  db_filename = resolve_db_filename(indexstem, db_ptr->getfileextension());
168  if (!file_exists(db_filename)) {
169    logout << text_t2ascii
170       << "warning: database \"" << db_filename << "\" does not exist\n\n";
171    // return false;
172  }
173 
174  return true;
175}
176
177
178// translate_OID translates OIDs using ".pr", ."fc" etc.
179bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
180                 comerror_t &err, ostream &logout)
181{
182  outconvertclass text_t2ascii;
183
184  err = noError;
185  if (db_ptr == NULL) {
186    // most likely a configuration problem
187    logout << text_t2ascii
188       << "configuration error: " << classname << " contains a null dbclass\n\n";
189    err = configurationError;
190    return true;
191  }
192
193  // open the database
194  db_ptr->setlogout(&logout);
195  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
196    // most likely a system problem (we have already checked that the database exists)
197    logout << text_t2ascii
198       << "system problem: open on database \"" << db_filename << "\" failed\n\n";
199    err = systemProblem;
200    return true;
201  }
202
203  infodbclass info;
204  OIDout = db_ptr->translate_OID (OIDin, info);
205  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
206  return true;
207}
208
209
210// get_metadata fills out the metadata if possible, if it is not responsable
211// for the given OID then it will return false.
212bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
213                bool getParents, const text_tset &fields,
214                const text_t &OID, MetadataInfo_tmap &metadata,
215                comerror_t &err, ostream &logout)
216{
217  outconvertclass text_t2ascii;
218
219  metadata.erase(metadata.begin(), metadata.end());
220
221  err = noError;
222  if (db_ptr == NULL) {
223    // most likely a configuration problem
224    logout << text_t2ascii
225       << "configuration error: " << classname <<" contains a null dbclass\n\n";
226    err = configurationError;
227    return true;
228  }
229
230  // open the database
231  db_ptr->setlogout(&logout);
232  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
233    // most likely a system problem (we have already checked that the database exists)
234    logout << text_t2ascii
235       << "system problem: open on database \"" << db_filename << "\" failed\n\n";
236    err = systemProblem;
237    return true;
238  }
239
240  // get the metadata - if getParents is set we need to get
241  // info for all parents of OID as well as OID
242  vector<infodbclass> info_array;
243  text_tarray OIDs;
244  if (getParents) get_parents_array (OID, OIDs);
245  OIDs.push_back (OID);
246
247  text_tarray::const_iterator this_OID = OIDs.begin();
248  text_tarray::const_iterator end_OID = OIDs.end();
249
250  while (this_OID != end_OID) {
251    infodbclass info;
252    if (!db_ptr->getinfo(*this_OID, info)) return false;
253
254    // adjust the metadata
255    text_t &contains = info["contains"];
256    if (contains.empty()) info["haschildren"] = 0;
257    else info["haschildren"] = 1;
258    //contains.clear();
259
260    info_array.push_back(info);
261    ++this_OID;
262  }
263
264  // if fields set is empty we want to get all available metadata
265  text_tset tfields = fields;
266  if (tfields.empty() && !info_array.empty()) {
267    infodbclass::iterator t_info = info_array[0].begin();
268    infodbclass::iterator e_info = info_array[0].end();
269    while (t_info != e_info) {
270      if ((*t_info).first != "contains")
271    tfields.insert ((*t_info).first);
272      ++t_info;
273    }
274    tfields.insert ("hasnext");
275    tfields.insert ("hasprevious");
276  }
277   
278  // collect together the metadata
279  bool donenextprevtest = false;
280  bool hasnext=false, hasprevious=false;
281  MetadataInfo_t this_metadata;
282  text_tarray *pos_metadata;
283  text_tset::const_iterator fields_here = tfields.begin();
284  text_tset::const_iterator fields_end = tfields.end();
285
286  while (fields_here != fields_end) {
287    this_metadata.clear();
288    this_metadata.isRef = false;
289
290    vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
291    vector<infodbclass>::reverse_iterator end_info = info_array.rend();
292    MetadataInfo_t *tmetaptr = &this_metadata;
293    while (this_info != end_info) {
294
295      pos_metadata = (*this_info).getmultinfo(*fields_here);
296      if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
297   
298    // collect metadata
299    if (!donenextprevtest) {
300      donenextprevtest = true;
301     
302      // cache parent contents array
303      text_t thisparent = get_parent (OID);
304      if (!thisparent.empty()) {
305        if (thisparent != parentOID) {
306          parentOID = thisparent;
307          parentcontents.erase(parentcontents.begin(), parentcontents.end());
308          if (db_ptr->getinfo(parentOID, parentinfo)) {
309        text_t &parentinfocontains = parentinfo["contains"];
310        if (!parentinfocontains.empty())
311          splitchar (parentinfocontains.begin(), parentinfocontains.end(),
312                 ';', parentcontents);
313          }
314        }
315       
316        // do tests
317        text_tarray::const_iterator parentcontents_here = parentcontents.begin();
318        text_tarray::const_iterator parentcontents_end = parentcontents.end();
319        text_t shrunk_OID = OID;
320        shrink_parent (shrunk_OID);
321        while (parentcontents_here != parentcontents_end) {
322          if (*parentcontents_here == shrunk_OID) {
323        if (parentcontents_here == parentcontents.begin()) hasprevious = false;
324        else hasprevious = true;
325       
326        ++parentcontents_here;
327       
328        if (parentcontents_here == parentcontents.end()) hasnext = false;
329        else hasnext = true;
330       
331        break;
332          } 
333         
334          ++parentcontents_here;
335        }
336     
337        // fill in metadata
338        if ((*fields_here == "hasnext" && hasnext) ||
339        (*fields_here == "hasprevious" && hasprevious))
340          tmetaptr->values.push_back("1");
341        else
342          tmetaptr->values.push_back("0");
343      } else
344        tmetaptr->values.push_back("0");
345    }
346      }
347      //else if (pos_metadata != NULL && *fields_here != "contains") {
348      else if (pos_metadata != NULL) {
349    tmetaptr->values = *pos_metadata;
350      }
351      else
352    tmetaptr->values.push_back("");
353
354      ++this_info;
355      if (this_info != end_info) {
356    tmetaptr->parent = new MetadataInfo_t();
357    tmetaptr = tmetaptr->parent;
358      }
359    }
360    metadata[*fields_here] = this_metadata;
361    ++fields_here;
362  }
363
364  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
365  return true;
366}
367
368bool sourceclass::get_document (const text_t &OID, text_t &doc,
369                comerror_t &err, ostream &logout)
370{
371  outconvertclass text_t2ascii;
372
373  err = noError;
374  if (db_ptr == NULL) {
375    // most likely a configuration problem
376    logout << text_t2ascii
377       << "configuration error: " << classname << " contains a null dbclass\n\n";
378    err = configurationError;
379    return true;
380  }
381
382  // open the database
383  db_ptr->setlogout(&logout);
384  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
385    // most likely a system problem (we have already checked that the database exists)
386    logout << text_t2ascii
387       << "system problem: open on database \"" << db_filename << "\" failed\n\n";
388    err = systemProblem;
389    return true;
390  }
391
392  text_t tOID = OID;
393  if (needs_translating (OID))
394    translate_OID (OID, tOID, err, logout);
395  infodbclass info;
396  if (!db_ptr->getinfo(tOID, info)) {
397    db_ptr->closedatabase();  // Important that local library doesn't leave any files open
398    return false;
399  }
400 
401  if (info["hastxt"].getint() == 1) {
402    int docnum = info["docnum"].getint();
403   
404    // set the collection directory
405    textsearchptr->setcollectdir (collectdir);
406   
407    // get the text
408    textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
409                   defaultlanguage, collection, docnum, doc);
410  }
411
412  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
413  return true;
414}
415
416bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
417{
418  err = noError;
419  issearchable = false;
420
421  text_tarray fromarray;
422  indexmap.getfromarray(fromarray);
423  if (fromarray.size() == 0) {
424    return true;
425  } else if (fromarray.size() == 1) {
426    if (fromarray[0] == "dummy:text") {
427      // always return true - issearchable is false here though
428      return true;
429    }
430  }
431  issearchable = true;
432  return true;
433}
434
435
436bool operator==(const sourceptr &x, const sourceptr &y) {
437  return (x.s == y.s);
438}
439
440bool operator<(const sourceptr &x, const sourceptr &y) {
441  return (x.s < y.s);
442}
443
444
445// thesource remains the property of the calling code but
446// should not be deleted until it is removed from this list.
447void sourcelistclass::addsource (sourceclass *thesource) {
448  // can't add a source that doesn't exist
449  assert (thesource != NULL);
450  if (thesource == NULL) return;
451
452  sourceptr sp;
453  sp.s = thesource;
454
455  sourceptrs.push_back(sp);
456}
Note: See TracBrowser for help on using the browser.