source: gsdl/trunk/src/colservr/source.cpp@ 15591

Last change on this file since 15591 was 15591, checked in by mdewsnip, 16 years ago

(Adding new DB support) Moved all the code in gdbmsource into source, and changed mggdbmsource and lucenegdbmsource to inherit from source instead of gdbmsource. This is part of removing GDBM from these files altogether.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include "gsdltools.h"
30#include <assert.h>
31
32
33sourceclass::sourceclass ()
34{
35 db_ptr = NULL;
36 textsearchptr = NULL;
37 classname = "source";
38}
39
40sourceclass::~sourceclass ()
41{
42 if (db_ptr != NULL) delete db_ptr;
43 if (textsearchptr != NULL) delete textsearchptr;
44}
45
46// configure should be called once for each configuration line
47void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
48{
49 if (cfgline.size() >= 1) {
50 const text_t &value = cfgline[0];
51
52 if (key == "collection") collection = value;
53 else if (key == "collectdir") collectdir = value;
54 else if (key == "gsdlhome") gsdlhome = value;
55 else if (key == "gdbmhome") dbhome = value;
56 }
57
58 if (key == "indexmap") {
59 indexmap.importmap (cfgline);
60
61 } else if (key == "defaultindex") {
62 indexmap.from2to (cfgline[0], defaultindex);
63
64 } else if (key == "subcollectionmap") {
65 subcollectionmap.importmap (cfgline);
66
67 } else if (key == "defaultsubcollection") {
68 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
69
70 } else if (key == "languagemap") {
71 languagemap.importmap (cfgline);
72
73 } else if (key == "defaultlanguage") {
74 languagemap.from2to (cfgline[0], defaultlanguage);
75 } else if (key == "indexstem") {
76 indexstem = cfgline[0];
77 }
78}
79
80// init should be called after all the configuration is done but
81// before any other methods are called
82bool sourceclass::init (ostream &logout)
83{
84 outconvertclass text_t2ascii;
85
86 if (dbhome.empty()) dbhome = gsdlhome;
87
88 if (!sourceclass::init (logout)) return false;
89
90 if (defaultindex.empty()) {
91 // use first index in map as default if no default is set explicitly
92 text_tarray toarray;
93 indexmap.gettoarray(toarray);
94 if (toarray.size()) {
95 defaultindex = toarray[0];
96 }
97 }
98
99 if (defaultsubcollection.empty()) {
100 // use first subcollection in map as default if no default is set explicitly
101 text_tarray toarray;
102 subcollectionmap.gettoarray(toarray);
103 if (toarray.size()) {
104 defaultsubcollection = toarray[0];
105 }
106 }
107
108 if (defaultlanguage.empty()) {
109 // use first language in map as default if no default is set explicitly
110 text_tarray toarray;
111 languagemap.gettoarray(toarray);
112 if (toarray.size()) {
113 defaultlanguage = toarray[0];
114 }
115 }
116
117 // get the collection directory name
118 if (collectdir.empty()) {
119 collectdir = filename_cat (gsdlhome, "collect", collection);
120 }
121
122 // get the filename for the database and make sure it exists
123 if (indexstem.empty()) {
124 indexstem = collection;
125 }
126 db_filename = filename_cat(dbhome, "collect", collection, "index", "text", indexstem);
127 if (littleEndian()) db_filename += ".ldb";
128 else db_filename += ".bdb";
129
130 if (!file_exists(db_filename)) {
131 logout << text_t2ascii
132 << "warning: database \"" << db_filename << "\" does not exist\n\n";
133 // return false;
134 }
135
136 return true;
137}
138
139
140// translate_OID translates OIDs using ".pr", ."fc" etc.
141bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
142 comerror_t &err, ostream &logout)
143{
144 outconvertclass text_t2ascii;
145
146 err = noError;
147 if (db_ptr == NULL) {
148 // most likely a configuration problem
149 logout << text_t2ascii
150 << "configuration error: " << classname << " contains a null dbclass\n\n";
151 err = configurationError;
152 return true;
153 }
154
155 // open the database
156 db_ptr->setlogout(&logout);
157 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
158 // most likely a system problem (we have already checked that the database exists)
159 logout << text_t2ascii
160 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
161 err = systemProblem;
162 return true;
163 }
164
165 infodbclass info;
166 OIDout = db_ptr->translate_OID (OIDin, info);
167 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
168 return true;
169}
170
171
172// get_metadata fills out the metadata if possible, if it is not responsable
173// for the given OID then it will return false.
174bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
175 bool getParents, const text_tset &fields,
176 const text_t &OID, MetadataInfo_tmap &metadata,
177 comerror_t &err, ostream &logout)
178{
179 outconvertclass text_t2ascii;
180
181 metadata.erase(metadata.begin(), metadata.end());
182
183 err = noError;
184 if (db_ptr == NULL) {
185 // most likely a configuration problem
186 logout << text_t2ascii
187 << "configuration error: " << classname <<" contains a null dbclass\n\n";
188 err = configurationError;
189 return true;
190 }
191
192 // open the database
193 db_ptr->setlogout(&logout);
194 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
195 // most likely a system problem (we have already checked that the database exists)
196 logout << text_t2ascii
197 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
198 err = systemProblem;
199 return true;
200 }
201
202 // get the metadata - if getParents is set we need to get
203 // info for all parents of OID as well as OID
204 vector<infodbclass> info_array;
205 text_tarray OIDs;
206 if (getParents) get_parents_array (OID, OIDs);
207 OIDs.push_back (OID);
208
209 text_tarray::const_iterator this_OID = OIDs.begin();
210 text_tarray::const_iterator end_OID = OIDs.end();
211
212 while (this_OID != end_OID) {
213 infodbclass info;
214 if (!db_ptr->getinfo(*this_OID, info)) return false;
215
216 // adjust the metadata
217 text_t &contains = info["contains"];
218 if (contains.empty()) info["haschildren"] = 0;
219 else info["haschildren"] = 1;
220 //contains.clear();
221
222 info_array.push_back(info);
223 ++this_OID;
224 }
225
226 // if fields set is empty we want to get all available metadata
227 text_tset tfields = fields;
228 if (tfields.empty() && !info_array.empty()) {
229 infodbclass::iterator t_info = info_array[0].begin();
230 infodbclass::iterator e_info = info_array[0].end();
231 while (t_info != e_info) {
232 if ((*t_info).first != "contains")
233 tfields.insert ((*t_info).first);
234 ++t_info;
235 }
236 tfields.insert ("hasnext");
237 tfields.insert ("hasprevious");
238 }
239
240 // collect together the metadata
241 bool donenextprevtest = false;
242 bool hasnext=false, hasprevious=false;
243 MetadataInfo_t this_metadata;
244 text_tarray *pos_metadata;
245 text_tset::const_iterator fields_here = tfields.begin();
246 text_tset::const_iterator fields_end = tfields.end();
247
248 while (fields_here != fields_end) {
249 this_metadata.clear();
250 this_metadata.isRef = false;
251
252 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
253 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
254 MetadataInfo_t *tmetaptr = &this_metadata;
255 while (this_info != end_info) {
256
257 pos_metadata = (*this_info).getmultinfo(*fields_here);
258 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
259
260 // collect metadata
261 if (!donenextprevtest) {
262 donenextprevtest = true;
263
264 // cache parent contents array
265 text_t thisparent = get_parent (OID);
266 if (!thisparent.empty()) {
267 if (thisparent != parentOID) {
268 parentOID = thisparent;
269 parentcontents.erase(parentcontents.begin(), parentcontents.end());
270 if (db_ptr->getinfo(parentOID, parentinfo)) {
271 text_t &parentinfocontains = parentinfo["contains"];
272 if (!parentinfocontains.empty())
273 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
274 ';', parentcontents);
275 }
276 }
277
278 // do tests
279 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
280 text_tarray::const_iterator parentcontents_end = parentcontents.end();
281 text_t shrunk_OID = OID;
282 shrink_parent (shrunk_OID);
283 while (parentcontents_here != parentcontents_end) {
284 if (*parentcontents_here == shrunk_OID) {
285 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
286 else hasprevious = true;
287
288 ++parentcontents_here;
289
290 if (parentcontents_here == parentcontents.end()) hasnext = false;
291 else hasnext = true;
292
293 break;
294 }
295
296 ++parentcontents_here;
297 }
298
299 // fill in metadata
300 if ((*fields_here == "hasnext" && hasnext) ||
301 (*fields_here == "hasprevious" && hasprevious))
302 tmetaptr->values.push_back("1");
303 else
304 tmetaptr->values.push_back("0");
305 } else
306 tmetaptr->values.push_back("0");
307 }
308 }
309 //else if (pos_metadata != NULL && *fields_here != "contains") {
310 else if (pos_metadata != NULL) {
311 tmetaptr->values = *pos_metadata;
312 }
313 else
314 tmetaptr->values.push_back("");
315
316 ++this_info;
317 if (this_info != end_info) {
318 tmetaptr->parent = new MetadataInfo_t();
319 tmetaptr = tmetaptr->parent;
320 }
321 }
322 metadata[*fields_here] = this_metadata;
323 ++fields_here;
324 }
325
326 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
327 return true;
328}
329
330bool sourceclass::get_document (const text_t &OID, text_t &doc,
331 comerror_t &err, ostream &logout)
332{
333 outconvertclass text_t2ascii;
334
335 err = noError;
336 if (db_ptr == NULL) {
337 // most likely a configuration problem
338 logout << text_t2ascii
339 << "configuration error: " << classname << " contains a null dbclass\n\n";
340 err = configurationError;
341 return true;
342 }
343
344 // open the database
345 db_ptr->setlogout(&logout);
346 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
347 // most likely a system problem (we have already checked that the database exists)
348 logout << text_t2ascii
349 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
350 err = systemProblem;
351 return true;
352 }
353
354 text_t tOID = OID;
355 if (needs_translating (OID))
356 translate_OID (OID, tOID, err, logout);
357 infodbclass info;
358 if (!db_ptr->getinfo(tOID, info)) {
359 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
360 return false;
361 }
362
363 if (info["hastxt"].getint() == 1) {
364 int docnum = info["docnum"].getint();
365
366 // set the collection directory
367 textsearchptr->setcollectdir (collectdir);
368
369 // get the text
370 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
371 defaultlanguage, collection, docnum, doc);
372 }
373
374 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
375 return true;
376}
377
378bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
379{
380 err = noError;
381 issearchable = false;
382
383 text_tarray fromarray;
384 indexmap.getfromarray(fromarray);
385 if (fromarray.size() == 0) {
386 return true;
387 } else if (fromarray.size() == 1) {
388 if (fromarray[0] == "dummy:text") {
389 // always return true - issearchable is false here though
390 return true;
391 }
392 }
393 issearchable = true;
394 return true;
395}
396
397
398bool operator==(const sourceptr &x, const sourceptr &y) {
399 return (x.s == y.s);
400}
401
402bool operator<(const sourceptr &x, const sourceptr &y) {
403 return (x.s < y.s);
404}
405
406
407// thesource remains the property of the calling code but
408// should not be deleted until it is removed from this list.
409void sourcelistclass::addsource (sourceclass *thesource) {
410 // can't add a source that doesn't exist
411 assert (thesource != NULL);
412 if (thesource == NULL) return;
413
414 sourceptr sp;
415 sp.s = thesource;
416
417 sourceptrs.push_back(sp);
418}
Note: See TracBrowser for help on using the repository browser.