source: gsdl/trunk/src/colservr/source.cpp@ 15617

Last change on this file since 15617 was 15617, checked in by mdewsnip, 16 years ago

Fixed a nasty problem causing a segfault as a result of copying the code from gdbmsource into source.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include "gsdltools.h"
30#include <assert.h>
31
32
33sourceclass::sourceclass ()
34{
35 db_ptr = NULL;
36 textsearchptr = NULL;
37 classname = "source";
38}
39
40sourceclass::~sourceclass ()
41{
42 if (db_ptr != NULL) delete db_ptr;
43 if (textsearchptr != NULL) delete textsearchptr;
44}
45
46// configure should be called once for each configuration line
47void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
48{
49 if (cfgline.size() >= 1) {
50 const text_t &value = cfgline[0];
51
52 if (key == "collection") collection = value;
53 else if (key == "collectdir") collectdir = value;
54 else if (key == "gsdlhome") gsdlhome = value;
55 else if (key == "gdbmhome") dbhome = value;
56 }
57
58 if (key == "indexmap") {
59 indexmap.importmap (cfgline);
60
61 } else if (key == "defaultindex") {
62 indexmap.from2to (cfgline[0], defaultindex);
63
64 } else if (key == "subcollectionmap") {
65 subcollectionmap.importmap (cfgline);
66
67 } else if (key == "defaultsubcollection") {
68 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
69
70 } else if (key == "languagemap") {
71 languagemap.importmap (cfgline);
72
73 } else if (key == "defaultlanguage") {
74 languagemap.from2to (cfgline[0], defaultlanguage);
75 } else if (key == "indexstem") {
76 indexstem = cfgline[0];
77 }
78}
79
80// init should be called after all the configuration is done but
81// before any other methods are called
82bool sourceclass::init (ostream &logout)
83{
84 outconvertclass text_t2ascii;
85
86 if (dbhome.empty()) dbhome = gsdlhome;
87
88 if (defaultindex.empty()) {
89 // use first index in map as default if no default is set explicitly
90 text_tarray toarray;
91 indexmap.gettoarray(toarray);
92 if (toarray.size()) {
93 defaultindex = toarray[0];
94 }
95 }
96
97 if (defaultsubcollection.empty()) {
98 // use first subcollection in map as default if no default is set explicitly
99 text_tarray toarray;
100 subcollectionmap.gettoarray(toarray);
101 if (toarray.size()) {
102 defaultsubcollection = toarray[0];
103 }
104 }
105
106 if (defaultlanguage.empty()) {
107 // use first language in map as default if no default is set explicitly
108 text_tarray toarray;
109 languagemap.gettoarray(toarray);
110 if (toarray.size()) {
111 defaultlanguage = toarray[0];
112 }
113 }
114
115 // get the collection directory name
116 if (collectdir.empty()) {
117 collectdir = filename_cat (gsdlhome, "collect", collection);
118 }
119
120 // get the filename for the database and make sure it exists
121 if (indexstem.empty()) {
122 indexstem = collection;
123 }
124 db_filename = filename_cat(dbhome, "collect", collection, "index", "text", indexstem);
125 if (littleEndian()) db_filename += ".ldb";
126 else db_filename += ".bdb";
127
128 if (!file_exists(db_filename)) {
129 logout << text_t2ascii
130 << "warning: database \"" << db_filename << "\" does not exist\n\n";
131 // return false;
132 }
133
134 return true;
135}
136
137
138// translate_OID translates OIDs using ".pr", ."fc" etc.
139bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
140 comerror_t &err, ostream &logout)
141{
142 outconvertclass text_t2ascii;
143
144 err = noError;
145 if (db_ptr == NULL) {
146 // most likely a configuration problem
147 logout << text_t2ascii
148 << "configuration error: " << classname << " contains a null dbclass\n\n";
149 err = configurationError;
150 return true;
151 }
152
153 // open the database
154 db_ptr->setlogout(&logout);
155 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
156 // most likely a system problem (we have already checked that the database exists)
157 logout << text_t2ascii
158 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
159 err = systemProblem;
160 return true;
161 }
162
163 infodbclass info;
164 OIDout = db_ptr->translate_OID (OIDin, info);
165 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
166 return true;
167}
168
169
170// get_metadata fills out the metadata if possible, if it is not responsable
171// for the given OID then it will return false.
172bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
173 bool getParents, const text_tset &fields,
174 const text_t &OID, MetadataInfo_tmap &metadata,
175 comerror_t &err, ostream &logout)
176{
177 outconvertclass text_t2ascii;
178
179 metadata.erase(metadata.begin(), metadata.end());
180
181 err = noError;
182 if (db_ptr == NULL) {
183 // most likely a configuration problem
184 logout << text_t2ascii
185 << "configuration error: " << classname <<" contains a null dbclass\n\n";
186 err = configurationError;
187 return true;
188 }
189
190 // open the database
191 db_ptr->setlogout(&logout);
192 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
193 // most likely a system problem (we have already checked that the database exists)
194 logout << text_t2ascii
195 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
196 err = systemProblem;
197 return true;
198 }
199
200 // get the metadata - if getParents is set we need to get
201 // info for all parents of OID as well as OID
202 vector<infodbclass> info_array;
203 text_tarray OIDs;
204 if (getParents) get_parents_array (OID, OIDs);
205 OIDs.push_back (OID);
206
207 text_tarray::const_iterator this_OID = OIDs.begin();
208 text_tarray::const_iterator end_OID = OIDs.end();
209
210 while (this_OID != end_OID) {
211 infodbclass info;
212 if (!db_ptr->getinfo(*this_OID, info)) return false;
213
214 // adjust the metadata
215 text_t &contains = info["contains"];
216 if (contains.empty()) info["haschildren"] = 0;
217 else info["haschildren"] = 1;
218 //contains.clear();
219
220 info_array.push_back(info);
221 ++this_OID;
222 }
223
224 // if fields set is empty we want to get all available metadata
225 text_tset tfields = fields;
226 if (tfields.empty() && !info_array.empty()) {
227 infodbclass::iterator t_info = info_array[0].begin();
228 infodbclass::iterator e_info = info_array[0].end();
229 while (t_info != e_info) {
230 if ((*t_info).first != "contains")
231 tfields.insert ((*t_info).first);
232 ++t_info;
233 }
234 tfields.insert ("hasnext");
235 tfields.insert ("hasprevious");
236 }
237
238 // collect together the metadata
239 bool donenextprevtest = false;
240 bool hasnext=false, hasprevious=false;
241 MetadataInfo_t this_metadata;
242 text_tarray *pos_metadata;
243 text_tset::const_iterator fields_here = tfields.begin();
244 text_tset::const_iterator fields_end = tfields.end();
245
246 while (fields_here != fields_end) {
247 this_metadata.clear();
248 this_metadata.isRef = false;
249
250 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
251 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
252 MetadataInfo_t *tmetaptr = &this_metadata;
253 while (this_info != end_info) {
254
255 pos_metadata = (*this_info).getmultinfo(*fields_here);
256 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
257
258 // collect metadata
259 if (!donenextprevtest) {
260 donenextprevtest = true;
261
262 // cache parent contents array
263 text_t thisparent = get_parent (OID);
264 if (!thisparent.empty()) {
265 if (thisparent != parentOID) {
266 parentOID = thisparent;
267 parentcontents.erase(parentcontents.begin(), parentcontents.end());
268 if (db_ptr->getinfo(parentOID, parentinfo)) {
269 text_t &parentinfocontains = parentinfo["contains"];
270 if (!parentinfocontains.empty())
271 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
272 ';', parentcontents);
273 }
274 }
275
276 // do tests
277 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
278 text_tarray::const_iterator parentcontents_end = parentcontents.end();
279 text_t shrunk_OID = OID;
280 shrink_parent (shrunk_OID);
281 while (parentcontents_here != parentcontents_end) {
282 if (*parentcontents_here == shrunk_OID) {
283 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
284 else hasprevious = true;
285
286 ++parentcontents_here;
287
288 if (parentcontents_here == parentcontents.end()) hasnext = false;
289 else hasnext = true;
290
291 break;
292 }
293
294 ++parentcontents_here;
295 }
296
297 // fill in metadata
298 if ((*fields_here == "hasnext" && hasnext) ||
299 (*fields_here == "hasprevious" && hasprevious))
300 tmetaptr->values.push_back("1");
301 else
302 tmetaptr->values.push_back("0");
303 } else
304 tmetaptr->values.push_back("0");
305 }
306 }
307 //else if (pos_metadata != NULL && *fields_here != "contains") {
308 else if (pos_metadata != NULL) {
309 tmetaptr->values = *pos_metadata;
310 }
311 else
312 tmetaptr->values.push_back("");
313
314 ++this_info;
315 if (this_info != end_info) {
316 tmetaptr->parent = new MetadataInfo_t();
317 tmetaptr = tmetaptr->parent;
318 }
319 }
320 metadata[*fields_here] = this_metadata;
321 ++fields_here;
322 }
323
324 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
325 return true;
326}
327
328bool sourceclass::get_document (const text_t &OID, text_t &doc,
329 comerror_t &err, ostream &logout)
330{
331 outconvertclass text_t2ascii;
332
333 err = noError;
334 if (db_ptr == NULL) {
335 // most likely a configuration problem
336 logout << text_t2ascii
337 << "configuration error: " << classname << " contains a null dbclass\n\n";
338 err = configurationError;
339 return true;
340 }
341
342 // open the database
343 db_ptr->setlogout(&logout);
344 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
345 // most likely a system problem (we have already checked that the database exists)
346 logout << text_t2ascii
347 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
348 err = systemProblem;
349 return true;
350 }
351
352 text_t tOID = OID;
353 if (needs_translating (OID))
354 translate_OID (OID, tOID, err, logout);
355 infodbclass info;
356 if (!db_ptr->getinfo(tOID, info)) {
357 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
358 return false;
359 }
360
361 if (info["hastxt"].getint() == 1) {
362 int docnum = info["docnum"].getint();
363
364 // set the collection directory
365 textsearchptr->setcollectdir (collectdir);
366
367 // get the text
368 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
369 defaultlanguage, collection, docnum, doc);
370 }
371
372 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
373 return true;
374}
375
376bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
377{
378 err = noError;
379 issearchable = false;
380
381 text_tarray fromarray;
382 indexmap.getfromarray(fromarray);
383 if (fromarray.size() == 0) {
384 return true;
385 } else if (fromarray.size() == 1) {
386 if (fromarray[0] == "dummy:text") {
387 // always return true - issearchable is false here though
388 return true;
389 }
390 }
391 issearchable = true;
392 return true;
393}
394
395
396bool operator==(const sourceptr &x, const sourceptr &y) {
397 return (x.s == y.s);
398}
399
400bool operator<(const sourceptr &x, const sourceptr &y) {
401 return (x.s < y.s);
402}
403
404
405// thesource remains the property of the calling code but
406// should not be deleted until it is removed from this list.
407void sourcelistclass::addsource (sourceclass *thesource) {
408 // can't add a source that doesn't exist
409 assert (thesource != NULL);
410 if (thesource == NULL) return;
411
412 sourceptr sp;
413 sp.s = thesource;
414
415 sourceptrs.push_back(sp);
416}
Note: See TracBrowser for help on using the repository browser.