source: trunk/gsdl/src/colservr/lucenegdbmsource.cpp@ 9190

Last change on this file since 9190 was 9190, checked in by kjdon, 19 years ago

need to pass the level tag to expat_document

  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
1/**********************************************************************
2 *
3 * lucenegdbmsource.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "lucenegdbmsource.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include "gsdltools.h"
30#include "expat_document.h"
31#include "lucenesearch.h"
32
33
34lucenegdbmsourceclass::lucenegdbmsourceclass () {
35 gdbmptr = NULL;
36 textsearchptr = NULL;
37}
38
39lucenegdbmsourceclass::~lucenegdbmsourceclass () {
40 if (gdbmptr != NULL) delete gdbmptr;
41 if (textsearchptr != NULL) delete textsearchptr;
42}
43
44void lucenegdbmsourceclass::configure (const text_t &key, const text_tarray &cfgline) {
45 if (cfgline.size() >= 1) {
46 const text_t &value = cfgline[0];
47
48 if (key == "collection") collection = value;
49 else if (key == "collectdir") collectdir = value;
50 else if (key == "gsdlhome") gsdlhome = value;
51 else if (key == "gdbmhome") gdbmhome = value;
52 }
53
54 if (key == "indexmap") {
55 indexmap.importmap (cfgline);
56
57 } else if (key == "defaultindex") {
58 indexmap.from2to (cfgline[0], defaultindex);
59
60 } else if (key == "subcollectionmap") {
61 subcollectionmap.importmap (cfgline);
62
63 } else if (key == "defaultsubcollection") {
64 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
65
66 } else if (key == "languagemap") {
67 languagemap.importmap (cfgline);
68
69 } else if (key == "defaultlanguage")
70 languagemap.from2to (cfgline[0], defaultlanguage);
71}
72
73bool lucenegdbmsourceclass::init (ostream &logout) {
74 outconvertclass text_t2ascii;
75
76 if (gdbmhome.empty()) gdbmhome = gsdlhome;
77
78 if (!sourceclass::init (logout)) return false;
79
80 if (defaultindex.empty()) {
81 // use first index in map as default if no default is set explicitly
82 text_tarray toarray;
83 indexmap.gettoarray(toarray);
84 if (toarray.size()) {
85 defaultindex = toarray[0];
86 }
87 }
88
89 if (defaultsubcollection.empty()) {
90 // use first subcollection in map as default if no default is set explicitly
91 text_tarray toarray;
92 subcollectionmap.gettoarray(toarray);
93 if (toarray.size()) {
94 defaultsubcollection = toarray[0];
95 }
96 }
97
98 if (defaultlanguage.empty()) {
99 // use first language in map as default if no default is set explicitly
100 text_tarray toarray;
101 languagemap.gettoarray(toarray);
102 if (toarray.size()) {
103 defaultlanguage = toarray[0];
104 }
105 }
106
107 // get the collection directory name
108 if (collectdir.empty()) {
109 collectdir = filename_cat (gsdlhome, "collect", collection);
110 }
111
112 // get the filename for the database and make sure it exists
113 gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", collection);
114 if (littleEndian()) gdbm_filename += ".ldb";
115 else gdbm_filename += ".bdb";
116
117 if (!file_exists(gdbm_filename)) {
118 logout << text_t2ascii
119 << "warning: **** gdbm database \"" //****
120 << gdbm_filename << "\" does not exist\n\n";
121 // return false; //****
122 }
123
124 return true;
125}
126
127bool lucenegdbmsourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
128 comerror_t &err, ostream &logout) {
129
130 outconvertclass text_t2ascii;
131
132 err = noError;
133 if (gdbmptr == NULL) {
134 // most likely a configuration problem
135 logout << text_t2ascii
136 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
137 err = configurationError;
138 return true;
139 }
140
141 // open the database
142 gdbmptr->setlogout(&logout);
143 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
144 // most likely a system problem (we have already checked that the
145 // gdbm database exists)
146 logout << text_t2ascii
147 << "system problem: open on gdbm database \""
148 << gdbm_filename << "\" failed\n\n";
149 err = systemProblem;
150 return true;
151 }
152
153 infodbclass info;
154 OIDout = gdbmptr->translate_OID (OIDin, info);
155 return true;
156}
157
158bool lucenegdbmsourceclass::get_metadata (const text_t &/*requestParams*/, const text_t &/*refParams*/,
159 bool getParents, const text_tset &fields,
160 const text_t &OID, MetadataInfo_tmap &metadata,
161 comerror_t &err, ostream &logout) {
162 outconvertclass text_t2ascii;
163
164 metadata.erase(metadata.begin(), metadata.end());
165
166 err = noError;
167 if (gdbmptr == NULL) {
168 // most likely a configuration problem
169 logout << text_t2ascii
170 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
171 err = configurationError;
172 return true;
173 }
174
175 // open the database
176 gdbmptr->setlogout(&logout);
177 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
178 // most likely a system problem (we have already checked that the
179 // gdbm database exists)
180 logout << text_t2ascii
181 << "system problem: open on gdbm database \""
182 << gdbm_filename << "\" failed\n\n";
183 err = systemProblem;
184 return true;
185 }
186
187 // get the metadata - if getParents is set we need to get
188 // info for all parents of OID as well as OID
189 vector<infodbclass> info_array;
190 text_tarray OIDs;
191 if (getParents) get_parents_array (OID, OIDs);
192 OIDs.push_back (OID);
193
194 text_tarray::const_iterator this_OID = OIDs.begin();
195 text_tarray::const_iterator end_OID = OIDs.end();
196
197 while (this_OID != end_OID) {
198 infodbclass info;
199 if (!gdbmptr->getinfo(*this_OID, info)) return false;
200
201 // adjust the metadata
202 text_t &contains = info["contains"];
203 if (contains.empty()) info["haschildren"] = 0;
204 else info["haschildren"] = 1;
205 contains.clear();
206
207 info_array.push_back(info);
208 this_OID ++;
209 }
210
211 // if fields set is empty we want to get all available metadata
212 text_tset tfields = fields;
213 if (tfields.empty() && !info_array.empty()) {
214 infodbclass::iterator t_info = info_array[0].begin();
215 infodbclass::iterator e_info = info_array[0].end();
216 while (t_info != e_info) {
217 if ((*t_info).first != "contains")
218 tfields.insert ((*t_info).first);
219 t_info ++;
220 }
221 tfields.insert ("hasnext");
222 tfields.insert ("hasprevious");
223 }
224
225 // collect together the metadata
226 bool donenextprevtest = false;
227 bool hasnext=false, hasprevious=false;
228 MetadataInfo_t this_metadata;
229 text_tarray *pos_metadata;
230 text_tset::const_iterator fields_here = tfields.begin();
231 text_tset::const_iterator fields_end = tfields.end();
232
233 while (fields_here != fields_end) {
234 this_metadata.clear();
235 this_metadata.isRef = false;
236
237 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
238 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
239 MetadataInfo_t *tmetaptr = &this_metadata;
240 while (this_info != end_info) {
241
242 pos_metadata = (*this_info).getmultinfo(*fields_here);
243 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
244
245 // collect metadata
246 if (!donenextprevtest) {
247 donenextprevtest = true;
248
249 // cache parent contents array
250 text_t thisparent = get_parent (OID);
251 if (!thisparent.empty()) {
252 if (thisparent != parentOID) {
253 parentOID = thisparent;
254 parentcontents.erase(parentcontents.begin(), parentcontents.end());
255 if (gdbmptr->getinfo(parentOID, parentinfo)) {
256 text_t &parentinfocontains = parentinfo["contains"];
257 if (!parentinfocontains.empty())
258 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
259 ';', parentcontents);
260 }
261 }
262
263 // do tests
264 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
265 text_tarray::const_iterator parentcontents_end = parentcontents.end();
266 text_t shrunk_OID = OID;
267 shrink_parent (shrunk_OID);
268 while (parentcontents_here != parentcontents_end) {
269 if (*parentcontents_here == shrunk_OID) {
270 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
271 else hasprevious = true;
272
273 parentcontents_here++;
274
275 if (parentcontents_here == parentcontents.end()) hasnext = false;
276 else hasnext = true;
277
278 break;
279 }
280
281 parentcontents_here ++;
282 }
283
284 // fill in metadata
285 if ((*fields_here == "hasnext" && hasnext) ||
286 (*fields_here == "hasprevious" && hasprevious))
287 tmetaptr->values.push_back("1");
288 else
289 tmetaptr->values.push_back("0");
290 } else
291 tmetaptr->values.push_back("0");
292 }
293 }
294 else if (pos_metadata != NULL && *fields_here != "contains") {
295 tmetaptr->values = *pos_metadata;
296 }
297 else
298 tmetaptr->values.push_back("");
299
300 this_info ++;
301 if (this_info != end_info) {
302 tmetaptr->parent = new MetadataInfo_t();
303 tmetaptr = tmetaptr->parent;
304 }
305 }
306 metadata[*fields_here] = this_metadata;
307 fields_here++;
308 }
309 return true;
310}
311
312
313bool lucenegdbmsourceclass::get_document (const text_t &OID, text_t &doc,
314 comerror_t &err, ostream &logout) {
315
316 outconvertclass text_t2ascii;
317 err = noError;
318 if (gdbmptr == NULL) {
319 // most likely a configuration problem
320 logout << text_t2ascii
321 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
322 err = configurationError;
323 return true;
324 }
325
326 // open the database
327 gdbmptr->setlogout(&logout);
328 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
329 // most likely a system problem (we have already checked that the
330 // gdbm database exists)
331 logout << text_t2ascii
332 << "system problem: open on gdbm database \""
333 << gdbm_filename << "\" failed\n\n";
334 err = systemProblem;
335 return true;
336 }
337
338 text_t tOID = OID;
339 if (needs_translating (OID))
340 translate_OID (OID, tOID, err, logout);
341 infodbclass info;
342 if (!gdbmptr->getinfo(tOID, info)) return false;
343
344 if (info["hastxt"].getint() == 0) { // there is no text for this section
345 return false; // true??
346 }
347 int docnum = info["docnum"].getint();
348
349 // get the parent id
350 text_t parent_OID;
351 get_top(tOID, parent_OID);
352
353 // locate the parent info ingdbm db
354 if (!gdbmptr->getinfo(parent_OID, info)) return false;
355
356 text_t archive_dir = info["assocfilepath"];
357 text_t full_path_to_doc = filename_cat(collectdir, "index", "text", archive_dir, "doc.xml");
358
359 doc.clear();
360 expat_document(full_path_to_doc, ((lucenesearchclass*)textsearchptr)->gdbm_level, text_t(docnum), doc);
361 return true;
362}
363
364bool lucenegdbmsourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout) {
365 err = noError;
366 issearchable = false;
367
368 text_tarray fromarray;
369 indexmap.getfromarray(fromarray);
370 if (fromarray.size() == 0) {
371 return true;
372 } else if (fromarray.size() == 1) {
373 if (fromarray[0] == "dummy:text") {
374 // always return true - issearchable is false here though
375 return true;
376 }
377 }
378 issearchable = true;
379 return true;
380}
Note: See TracBrowser for help on using the repository browser.