source: trunk/gsdl/src/colservr/lucenegdbmsource.cpp@ 8027

Last change on this file since 8027 was 8027, checked in by davidb, 20 years ago

Introduction of lucene*.cpp,h classes to support searching with
this Java based indexing tool.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.0 KB
Line 
1/**********************************************************************
2 *
3 * lucenegdbmsource.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "lucenegdbmsource.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include "gsdltools.h"
30
31
32lucenegdbmsourceclass::lucenegdbmsourceclass () {
33 gdbmptr = NULL;
34 textsearchptr = NULL;
35}
36
37lucenegdbmsourceclass::~lucenegdbmsourceclass () {
38 if (gdbmptr != NULL) delete gdbmptr;
39 if (textsearchptr != NULL) delete textsearchptr;
40}
41
42void lucenegdbmsourceclass::configure (const text_t &key, const text_tarray &cfgline) {
43 if (cfgline.size() >= 1) {
44 const text_t &value = cfgline[0];
45
46 if (key == "collection") collection = value;
47 else if (key == "collectdir") collectdir = value;
48 else if (key == "gsdlhome") gsdlhome = value;
49 else if (key == "gdbmhome") gdbmhome = value;
50 }
51
52 if (key == "indexmap") {
53 indexmap.importmap (cfgline);
54
55 } else if (key == "defaultindex") {
56 indexmap.from2to (cfgline[0], defaultindex);
57
58 } else if (key == "subcollectionmap") {
59 subcollectionmap.importmap (cfgline);
60
61 } else if (key == "defaultsubcollection") {
62 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
63
64 } else if (key == "languagemap") {
65 languagemap.importmap (cfgline);
66
67 } else if (key == "defaultlanguage")
68 languagemap.from2to (cfgline[0], defaultlanguage);
69}
70
71bool lucenegdbmsourceclass::init (ostream &logout) {
72 outconvertclass text_t2ascii;
73
74 if (gdbmhome.empty()) gdbmhome = gsdlhome;
75
76 if (!sourceclass::init (logout)) return false;
77
78 if (defaultindex.empty()) {
79 // use first index in map as default if no default is set explicitly
80 text_tarray toarray;
81 indexmap.gettoarray(toarray);
82 if (toarray.size()) {
83 defaultindex = toarray[0];
84 }
85 }
86
87 if (defaultsubcollection.empty()) {
88 // use first subcollection in map as default if no default is set explicitly
89 text_tarray toarray;
90 subcollectionmap.gettoarray(toarray);
91 if (toarray.size()) {
92 defaultsubcollection = toarray[0];
93 }
94 }
95
96 if (defaultlanguage.empty()) {
97 // use first language in map as default if no default is set explicitly
98 text_tarray toarray;
99 languagemap.gettoarray(toarray);
100 if (toarray.size()) {
101 defaultlanguage = toarray[0];
102 }
103 }
104
105 // get the collection directory name
106 if (collectdir.empty()) {
107 collectdir = filename_cat (gsdlhome, "collect", collection);
108 }
109
110 // get the filename for the database and make sure it exists
111 gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", collection);
112 if (littleEndian()) gdbm_filename += ".ldb";
113 else gdbm_filename += ".bdb";
114
115 if (!file_exists(gdbm_filename)) {
116 logout << text_t2ascii
117 << "warning: **** gdbm database \"" //****
118 << gdbm_filename << "\" does not exist\n\n";
119 // return false; //****
120 }
121
122 return true;
123}
124
125bool lucenegdbmsourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
126 comerror_t &err, ostream &logout) {
127
128 outconvertclass text_t2ascii;
129
130 err = noError;
131 if (gdbmptr == NULL) {
132 // most likely a configuration problem
133 logout << text_t2ascii
134 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
135 err = configurationError;
136 return true;
137 }
138
139 // open the database
140 gdbmptr->setlogout(&logout);
141 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
142 // most likely a system problem (we have already checked that the
143 // gdbm database exists)
144 logout << text_t2ascii
145 << "system problem: open on gdbm database \""
146 << gdbm_filename << "\" failed\n\n";
147 err = systemProblem;
148 return true;
149 }
150
151 infodbclass info;
152 OIDout = gdbmptr->translate_OID (OIDin, info);
153 return true;
154}
155
156bool lucenegdbmsourceclass::get_metadata (const text_t &/*requestParams*/, const text_t &/*refParams*/,
157 bool getParents, const text_tset &fields,
158 const text_t &OID, MetadataInfo_tmap &metadata,
159 comerror_t &err, ostream &logout) {
160 outconvertclass text_t2ascii;
161
162 metadata.erase(metadata.begin(), metadata.end());
163
164 err = noError;
165 if (gdbmptr == NULL) {
166 // most likely a configuration problem
167 logout << text_t2ascii
168 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
169 err = configurationError;
170 return true;
171 }
172
173 // open the database
174 gdbmptr->setlogout(&logout);
175 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
176 // most likely a system problem (we have already checked that the
177 // gdbm database exists)
178 logout << text_t2ascii
179 << "system problem: open on gdbm database \""
180 << gdbm_filename << "\" failed\n\n";
181 err = systemProblem;
182 return true;
183 }
184
185 // get the metadata - if getParents is set we need to get
186 // info for all parents of OID as well as OID
187 vector<infodbclass> info_array;
188 text_tarray OIDs;
189 if (getParents) get_parents_array (OID, OIDs);
190 OIDs.push_back (OID);
191
192 text_tarray::const_iterator this_OID = OIDs.begin();
193 text_tarray::const_iterator end_OID = OIDs.end();
194
195 while (this_OID != end_OID) {
196 infodbclass info;
197 if (!gdbmptr->getinfo(*this_OID, info)) return false;
198
199 // adjust the metadata
200 text_t &contains = info["contains"];
201 if (contains.empty()) info["haschildren"] = 0;
202 else info["haschildren"] = 1;
203 contains.clear();
204
205 info_array.push_back(info);
206 this_OID ++;
207 }
208
209 // if fields set is empty we want to get all available metadata
210 text_tset tfields = fields;
211 if (tfields.empty() && !info_array.empty()) {
212 infodbclass::iterator t_info = info_array[0].begin();
213 infodbclass::iterator e_info = info_array[0].end();
214 while (t_info != e_info) {
215 if ((*t_info).first != "contains")
216 tfields.insert ((*t_info).first);
217 t_info ++;
218 }
219 tfields.insert ("hasnext");
220 tfields.insert ("hasprevious");
221 }
222
223 // collect together the metadata
224 bool donenextprevtest = false;
225 bool hasnext=false, hasprevious=false;
226 MetadataInfo_t this_metadata;
227 text_tarray *pos_metadata;
228 text_tset::const_iterator fields_here = tfields.begin();
229 text_tset::const_iterator fields_end = tfields.end();
230
231 while (fields_here != fields_end) {
232 this_metadata.clear();
233 this_metadata.isRef = false;
234
235 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
236 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
237 MetadataInfo_t *tmetaptr = &this_metadata;
238 while (this_info != end_info) {
239
240 pos_metadata = (*this_info).getmultinfo(*fields_here);
241 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
242
243 // collect metadata
244 if (!donenextprevtest) {
245 donenextprevtest = true;
246
247 // cache parent contents array
248 text_t thisparent = get_parent (OID);
249 if (!thisparent.empty()) {
250 if (thisparent != parentOID) {
251 parentOID = thisparent;
252 parentcontents.erase(parentcontents.begin(), parentcontents.end());
253 if (gdbmptr->getinfo(parentOID, parentinfo)) {
254 text_t &parentinfocontains = parentinfo["contains"];
255 if (!parentinfocontains.empty())
256 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
257 ';', parentcontents);
258 }
259 }
260
261 // do tests
262 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
263 text_tarray::const_iterator parentcontents_end = parentcontents.end();
264 text_t shrunk_OID = OID;
265 shrink_parent (shrunk_OID);
266 while (parentcontents_here != parentcontents_end) {
267 if (*parentcontents_here == shrunk_OID) {
268 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
269 else hasprevious = true;
270
271 parentcontents_here++;
272
273 if (parentcontents_here == parentcontents.end()) hasnext = false;
274 else hasnext = true;
275
276 break;
277 }
278
279 parentcontents_here ++;
280 }
281
282 // fill in metadata
283 if ((*fields_here == "hasnext" && hasnext) ||
284 (*fields_here == "hasprevious" && hasprevious))
285 tmetaptr->values.push_back("1");
286 else
287 tmetaptr->values.push_back("0");
288 } else
289 tmetaptr->values.push_back("0");
290 }
291 }
292 else if (pos_metadata != NULL && *fields_here != "contains") {
293 tmetaptr->values = *pos_metadata;
294 }
295 else
296 tmetaptr->values.push_back("");
297
298 this_info ++;
299 if (this_info != end_info) {
300 tmetaptr->parent = new MetadataInfo_t();
301 tmetaptr = tmetaptr->parent;
302 }
303 }
304 metadata[*fields_here] = this_metadata;
305 fields_here++;
306 }
307 return true;
308}
309
310
311bool lucenegdbmsourceclass::get_document (const text_t &OID, text_t &doc,
312 comerror_t &err, ostream &logout) {
313
314 outconvertclass text_t2ascii;
315
316 err = noError;
317 if (gdbmptr == NULL) {
318 // most likely a configuration problem
319 logout << text_t2ascii
320 << "configuration error: lucenegdbmsource contains a null gdbmclass\n\n";
321 err = configurationError;
322 return true;
323 }
324
325 // open the database
326 gdbmptr->setlogout(&logout);
327 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
328 // most likely a system problem (we have already checked that the
329 // gdbm database exists)
330 logout << text_t2ascii
331 << "system problem: open on gdbm database \""
332 << gdbm_filename << "\" failed\n\n";
333 err = systemProblem;
334 return true;
335 }
336
337 text_t tOID = OID;
338 if (needs_translating (OID))
339 translate_OID (OID, tOID, err, logout);
340 infodbclass info;
341 if (!gdbmptr->getinfo(tOID, info)) return false;
342
343 if (info["hastxt"].getint() == 1) {
344 int docnum = info["docnum"].getint();
345
346 // set the collection directory
347 textsearchptr->setcollectdir (collectdir);
348
349 // get the text
350 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
351 defaultlanguage, collection, docnum, doc);
352 }
353 return true;
354}
355
356bool lucenegdbmsourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout) {
357 err = noError;
358 issearchable = false;
359
360 text_tarray fromarray;
361 indexmap.getfromarray(fromarray);
362 if (fromarray.size() == 0) {
363 return true;
364 } else if (fromarray.size() == 1) {
365 if (fromarray[0] == "dummy:text") {
366 // always return true - issearchable is false here though
367 return true;
368 }
369 }
370 issearchable = true;
371 return true;
372}
Note: See TracBrowser for help on using the repository browser.