source: trunk/gsdl/src/colservr/gdbmsource.cpp@ 10021

Last change on this file since 10021 was 9937, checked in by kjdon, 19 years ago

Modified the filters/sources etc. so that if an indexstem is specified in the build.cfg file, it is used as the root of the index/gdbm filenames instead of the collection name. The collection name is still used by default. This means that a collection directory can be renamed without rebuilding.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
1/**********************************************************************
2 *
3 * gdbmsource.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gdbmsource.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include "gsdltools.h"
30
31
32gdbmsourceclass::gdbmsourceclass () {
33 gdbmptr = NULL;
34 textsearchptr = NULL;
35 classname = "gdbmsource";
36}
37
38gdbmsourceclass::~gdbmsourceclass () {
39 if (gdbmptr != NULL) delete gdbmptr;
40 if (textsearchptr != NULL) delete textsearchptr;
41}
42
43void gdbmsourceclass::configure (const text_t &key, const text_tarray &cfgline) {
44 if (cfgline.size() >= 1) {
45 const text_t &value = cfgline[0];
46
47 if (key == "collection") collection = value;
48 else if (key == "collectdir") collectdir = value;
49 else if (key == "gsdlhome") gsdlhome = value;
50 else if (key == "gdbmhome") gdbmhome = value;
51 }
52
53 if (key == "indexmap") {
54 indexmap.importmap (cfgline);
55
56 } else if (key == "defaultindex") {
57 indexmap.from2to (cfgline[0], defaultindex);
58
59 } else if (key == "subcollectionmap") {
60 subcollectionmap.importmap (cfgline);
61
62 } else if (key == "defaultsubcollection") {
63 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
64
65 } else if (key == "languagemap") {
66 languagemap.importmap (cfgline);
67
68 } else if (key == "defaultlanguage") {
69 languagemap.from2to (cfgline[0], defaultlanguage);
70 } else if (key == "indexstem") {
71 indexstem = cfgline[0];
72 }
73
74}
75
76bool gdbmsourceclass::init (ostream &logout) {
77 outconvertclass text_t2ascii;
78
79 if (gdbmhome.empty()) gdbmhome = gsdlhome;
80
81 if (!sourceclass::init (logout)) return false;
82
83 if (defaultindex.empty()) {
84 // use first index in map as default if no default is set explicitly
85 text_tarray toarray;
86 indexmap.gettoarray(toarray);
87 if (toarray.size()) {
88 defaultindex = toarray[0];
89 }
90 }
91
92 if (defaultsubcollection.empty()) {
93 // use first subcollection in map as default if no default is set explicitly
94 text_tarray toarray;
95 subcollectionmap.gettoarray(toarray);
96 if (toarray.size()) {
97 defaultsubcollection = toarray[0];
98 }
99 }
100
101 if (defaultlanguage.empty()) {
102 // use first language in map as default if no default is set explicitly
103 text_tarray toarray;
104 languagemap.gettoarray(toarray);
105 if (toarray.size()) {
106 defaultlanguage = toarray[0];
107 }
108 }
109
110 // get the collection directory name
111 if (collectdir.empty()) {
112 collectdir = filename_cat (gsdlhome, "collect", collection);
113 }
114
115 // get the filename for the database and make sure it exists
116 if (indexstem.empty()) {
117 indexstem = collection;
118 }
119 gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", indexstem);
120 if (littleEndian()) gdbm_filename += ".ldb";
121 else gdbm_filename += ".bdb";
122
123 if (!file_exists(gdbm_filename)) {
124 logout << text_t2ascii
125 << "warning: gdbm database \"" //****
126 << gdbm_filename << "\" does not exist\n\n";
127 // return false; //****
128 }
129
130 return true;
131}
132
133bool gdbmsourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
134 comerror_t &err, ostream &logout) {
135
136 outconvertclass text_t2ascii;
137
138 err = noError;
139 if (gdbmptr == NULL) {
140 // most likely a configuration problem
141 logout << text_t2ascii
142 << "configuration error: " << classname << " contains a null gdbmclass\n\n";
143 err = configurationError;
144 return true;
145 }
146
147 // open the database
148 gdbmptr->setlogout(&logout);
149 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
150 // most likely a system problem (we have already checked that the
151 // gdbm database exists)
152 logout << text_t2ascii
153 << "system problem: open on gdbm database \""
154 << gdbm_filename << "\" failed\n\n";
155 err = systemProblem;
156 return true;
157 }
158
159 infodbclass info;
160 OIDout = gdbmptr->translate_OID (OIDin, info);
161 return true;
162}
163
164bool gdbmsourceclass::get_metadata (const text_t &/*requestParams*/, const text_t &/*refParams*/,
165 bool getParents, const text_tset &fields,
166 const text_t &OID, MetadataInfo_tmap &metadata,
167 comerror_t &err, ostream &logout) {
168 outconvertclass text_t2ascii;
169
170 metadata.erase(metadata.begin(), metadata.end());
171
172 err = noError;
173 if (gdbmptr == NULL) {
174 // most likely a configuration problem
175 logout << text_t2ascii
176 << "configuration error: " << classname <<" contains a null gdbmclass\n\n";
177 err = configurationError;
178 return true;
179 }
180
181 // open the database
182 gdbmptr->setlogout(&logout);
183 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
184 // most likely a system problem (we have already checked that the
185 // gdbm database exists)
186 logout << text_t2ascii
187 << "system problem: open on gdbm database \""
188 << gdbm_filename << "\" failed\n\n";
189 err = systemProblem;
190 return true;
191 }
192
193 // get the metadata - if getParents is set we need to get
194 // info for all parents of OID as well as OID
195 vector<infodbclass> info_array;
196 text_tarray OIDs;
197 if (getParents) get_parents_array (OID, OIDs);
198 OIDs.push_back (OID);
199
200 text_tarray::const_iterator this_OID = OIDs.begin();
201 text_tarray::const_iterator end_OID = OIDs.end();
202
203 while (this_OID != end_OID) {
204 infodbclass info;
205 if (!gdbmptr->getinfo(*this_OID, info)) return false;
206
207 // adjust the metadata
208 text_t &contains = info["contains"];
209 if (contains.empty()) info["haschildren"] = 0;
210 else info["haschildren"] = 1;
211 contains.clear();
212
213 info_array.push_back(info);
214 ++this_OID;
215 }
216
217 // if fields set is empty we want to get all available metadata
218 text_tset tfields = fields;
219 if (tfields.empty() && !info_array.empty()) {
220 infodbclass::iterator t_info = info_array[0].begin();
221 infodbclass::iterator e_info = info_array[0].end();
222 while (t_info != e_info) {
223 if ((*t_info).first != "contains")
224 tfields.insert ((*t_info).first);
225 ++t_info;
226 }
227 tfields.insert ("hasnext");
228 tfields.insert ("hasprevious");
229 }
230
231 // collect together the metadata
232 bool donenextprevtest = false;
233 bool hasnext=false, hasprevious=false;
234 MetadataInfo_t this_metadata;
235 text_tarray *pos_metadata;
236 text_tset::const_iterator fields_here = tfields.begin();
237 text_tset::const_iterator fields_end = tfields.end();
238
239 while (fields_here != fields_end) {
240 this_metadata.clear();
241 this_metadata.isRef = false;
242
243 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
244 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
245 MetadataInfo_t *tmetaptr = &this_metadata;
246 while (this_info != end_info) {
247
248 pos_metadata = (*this_info).getmultinfo(*fields_here);
249 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
250
251 // collect metadata
252 if (!donenextprevtest) {
253 donenextprevtest = true;
254
255 // cache parent contents array
256 text_t thisparent = get_parent (OID);
257 if (!thisparent.empty()) {
258 if (thisparent != parentOID) {
259 parentOID = thisparent;
260 parentcontents.erase(parentcontents.begin(), parentcontents.end());
261 if (gdbmptr->getinfo(parentOID, parentinfo)) {
262 text_t &parentinfocontains = parentinfo["contains"];
263 if (!parentinfocontains.empty())
264 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
265 ';', parentcontents);
266 }
267 }
268
269 // do tests
270 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
271 text_tarray::const_iterator parentcontents_end = parentcontents.end();
272 text_t shrunk_OID = OID;
273 shrink_parent (shrunk_OID);
274 while (parentcontents_here != parentcontents_end) {
275 if (*parentcontents_here == shrunk_OID) {
276 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
277 else hasprevious = true;
278
279 ++parentcontents_here;
280
281 if (parentcontents_here == parentcontents.end()) hasnext = false;
282 else hasnext = true;
283
284 break;
285 }
286
287 ++parentcontents_here;
288 }
289
290 // fill in metadata
291 if ((*fields_here == "hasnext" && hasnext) ||
292 (*fields_here == "hasprevious" && hasprevious))
293 tmetaptr->values.push_back("1");
294 else
295 tmetaptr->values.push_back("0");
296 } else
297 tmetaptr->values.push_back("0");
298 }
299 }
300 else if (pos_metadata != NULL && *fields_here != "contains") {
301 tmetaptr->values = *pos_metadata;
302 }
303 else
304 tmetaptr->values.push_back("");
305
306 ++this_info;
307 if (this_info != end_info) {
308 tmetaptr->parent = new MetadataInfo_t();
309 tmetaptr = tmetaptr->parent;
310 }
311 }
312 metadata[*fields_here] = this_metadata;
313 ++fields_here;
314 }
315 return true;
316}
317
318
319bool gdbmsourceclass::get_document (const text_t &OID, text_t &doc,
320 comerror_t &err, ostream &logout) {
321
322 outconvertclass text_t2ascii;
323
324 err = noError;
325 if (gdbmptr == NULL) {
326 // most likely a configuration problem
327 logout << text_t2ascii
328 << "configuration error: " << classname << " contains a null gdbmclass\n\n";
329 err = configurationError;
330 return true;
331 }
332
333 // open the database
334 gdbmptr->setlogout(&logout);
335 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
336 // most likely a system problem (we have already checked that the
337 // gdbm database exists)
338 logout << text_t2ascii
339 << "system problem: open on gdbm database \""
340 << gdbm_filename << "\" failed\n\n";
341 err = systemProblem;
342 return true;
343 }
344
345 text_t tOID = OID;
346 if (needs_translating (OID))
347 translate_OID (OID, tOID, err, logout);
348 infodbclass info;
349 if (!gdbmptr->getinfo(tOID, info)) return false;
350
351 if (info["hastxt"].getint() == 1) {
352 int docnum = info["docnum"].getint();
353
354 // set the collection directory
355 textsearchptr->setcollectdir (collectdir);
356
357 // get the text
358 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
359 defaultlanguage, collection, docnum, doc);
360 }
361 return true;
362}
363
364bool gdbmsourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout) {
365 err = noError;
366 issearchable = false;
367
368 text_tarray fromarray;
369 indexmap.getfromarray(fromarray);
370 if (fromarray.size() == 0) {
371 return true;
372 } else if (fromarray.size() == 1) {
373 if (fromarray[0] == "dummy:text") {
374 // always return true - issearchable is false here though
375 return true;
376 }
377 }
378 issearchable = true;
379 return true;
380}
Note: See TracBrowser for help on using the repository browser.