source: main/trunk/greenstone2/runtime-src/src/colservr/source.cpp@ 21772

Last change on this file since 21772 was 21772, checked in by ak19, 11 years ago

Removing the Doc and Sec opening and closing tags, since they interfere with the validity of the html pages output (greenstone3 also fixed).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.6 KB
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include <assert.h>
30
31
32sourceclass::sourceclass ()
33{
34 db_ptr = NULL;
35 textsearchptr = NULL;
36 classname = "source";
37}
38
39sourceclass::~sourceclass ()
40{
41 if (db_ptr != NULL) delete db_ptr;
42 if (textsearchptr != NULL) delete textsearchptr;
43}
44
45// configure should be called once for each configuration line
46void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
47{
48 if (cfgline.size() >= 1) {
49 const text_t &value = cfgline[0];
50
51 if (key == "collection") collection = value;
52 else if (key == "collectdir") collectdir = value;
53 else if (key == "gsdlhome") gsdlhome = value;
54 else if (key == "collecthome") collecthome = value;
55 else if (key == "gdbmhome") dbhome = value;
56 }
57
58 if (key == "indexmap") {
59 indexmap.importmap (cfgline);
60
61 } else if (key == "defaultindex") {
62 indexmap.from2to (cfgline[0], defaultindex);
63
64 } else if (key == "subcollectionmap") {
65 subcollectionmap.importmap (cfgline);
66
67 } else if (key == "defaultsubcollection") {
68 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
69
70 } else if (key == "languagemap") {
71 languagemap.importmap (cfgline);
72
73 } else if (key == "defaultlanguage") {
74 languagemap.from2to (cfgline[0], defaultlanguage);
75 } else if (key == "indexstem") {
76 indexstem = cfgline[0];
77 }
78}
79
80text_t sourceclass::resolve_db_filename(const text_t& idx,
81 const text_t& file_ext)
82{
83 // This is an exact copy of the method (of the same name) in filterclass
84 // Makes sense to merge them, in which either gsdlhome, collecthome,
85 // dbhome, and collection need to also be passed in as parameters,
86 // or else there is some notion of a shared base class that both
87 // filter and source inherit from
88
89 // NB: there is an even greater opportunity to share more code in this
90 // function if sql_db_ptr/db_ptr and db_filename and sql_db_filename
91 // are also drawn from one object
92
93 text_t resolved_filename;
94
95 if (gsdlhome==dbhome) {
96 // dbhome has defaulted to gsdlhome which we take to means the
97 // database has been specifically moved out of gsdlhome area.
98 // => it should be whereever collecthome is set to
99
100 resolved_filename = filename_cat(collecthome, collection, "index", "text", idx);
101 }
102 else {
103 // dbhome is explicitly set to something other than gsdlhome
104 // => use dbhome
105 resolved_filename = filename_cat(dbhome, "collect", collection, "index", "text", idx);
106 }
107
108 resolved_filename += file_ext;
109
110 return resolved_filename;
111}
112
113
114
115// init should be called after all the configuration is done but
116// before any other methods are called
117bool sourceclass::init (ostream &logout)
118{
119 outconvertclass text_t2ascii;
120
121 if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
122 if (dbhome.empty()) dbhome = gsdlhome;
123
124 if (defaultindex.empty()) {
125 // use first index in map as default if no default is set explicitly
126 text_tarray toarray;
127 indexmap.gettoarray(toarray);
128 if (toarray.size()) {
129 defaultindex = toarray[0];
130 }
131 }
132
133 if (defaultsubcollection.empty()) {
134 // use first subcollection in map as default if no default is set explicitly
135 text_tarray toarray;
136 subcollectionmap.gettoarray(toarray);
137 if (toarray.size()) {
138 defaultsubcollection = toarray[0];
139 }
140 }
141
142 if (defaultlanguage.empty()) {
143 // use first language in map as default if no default is set explicitly
144 text_tarray toarray;
145 languagemap.gettoarray(toarray);
146 if (toarray.size()) {
147 defaultlanguage = toarray[0];
148 }
149 }
150
151 // get the collection directory name
152 if (collectdir.empty()) {
153 collectdir = filename_cat (collecthome, collection);
154 }
155
156 if (db_ptr == NULL) {
157 // most likely a configuration problem
158 logout << text_t2ascii
159 << "configuration error: queryfilter contains a null dbclass\n\n";
160 return false;
161 }
162
163 // get the filename for the database and make sure it exists
164 if (indexstem.empty()) {
165 indexstem = collection;
166 }
167 db_filename = resolve_db_filename(indexstem, db_ptr->getfileextension());
168 if (!file_exists(db_filename)) {
169 logout << text_t2ascii
170 << "warning: database \"" << db_filename << "\" does not exist\n\n";
171 // return false;
172 }
173
174 return true;
175}
176
177
178// translate_OID translates OIDs using ".pr", ."fc" etc.
179bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
180 comerror_t &err, ostream &logout)
181{
182 outconvertclass text_t2ascii;
183
184 err = noError;
185 if (db_ptr == NULL) {
186 // most likely a configuration problem
187 logout << text_t2ascii
188 << "configuration error: " << classname << " contains a null dbclass\n\n";
189 err = configurationError;
190 return true;
191 }
192
193 // open the database
194 db_ptr->setlogout(&logout);
195 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
196 // most likely a system problem (we have already checked that the database exists)
197 logout << text_t2ascii
198 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
199 err = systemProblem;
200 return true;
201 }
202
203 infodbclass info;
204 OIDout = db_ptr->translate_OID (OIDin, info);
205 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
206 return true;
207}
208
209
210// get_metadata fills out the metadata if possible, if it is not responsible
211// for the given OID then it will return false.
212bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
213 bool getParents, const text_tset &fields,
214 const text_t &OID, MetadataInfo_tmap &metadata,
215 comerror_t &err, ostream &logout)
216{
217 outconvertclass text_t2ascii;
218
219 metadata.erase(metadata.begin(), metadata.end());
220
221 err = noError;
222 if (db_ptr == NULL) {
223 // most likely a configuration problem
224 logout << text_t2ascii
225 << "configuration error: " << classname <<" contains a null dbclass\n\n";
226 err = configurationError;
227 return true;
228 }
229
230 // open the database
231 db_ptr->setlogout(&logout);
232 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
233 // most likely a system problem (we have already checked that the database exists)
234 logout << text_t2ascii
235 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
236 err = systemProblem;
237 return true;
238 }
239
240 // get the metadata - if getParents is set we need to get
241 // info for all parents of OID as well as OID
242 vector<infodbclass> info_array;
243 text_tarray OIDs;
244 if (getParents) get_parents_array (OID, OIDs);
245 OIDs.push_back (OID);
246
247 text_tarray::const_iterator this_OID = OIDs.begin();
248 text_tarray::const_iterator end_OID = OIDs.end();
249
250 while (this_OID != end_OID) {
251 infodbclass info;
252 if (!db_ptr->getinfo(*this_OID, info)) return false;
253
254 // adjust the metadata
255 text_t &contains = info["contains"];
256 if (contains.empty()) info["haschildren"] = 0;
257 else info["haschildren"] = 1;
258 //contains.clear();
259
260 info_array.push_back(info);
261 ++this_OID;
262 }
263
264 // if fields set is empty we want to get all available metadata
265 text_tset tfields = fields;
266 if (tfields.empty() && !info_array.empty()) {
267 infodbclass::iterator t_info = info_array[0].begin();
268 infodbclass::iterator e_info = info_array[0].end();
269 while (t_info != e_info) {
270 if ((*t_info).first != "contains")
271 tfields.insert ((*t_info).first);
272 ++t_info;
273 }
274 tfields.insert ("hasnext");
275 tfields.insert ("hasprevious");
276 }
277
278 // collect together the metadata
279 bool donenextprevtest = false;
280 bool hasnext=false, hasprevious=false;
281 MetadataInfo_t this_metadata;
282 text_tarray *pos_metadata;
283 text_tset::const_iterator fields_here = tfields.begin();
284 text_tset::const_iterator fields_end = tfields.end();
285
286 while (fields_here != fields_end) {
287 this_metadata.clear();
288 this_metadata.isRef = false;
289
290 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
291 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
292 MetadataInfo_t *tmetaptr = &this_metadata;
293 while (this_info != end_info) {
294
295 pos_metadata = (*this_info).getmultinfo(*fields_here);
296 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
297
298 // collect metadata
299 if (!donenextprevtest) {
300 donenextprevtest = true;
301
302 // cache parent contents array
303 text_t thisparent = get_parent (OID);
304 if (!thisparent.empty()) {
305 if (thisparent != parentOID) {
306 parentOID = thisparent;
307 parentcontents.erase(parentcontents.begin(), parentcontents.end());
308 if (db_ptr->getinfo(parentOID, parentinfo)) {
309 text_t &parentinfocontains = parentinfo["contains"];
310 if (!parentinfocontains.empty())
311 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
312 ';', parentcontents);
313 }
314 }
315
316 // do tests
317 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
318 text_tarray::const_iterator parentcontents_end = parentcontents.end();
319 text_t shrunk_OID = OID;
320 shrink_parent (shrunk_OID);
321 while (parentcontents_here != parentcontents_end) {
322 if (*parentcontents_here == shrunk_OID) {
323 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
324 else hasprevious = true;
325
326 ++parentcontents_here;
327
328 if (parentcontents_here == parentcontents.end()) hasnext = false;
329 else hasnext = true;
330
331 break;
332 }
333
334 ++parentcontents_here;
335 }
336
337 // fill in metadata
338 if ((*fields_here == "hasnext" && hasnext) ||
339 (*fields_here == "hasprevious" && hasprevious))
340 tmetaptr->values.push_back("1");
341 else
342 tmetaptr->values.push_back("0");
343 } else
344 tmetaptr->values.push_back("0");
345 }
346 }
347 //else if (pos_metadata != NULL && *fields_here != "contains") {
348 else if (pos_metadata != NULL) {
349 tmetaptr->values = *pos_metadata;
350 }
351 else
352 tmetaptr->values.push_back("");
353
354 ++this_info;
355 if (this_info != end_info) {
356 tmetaptr->parent = new MetadataInfo_t();
357 tmetaptr = tmetaptr->parent;
358 }
359 }
360 metadata[*fields_here] = this_metadata;
361 ++fields_here;
362 }
363
364 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
365 return true;
366}
367
368bool sourceclass::get_document (const text_t &OID, text_t &doc,
369 comerror_t &err, ostream &logout)
370{
371 outconvertclass text_t2ascii;
372
373 err = noError;
374 if (db_ptr == NULL) {
375 // most likely a configuration problem
376 logout << text_t2ascii
377 << "configuration error: " << classname << " contains a null dbclass\n\n";
378 err = configurationError;
379 return true;
380 }
381
382 // open the database
383 db_ptr->setlogout(&logout);
384 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
385 // most likely a system problem (we have already checked that the database exists)
386 logout << text_t2ascii
387 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
388 err = systemProblem;
389 return true;
390 }
391
392 text_t tOID = OID;
393 if (needs_translating (OID))
394 translate_OID (OID, tOID, err, logout);
395 infodbclass info;
396 if (!db_ptr->getinfo(tOID, info)) {
397 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
398 return false;
399 }
400
401 if (info["hastxt"].getint() == 1) {
402 int docnum = info["docnum"].getint();
403
404 // set the collection directory
405 textsearchptr->setcollectdir (collectdir);
406
407 // get the text
408 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
409 defaultlanguage, collection, docnum, doc);
410
411 // remove the <Doc></Doc> and <Sec></Sec> tags
412 doc.replace("<Doc>", "");
413 doc.replace("</Doc>", "");
414 doc.replace("<Sec>", "");
415 doc.replace("</Sec>", "");
416 }
417
418 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
419 return true;
420}
421
422bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
423{
424 err = noError;
425 issearchable = false;
426
427 text_tarray fromarray;
428 indexmap.getfromarray(fromarray);
429 if (fromarray.size() == 0) {
430 return true;
431 } else if (fromarray.size() == 1) {
432 if (fromarray[0] == "dummy:text") {
433 // always return true - issearchable is false here though
434 return true;
435 }
436 }
437 issearchable = true;
438 return true;
439}
440
441
442bool operator==(const sourceptr &x, const sourceptr &y) {
443 return (x.s == y.s);
444}
445
446bool operator<(const sourceptr &x, const sourceptr &y) {
447 return (x.s < y.s);
448}
449
450
451// thesource remains the property of the calling code but
452// should not be deleted until it is removed from this list.
453void sourcelistclass::addsource (sourceclass *thesource) {
454 // can't add a source that doesn't exist
455 assert (thesource != NULL);
456 if (thesource == NULL) return;
457
458 sourceptr sp;
459 sp.s = thesource;
460
461 sourceptrs.push_back(sp);
462}
Note: See TracBrowser for help on using the repository browser.