source: main/trunk/greenstone2/runtime-src/src/colservr/source.cpp@ 31389

Last change on this file since 31389 was 31389, checked in by ak19, 7 years ago

Oversight. Forgot the key statement when optimising the GS2 server-side OAI deletion policy code (to prevent it from getting all the metadata for a document ID that's been marked as deleted).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "colservertools.h"
28#include "fileutil.h"
29#include "OIDtools.h"
30#include <assert.h>
31
32
33sourceclass::sourceclass ()
34{
35 db_ptr = NULL;
36 oaidb_ptr = NULL;
37 textsearchptr = NULL;
38 classname = "source";
39}
40
41sourceclass::~sourceclass ()
42{
43 if (db_ptr != NULL) delete db_ptr;
44 if (textsearchptr != NULL) delete textsearchptr;
45
46 // http://stackoverflow.com/questions/677620/do-i-need-to-explicitly-call-the-base-virtual-destructor
47 // Answer: "No, destructors are called automatically [by the derived destructor] in the reverse
48 // order of construction. (Base classes last). Do not call base class destructors."
49 // The link also explains why the destructors should be declared as virtual in the header files.
50
51 // now delete the oaidb object pointed to by the oaidb_ptr too
52 if (oaidb_ptr != NULL) delete oaidb_ptr;
53 oaidb_ptr = NULL;
54}
55
56// configure should be called once for each configuration line
57void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
58{
59 if (cfgline.size() >= 1) {
60 const text_t &value = cfgline[0];
61
62 if (key == "collection") collection = value;
63 else if (key == "collectdir") collectdir = value;
64 else if (key == "gsdlhome") gsdlhome = value;
65 else if (key == "collecthome") collecthome = value;
66 else if (key == "gdbmhome") dbhome = value;
67 }
68
69 if (key == "indexmap") {
70 indexmap.importmap (cfgline);
71
72 } else if (key == "defaultindex") {
73 indexmap.from2to (cfgline[0], defaultindex);
74
75 } else if (key == "subcollectionmap") {
76 subcollectionmap.importmap (cfgline);
77
78 } else if (key == "defaultsubcollection") {
79 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
80
81 } else if (key == "languagemap") {
82 languagemap.importmap (cfgline);
83
84 } else if (key == "defaultlanguage") {
85 languagemap.from2to (cfgline[0], defaultlanguage);
86 } else if (key == "indexstem") {
87 indexstem = cfgline[0];
88 }
89}
90
91
92// init should be called after all the configuration is done but
93// before any other methods are called
94bool sourceclass::init (ostream &logout)
95{
96 outconvertclass text_t2ascii;
97
98 if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
99 if (dbhome.empty()) dbhome = gsdlhome;
100
101 if (defaultindex.empty()) {
102 // use first index in map as default if no default is set explicitly
103 text_tarray toarray;
104 indexmap.gettoarray(toarray);
105 if (toarray.size()) {
106 defaultindex = toarray[0];
107 }
108 }
109
110 if (defaultsubcollection.empty()) {
111 // use first subcollection in map as default if no default is set explicitly
112 text_tarray toarray;
113 subcollectionmap.gettoarray(toarray);
114 if (toarray.size()) {
115 defaultsubcollection = toarray[0];
116 }
117 }
118
119 if (defaultlanguage.empty()) {
120 // use first language in map as default if no default is set explicitly
121 text_tarray toarray;
122 languagemap.gettoarray(toarray);
123 if (toarray.size()) {
124 defaultlanguage = toarray[0];
125 }
126 }
127
128 // get the collection directory name
129 if (collectdir.empty()) {
130 collectdir = filename_cat (collecthome, collection);
131 }
132
133 if (db_ptr == NULL) {
134 // most likely a configuration problem
135 logout << text_t2ascii
136 << "configuration error: queryfilter contains a null dbclass\n\n";
137 return false;
138 }
139
140 // get the filename for the database and make sure it exists
141 if (indexstem.empty()) {
142 indexstem = collection;
143 }
144 db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
145 indexstem, db_ptr->getfileextension());
146 if (!file_exists(db_filename)) {
147 logout << text_t2ascii
148 << "warning: database \"" << db_filename << "\" does not exist\n\n";
149 // return false;
150 }
151
152 oaidb_filename = resolve_oaidb_filename(gsdlhome, dbhome, collecthome, collection,
153 oaidb_ptr->getfileextension());
154
155 return true;
156}
157
158
159// translate_OID translates OIDs using ".pr", ."fc" etc.
160bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
161 comerror_t &err, ostream &logout)
162{
163 outconvertclass text_t2ascii;
164
165 err = noError;
166 if (db_ptr == NULL) {
167 // most likely a configuration problem
168 logout << text_t2ascii
169 << "configuration error: " << classname << " contains a null dbclass\n\n";
170 err = configurationError;
171 return true;
172 }
173
174 // open the database
175 db_ptr->setlogout(&logout);
176 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
177 // most likely a system problem (we have already checked that the database exists)
178 logout << text_t2ascii
179 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
180 err = systemProblem;
181 return true;
182 }
183
184 infodbclass info;
185 OIDout = db_ptr->translate_OID (OIDin, info);
186 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
187 return true;
188}
189
190// get_oai_metadata fills out the oai metadata from oaidb if possible
191// if it is not responsible for the given OID then it will return false.
192bool sourceclass::get_oai_metadata (const text_t &requestParams, const text_t &refParams,
193 bool getParents, const text_tset &fields,
194 const text_t &OID, text_t &deleted_status, MetadataInfo_tmap &metadata,
195 comerror_t &err, ostream &logout)
196{
197 outconvertclass text_t2ascii;
198 metadata.erase(metadata.begin(), metadata.end());
199 err = noError;
200
201 //logout << text_t2ascii << "sourceclass::get_oai_meta for OID: " << OID << "\n";
202
203// ONLY if we're doing any OAI stuff (FROAI will be set then) will we even be in here
204// So next try to open the oai-inf db if it exists for this collection
205 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
206
207 logout << text_t2ascii
208 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
209 return false;
210
211 } else { // let's try opening the oaidb file
212 oaidb_ptr->setlogout(&logout);
213 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
214 // most likely a system problem (we have already checked that the database exists just above)
215 logout << text_t2ascii
216 << "system problem: open on database \""
217 << oaidb_filename << "\" failed\n\n";
218 err = systemProblem;
219 return false;
220 } // now we've opened the oai-inf db file successfully
221 }
222
223 // get the metadata (deleted status, timestamp, datestamp) for OID, and pushback onto the metadata array
224 infodbclass oai_info;
225 if (!oaidb_ptr->getinfo(OID, oai_info)) { // OID not found in oai_inf db
226 logout << text_t2ascii
227 << "warning: lookup for OID \"" << OID
228 << "\" in etc/oai-inf db failed when getting metadata (in sourceclass source.cpp).\n\n";
229
230 return false;
231 } else {
232 //logout << text_t2ascii << "@@@@ getting metadata for OID \"" << OID << "\" from etc/oai-inf db.\n\n";
233 //logout << text_t2ascii << "@@@@ timestamp: " << oai_info["timestamp"] << "\n\n";
234
235 metadata["oaiinf.status"].isRef = false;
236 metadata["oaiinf.status"].values.push_back(oai_info["status"]);
237 metadata["oaiinf.timestamp"].isRef = false;
238 metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
239 metadata["oaiinf.datestamp"].isRef = false;
240 metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
241
242 deleted_status = oai_info["status"];
243 }
244
245 // we're done with oai-inf db
246
247 oaidb_ptr->closedatabase(); // don't leave files open
248
249 return true;
250}
251
252
253// get_metadata fills out the metadata if possible, if it is not responsible
254// for the given OID then it will return false.
255bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
256 bool getParents, const text_tset &fields,
257 const text_t &OID, MetadataInfo_tmap &metadata,
258 comerror_t &err, ostream &logout, bool append)
259{
260 outconvertclass text_t2ascii;
261
262 if(!append) {
263 metadata.erase(metadata.begin(), metadata.end());
264 }
265
266 err = noError;
267 if (db_ptr == NULL) {
268 // most likely a configuration problem
269 logout << text_t2ascii
270 << "configuration error: " << classname <<" contains a null dbclass\n\n";
271 err = configurationError;
272 return true;
273 }
274
275 // open the database
276 db_ptr->setlogout(&logout);
277 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
278 // most likely a system problem (we have already checked that the database exists)
279 logout << text_t2ascii
280 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
281 err = systemProblem;
282 return true;
283 }
284
285 // get the metadata - if getParents is set we need to get
286 // info for all parents of OID as well as OID
287 vector<infodbclass> info_array;
288 text_tarray OIDs;
289 if (getParents) get_parents_array (OID, OIDs);
290 OIDs.push_back (OID);
291
292 text_tarray::const_iterator this_OID = OIDs.begin();
293 text_tarray::const_iterator end_OID = OIDs.end();
294
295 while (this_OID != end_OID) {
296 infodbclass info;
297 if (!db_ptr->getinfo(*this_OID, info)) return false;
298
299 // adjust the metadata
300 text_t &contains = info["contains"];
301 if (contains.empty()) info["haschildren"] = 0;
302 else info["haschildren"] = 1;
303 //contains.clear();
304
305 info_array.push_back(info);
306 ++this_OID;
307 }
308
309 // if fields set is empty we want to get all available metadata
310 text_tset tfields = fields;
311 if (tfields.empty() && !info_array.empty()) {
312 infodbclass::iterator t_info = info_array[0].begin();
313 infodbclass::iterator e_info = info_array[0].end();
314 while (t_info != e_info) {
315 if ((*t_info).first != "contains")
316 tfields.insert ((*t_info).first);
317 ++t_info;
318 }
319 tfields.insert ("hasnext");
320 tfields.insert ("hasprevious");
321 }
322
323 // collect together the metadata
324 bool donenextprevtest = false;
325 bool hasnext=false, hasprevious=false;
326 MetadataInfo_t this_metadata;
327 text_tarray *pos_metadata;
328 text_tset::const_iterator fields_here = tfields.begin();
329 text_tset::const_iterator fields_end = tfields.end();
330
331 while (fields_here != fields_end) {
332 this_metadata.clear();
333 this_metadata.isRef = false;
334
335 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
336 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
337 MetadataInfo_t *tmetaptr = &this_metadata;
338 while (this_info != end_info) {
339
340 pos_metadata = (*this_info).getmultinfo(*fields_here);
341 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
342
343 // collect metadata
344 if (!donenextprevtest) {
345 donenextprevtest = true;
346
347 // cache parent contents array
348 text_t thisparent = get_parent (OID);
349 if (!thisparent.empty()) {
350 if (thisparent != parentOID) {
351 parentOID = thisparent;
352 parentcontents.erase(parentcontents.begin(), parentcontents.end());
353 if (db_ptr->getinfo(parentOID, parentinfo)) {
354 text_t &parentinfocontains = parentinfo["contains"];
355 if (!parentinfocontains.empty())
356 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
357 ';', parentcontents);
358 }
359 }
360
361 // do tests
362 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
363 text_tarray::const_iterator parentcontents_end = parentcontents.end();
364 text_t shrunk_OID = OID;
365 shrink_parent (shrunk_OID);
366 while (parentcontents_here != parentcontents_end) {
367 if (*parentcontents_here == shrunk_OID) {
368 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
369 else hasprevious = true;
370
371 ++parentcontents_here;
372
373 if (parentcontents_here == parentcontents.end()) hasnext = false;
374 else hasnext = true;
375
376 break;
377 }
378
379 ++parentcontents_here;
380 }
381
382 // fill in metadata
383 if ((*fields_here == "hasnext" && hasnext) ||
384 (*fields_here == "hasprevious" && hasprevious))
385 tmetaptr->values.push_back("1");
386 else
387 tmetaptr->values.push_back("0");
388 } else
389 tmetaptr->values.push_back("0");
390 }
391 }
392 //else if (pos_metadata != NULL && *fields_here != "contains") {
393 else if (pos_metadata != NULL) {
394 tmetaptr->values = *pos_metadata;
395 }
396 else
397 tmetaptr->values.push_back("");
398
399 ++this_info;
400 if (this_info != end_info) {
401 tmetaptr->parent = new MetadataInfo_t();
402 tmetaptr = tmetaptr->parent;
403 }
404 }
405 metadata[*fields_here] = this_metadata;
406 ++fields_here;
407 }
408
409 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
410 return true;
411}
412
413bool sourceclass::get_document (const text_t &OID, text_t &doc,
414 comerror_t &err, ostream &logout)
415{
416 outconvertclass text_t2ascii;
417
418 err = noError;
419 if (db_ptr == NULL) {
420 // most likely a configuration problem
421 logout << text_t2ascii
422 << "configuration error: " << classname << " contains a null dbclass\n\n";
423 err = configurationError;
424 return true;
425 }
426
427 // open the database
428 db_ptr->setlogout(&logout);
429 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
430 // most likely a system problem (we have already checked that the database exists)
431 logout << text_t2ascii
432 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
433 err = systemProblem;
434 return true;
435 }
436
437 text_t tOID = OID;
438 if (needs_translating (OID))
439 translate_OID (OID, tOID, err, logout);
440 infodbclass info;
441 if (!db_ptr->getinfo(tOID, info)) {
442 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
443 return false;
444 }
445
446 if (info["hastxt"].getint() == 1) {
447 int docnum = info["docnum"].getint();
448
449 // set the collection directory
450 textsearchptr->setcollectdir (collectdir);
451
452 // get the text
453 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
454 defaultlanguage, collection, docnum, doc);
455
456 // remove the <Doc></Doc> and <Sec></Sec> tags
457 doc.replace("<Doc>", "");
458 doc.replace("</Doc>", "");
459 doc.replace("<Sec>", "");
460 doc.replace("</Sec>", "");
461 }
462
463 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
464 return true;
465}
466
467bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
468{
469 err = noError;
470 issearchable = false;
471
472 text_tarray fromarray;
473 indexmap.getfromarray(fromarray);
474 if (fromarray.size() == 0) {
475 return true;
476 } else if (fromarray.size() == 1) {
477 if (fromarray[0] == "dummy:text") {
478 // always return true - issearchable is false here though
479 return true;
480 }
481 }
482 issearchable = true;
483 return true;
484}
485
486
487bool operator==(const sourceptr &x, const sourceptr &y) {
488 return (x.s == y.s);
489}
490
491bool operator<(const sourceptr &x, const sourceptr &y) {
492 return (x.s < y.s);
493}
494
495
496// thesource remains the property of the calling code but
497// should not be deleted until it is removed from this list.
498void sourcelistclass::addsource (sourceclass *thesource) {
499 // can't add a source that doesn't exist
500 assert (thesource != NULL);
501 if (thesource == NULL) return;
502
503 sourceptr sp;
504 sp.s = thesource;
505
506 sourceptrs.push_back(sp);
507}
Note: See TracBrowser for help on using the repository browser.