source: main/trunk/greenstone2/runtime-src/src/colservr/source.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 17.6 KB
Line 
1/**********************************************************************
2 *
3 * source.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "source.h"
27#include "fileutil.h"
28#include "OIDtools.h"
29#include <assert.h>
30
31
32sourceclass::sourceclass ()
33{
34 db_ptr = NULL;
35 oaidb_ptr = NULL;
36 textsearchptr = NULL;
37 classname = "source";
38}
39
40sourceclass::~sourceclass ()
41{
42 if (db_ptr != NULL) delete db_ptr;
43 if (textsearchptr != NULL) delete textsearchptr;
44
45 // http://stackoverflow.com/questions/677620/do-i-need-to-explicitly-call-the-base-virtual-destructor
46 // Answer: "No, destructors are called automatically [by the derived destructor] in the reverse
47 // order of construction. (Base classes last). Do not call base class destructors."
48 // The link also explains why the destructors should be declared as virtual in the header files.
49
50 // now delete the oaidb object pointed to by the oaidb_ptr too
51 if (oaidb_ptr != NULL) delete oaidb_ptr;
52 oaidb_ptr = NULL;
53}
54
55// configure should be called once for each configuration line
56void sourceclass::configure (const text_t &key, const text_tarray &cfgline)
57{
58 if (cfgline.size() >= 1) {
59 const text_t &value = cfgline[0];
60
61 if (key == "collection") collection = value;
62 else if (key == "collectdir") collectdir = value;
63 else if (key == "gsdlhome") gsdlhome = value;
64 else if (key == "collecthome") collecthome = value;
65 else if (key == "gdbmhome") dbhome = value;
66 }
67
68 if (key == "indexmap") {
69 indexmap.importmap (cfgline);
70
71 } else if (key == "defaultindex") {
72 indexmap.from2to (cfgline[0], defaultindex);
73
74 } else if (key == "subcollectionmap") {
75 subcollectionmap.importmap (cfgline);
76
77 } else if (key == "defaultsubcollection") {
78 subcollectionmap.from2to (cfgline[0], defaultsubcollection);
79
80 } else if (key == "languagemap") {
81 languagemap.importmap (cfgline);
82
83 } else if (key == "defaultlanguage") {
84 languagemap.from2to (cfgline[0], defaultlanguage);
85 } else if (key == "indexstem") {
86 indexstem = cfgline[0];
87 }
88}
89
90text_t sourceclass::getcollectionpath()
91{
92 text_t resolved_filename;
93
94 if (gsdlhome==dbhome) {
95 // dbhome has defaulted to gsdlhome which we take to means the
96 // database has been specifically moved out of gsdlhome area.
97 // => it should be whereever collecthome is set to
98 resolved_filename = filename_cat(collecthome, collection);
99 }
100 else {
101 // dbhome is explicitly set to something other than gsdlhome
102 // => use dbhome
103 resolved_filename = filename_cat(dbhome, "collect", collection);
104 }
105
106 return resolved_filename;
107}
108
109text_t sourceclass::resolve_db_filename(const text_t& idx,
110 const text_t& file_ext)
111{
112 // This is an exact copy of the method (of the same name) in filterclass
113 // Makes sense to merge them, in which either gsdlhome, collecthome,
114 // dbhome, and collection need to also be passed in as parameters,
115 // or else there is some notion of a shared base class that both
116 // filter and source inherit from
117
118 // NB: there is an even greater opportunity to share more code in this
119 // function if sql_db_ptr/db_ptr and db_filename and sql_db_filename
120 // are also drawn from one object
121
122 /*
123 text_t resolved_filename;
124
125 if (gsdlhome==dbhome) {
126 // dbhome has defaulted to gsdlhome which we take to means the
127 // database has been specifically moved out of gsdlhome area.
128 // => it should be whereever collecthome is set to
129
130 resolved_filename = filename_cat(collecthome, collection, "index", "text", idx);
131 }
132 else {
133 // dbhome is explicitly set to something other than gsdlhome
134 // => use dbhome
135 resolved_filename = filename_cat(dbhome, "collect", collection, "index", "text", idx);
136 }
137 */
138
139 text_t resolved_filename = filename_cat(getcollectionpath(), "index", "text", idx);
140 resolved_filename += file_ext;
141
142 return resolved_filename;
143}
144
145text_t sourceclass::resolve_oaidb_filename(const text_t& file_ext)
146{
147 text_t resolved_filename = filename_cat(getcollectionpath(), "etc", "oai-inf");
148
149 resolved_filename += file_ext;
150
151 return resolved_filename;
152}
153
154// init should be called after all the configuration is done but
155// before any other methods are called
156bool sourceclass::init (ostream &logout)
157{
158 outconvertclass text_t2ascii;
159
160 if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
161 if (dbhome.empty()) dbhome = gsdlhome;
162
163 if (defaultindex.empty()) {
164 // use first index in map as default if no default is set explicitly
165 text_tarray toarray;
166 indexmap.gettoarray(toarray);
167 if (toarray.size()) {
168 defaultindex = toarray[0];
169 }
170 }
171
172 if (defaultsubcollection.empty()) {
173 // use first subcollection in map as default if no default is set explicitly
174 text_tarray toarray;
175 subcollectionmap.gettoarray(toarray);
176 if (toarray.size()) {
177 defaultsubcollection = toarray[0];
178 }
179 }
180
181 if (defaultlanguage.empty()) {
182 // use first language in map as default if no default is set explicitly
183 text_tarray toarray;
184 languagemap.gettoarray(toarray);
185 if (toarray.size()) {
186 defaultlanguage = toarray[0];
187 }
188 }
189
190 // get the collection directory name
191 if (collectdir.empty()) {
192 collectdir = filename_cat (collecthome, collection);
193 }
194
195 if (db_ptr == NULL) {
196 // most likely a configuration problem
197 logout << text_t2ascii
198 << "configuration error: queryfilter contains a null dbclass\n\n";
199 return false;
200 }
201
202 // get the filename for the database and make sure it exists
203 if (indexstem.empty()) {
204 indexstem = collection;
205 }
206 db_filename = resolve_db_filename(indexstem, db_ptr->getfileextension());
207 if (!file_exists(db_filename)) {
208 logout << text_t2ascii
209 << "warning: database \"" << db_filename << "\" does not exist\n\n";
210 // return false;
211 }
212
213 oaidb_filename = resolve_oaidb_filename(oaidb_ptr->getfileextension());
214
215 return true;
216}
217
218
219// translate_OID translates OIDs using ".pr", ."fc" etc.
220bool sourceclass::translate_OID (const text_t &OIDin, text_t &OIDout,
221 comerror_t &err, ostream &logout)
222{
223 outconvertclass text_t2ascii;
224
225 err = noError;
226 if (db_ptr == NULL) {
227 // most likely a configuration problem
228 logout << text_t2ascii
229 << "configuration error: " << classname << " contains a null dbclass\n\n";
230 err = configurationError;
231 return true;
232 }
233
234 // open the database
235 db_ptr->setlogout(&logout);
236 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
237 // most likely a system problem (we have already checked that the database exists)
238 logout << text_t2ascii
239 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
240 err = systemProblem;
241 return true;
242 }
243
244 infodbclass info;
245 OIDout = db_ptr->translate_OID (OIDin, info);
246 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
247 return true;
248}
249
250// get_oai_metadata fills out the oai metadata from oaidb if possible
251// if it is not responsible for the given OID then it will return false.
252bool sourceclass::get_oai_metadata (const text_t &requestParams, const text_t &refParams,
253 bool getParents, const text_tset &fields,
254 const text_t &OID, text_t &deleted_status, MetadataInfo_tmap &metadata,
255 comerror_t &err, ostream &logout)
256{
257 outconvertclass text_t2ascii;
258 metadata.erase(metadata.begin(), metadata.end());
259 err = noError;
260
261 //logout << text_t2ascii << "sourceclass::get_oai_meta for OID: " << OID << "\n";
262
263// ONLY if we're doing any OAI stuff (FROAI will be set then) will we even be in here
264// So next try to open the oai-inf db if it exists for this collection
265 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
266
267 logout << text_t2ascii
268 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
269 return false;
270
271 } else { // let's try opening the oaidb file
272 oaidb_ptr->setlogout(&logout);
273 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
274 // most likely a system problem (we have already checked that the database exists just above)
275 logout << text_t2ascii
276 << "system problem: open on database \""
277 << oaidb_filename << "\" failed\n\n";
278 err = systemProblem;
279 return false;
280 } // now we've opened the oai-inf db file successfully
281 }
282
283 // get the metadata (deleted status, timestamp, datestamp) for OID, and pushback onto the metadata array
284 infodbclass oai_info;
285 if (!oaidb_ptr->getinfo(OID, oai_info)) { // OID not found in oai_inf db
286 logout << text_t2ascii
287 << "warning: lookup for OID \"" << OID
288 << "\" in etc/oai-inf db failed when getting metadata (in sourceclass source.cpp).\n\n";
289
290 return false;
291 } else {
292 //logout << text_t2ascii << "@@@@ getting metadata for OID \"" << OID << "\" from etc/oai-inf db.\n\n";
293 //logout << text_t2ascii << "@@@@ timestamp: " << oai_info["timestamp"] << "\n\n";
294
295 metadata["oaiinf.status"].isRef = false;
296 metadata["oaiinf.status"].values.push_back(oai_info["status"]);
297 metadata["oaiinf.timestamp"].isRef = false;
298 metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
299 metadata["oaiinf.datestamp"].isRef = false;
300 metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
301 }
302
303 // we're done with oai-inf db
304
305 oaidb_ptr->closedatabase(); // don't leave files open
306
307 return true;
308}
309
310
311// get_metadata fills out the metadata if possible, if it is not responsible
312// for the given OID then it will return false.
313bool sourceclass::get_metadata (const text_t &requestParams, const text_t &refParams,
314 bool getParents, const text_tset &fields,
315 const text_t &OID, MetadataInfo_tmap &metadata,
316 comerror_t &err, ostream &logout, bool append)
317{
318 outconvertclass text_t2ascii;
319
320 if(!append) {
321 metadata.erase(metadata.begin(), metadata.end());
322 }
323
324 err = noError;
325 if (db_ptr == NULL) {
326 // most likely a configuration problem
327 logout << text_t2ascii
328 << "configuration error: " << classname <<" contains a null dbclass\n\n";
329 err = configurationError;
330 return true;
331 }
332
333 // open the database
334 db_ptr->setlogout(&logout);
335 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
336 // most likely a system problem (we have already checked that the database exists)
337 logout << text_t2ascii
338 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
339 err = systemProblem;
340 return true;
341 }
342
343 // get the metadata - if getParents is set we need to get
344 // info for all parents of OID as well as OID
345 vector<infodbclass> info_array;
346 text_tarray OIDs;
347 if (getParents) get_parents_array (OID, OIDs);
348 OIDs.push_back (OID);
349
350 text_tarray::const_iterator this_OID = OIDs.begin();
351 text_tarray::const_iterator end_OID = OIDs.end();
352
353 while (this_OID != end_OID) {
354 infodbclass info;
355 if (!db_ptr->getinfo(*this_OID, info)) return false;
356
357 // adjust the metadata
358 text_t &contains = info["contains"];
359 if (contains.empty()) info["haschildren"] = 0;
360 else info["haschildren"] = 1;
361 //contains.clear();
362
363 info_array.push_back(info);
364 ++this_OID;
365 }
366
367 // if fields set is empty we want to get all available metadata
368 text_tset tfields = fields;
369 if (tfields.empty() && !info_array.empty()) {
370 infodbclass::iterator t_info = info_array[0].begin();
371 infodbclass::iterator e_info = info_array[0].end();
372 while (t_info != e_info) {
373 if ((*t_info).first != "contains")
374 tfields.insert ((*t_info).first);
375 ++t_info;
376 }
377 tfields.insert ("hasnext");
378 tfields.insert ("hasprevious");
379 }
380
381 // collect together the metadata
382 bool donenextprevtest = false;
383 bool hasnext=false, hasprevious=false;
384 MetadataInfo_t this_metadata;
385 text_tarray *pos_metadata;
386 text_tset::const_iterator fields_here = tfields.begin();
387 text_tset::const_iterator fields_end = tfields.end();
388
389 while (fields_here != fields_end) {
390 this_metadata.clear();
391 this_metadata.isRef = false;
392
393 vector<infodbclass>::reverse_iterator this_info = info_array.rbegin();
394 vector<infodbclass>::reverse_iterator end_info = info_array.rend();
395 MetadataInfo_t *tmetaptr = &this_metadata;
396 while (this_info != end_info) {
397
398 pos_metadata = (*this_info).getmultinfo(*fields_here);
399 if ((*fields_here == "hasnext" || *fields_here == "hasprevious")) {
400
401 // collect metadata
402 if (!donenextprevtest) {
403 donenextprevtest = true;
404
405 // cache parent contents array
406 text_t thisparent = get_parent (OID);
407 if (!thisparent.empty()) {
408 if (thisparent != parentOID) {
409 parentOID = thisparent;
410 parentcontents.erase(parentcontents.begin(), parentcontents.end());
411 if (db_ptr->getinfo(parentOID, parentinfo)) {
412 text_t &parentinfocontains = parentinfo["contains"];
413 if (!parentinfocontains.empty())
414 splitchar (parentinfocontains.begin(), parentinfocontains.end(),
415 ';', parentcontents);
416 }
417 }
418
419 // do tests
420 text_tarray::const_iterator parentcontents_here = parentcontents.begin();
421 text_tarray::const_iterator parentcontents_end = parentcontents.end();
422 text_t shrunk_OID = OID;
423 shrink_parent (shrunk_OID);
424 while (parentcontents_here != parentcontents_end) {
425 if (*parentcontents_here == shrunk_OID) {
426 if (parentcontents_here == parentcontents.begin()) hasprevious = false;
427 else hasprevious = true;
428
429 ++parentcontents_here;
430
431 if (parentcontents_here == parentcontents.end()) hasnext = false;
432 else hasnext = true;
433
434 break;
435 }
436
437 ++parentcontents_here;
438 }
439
440 // fill in metadata
441 if ((*fields_here == "hasnext" && hasnext) ||
442 (*fields_here == "hasprevious" && hasprevious))
443 tmetaptr->values.push_back("1");
444 else
445 tmetaptr->values.push_back("0");
446 } else
447 tmetaptr->values.push_back("0");
448 }
449 }
450 //else if (pos_metadata != NULL && *fields_here != "contains") {
451 else if (pos_metadata != NULL) {
452 tmetaptr->values = *pos_metadata;
453 }
454 else
455 tmetaptr->values.push_back("");
456
457 ++this_info;
458 if (this_info != end_info) {
459 tmetaptr->parent = new MetadataInfo_t();
460 tmetaptr = tmetaptr->parent;
461 }
462 }
463 metadata[*fields_here] = this_metadata;
464 ++fields_here;
465 }
466
467 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
468 return true;
469}
470
471bool sourceclass::get_document (const text_t &OID, text_t &doc,
472 comerror_t &err, ostream &logout)
473{
474 outconvertclass text_t2ascii;
475
476 err = noError;
477 if (db_ptr == NULL) {
478 // most likely a configuration problem
479 logout << text_t2ascii
480 << "configuration error: " << classname << " contains a null dbclass\n\n";
481 err = configurationError;
482 return true;
483 }
484
485 // open the database
486 db_ptr->setlogout(&logout);
487 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
488 // most likely a system problem (we have already checked that the database exists)
489 logout << text_t2ascii
490 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
491 err = systemProblem;
492 return true;
493 }
494
495 text_t tOID = OID;
496 if (needs_translating (OID))
497 translate_OID (OID, tOID, err, logout);
498 infodbclass info;
499 if (!db_ptr->getinfo(tOID, info)) {
500 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
501 return false;
502 }
503
504 if (info["hastxt"].getint() == 1) {
505 int docnum = info["docnum"].getint();
506
507 // set the collection directory
508 textsearchptr->setcollectdir (collectdir);
509
510 // get the text
511 textsearchptr->docTargetDocument(defaultindex, defaultsubcollection,
512 defaultlanguage, collection, docnum, doc);
513
514 // remove the <Doc></Doc> and <Sec></Sec> tags
515 doc.replace("<Doc>", "");
516 doc.replace("</Doc>", "");
517 doc.replace("<Sec>", "");
518 doc.replace("</Sec>", "");
519 }
520
521 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
522 return true;
523}
524
525bool sourceclass::is_searchable(bool &issearchable, comerror_t &err, ostream &logout)
526{
527 err = noError;
528 issearchable = false;
529
530 text_tarray fromarray;
531 indexmap.getfromarray(fromarray);
532 if (fromarray.size() == 0) {
533 return true;
534 } else if (fromarray.size() == 1) {
535 if (fromarray[0] == "dummy:text") {
536 // always return true - issearchable is false here though
537 return true;
538 }
539 }
540 issearchable = true;
541 return true;
542}
543
544
545bool operator==(const sourceptr &x, const sourceptr &y) {
546 return (x.s == y.s);
547}
548
549bool operator<(const sourceptr &x, const sourceptr &y) {
550 return (x.s < y.s);
551}
552
553
554// thesource remains the property of the calling code but
555// should not be deleted until it is removed from this list.
556void sourcelistclass::addsource (sourceclass *thesource) {
557 // can't add a source that doesn't exist
558 assert (thesource != NULL);
559 if (thesource == NULL) return;
560
561 sourceptr sp;
562 sp.s = thesource;
563
564 sourceptrs.push_back(sp);
565}
Note: See TracBrowser for help on using the repository browser.