source: main/trunk/greenstone2/runtime-src/src/colservr/browsefilter.cpp@ 31903

Last change on this file since 31903 was 31903, checked in by ak19, 7 years ago

I hope these are all the changes necessary on the runtime side of GS2 to get the OAI server validation working for GS2: instead of working out the earliest datetime stamp of the OAI repository by comparing the builddate in index/build.cfg of each OAI collection and selecting the earliest, the oai-inf.db is now storing the special earliesttimestamp record. The timestamp of this record represents its collection's earliest timestamp. And the earliest of these among all OAI collections is now the earliest datetime of the OAI repository.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.5 KB
RevLine 
/**********************************************************************
 *
 * browsefilter.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
25
26#include "browsefilter.h"
[31388]27#include "colservertools.h"
[227]28#include "fileutil.h"
[31387]29#include <assert.h>
[227]30
31
32browsefilterclass::browsefilterclass () {
[15558]33 db_ptr = NULL;
[31387]34 oaidb_ptr = NULL;
[227]35
36 // -- onePerQuery StartResults integer
37 FilterOption_t filtopt;
38 filtopt.name = "StartResults";
39 filtopt.type = FilterOption_t::integert;
40 filtopt.repeatable = FilterOption_t::onePerQuery;
41 filtopt.defaultValue = "1";
42 filtopt.validValues.push_back("1");
[238]43 filtopt.validValues.push_back("10000");
[227]44 filterOptions["StartResults"] = filtopt;
45
46 // -- onePerQuery EndResults integer
47 filtopt.clear();
48 filtopt.name = "EndResults";
49 filtopt.type = FilterOption_t::integert;
50 filtopt.repeatable = FilterOption_t::onePerQuery;
[1256]51 filtopt.defaultValue = "-1";
52 filtopt.validValues.push_back("-1");
[238]53 filtopt.validValues.push_back("10000");
[227]54 filterOptions["EndResults"] = filtopt;
55
56 // -- onePerQuery ParentNode string ("" will return the browsing available)
57 filtopt.clear();
58 filtopt.name = "ParentNode";
59 filtopt.type = FilterOption_t::stringt;
60 filtopt.repeatable = FilterOption_t::onePerQuery;
[11259]61 filtopt.defaultValue = g_EmptyText;
[227]62 filterOptions["ParentNode"] = filtopt;
63}
64
[31387]65browsefilterclass::~browsefilterclass () {}
[227]66
[9937]67void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
68 filterclass::configure (key, cfgline);
69 if (key == "indexstem") {
70 indexstem = cfgline[0];
71 }
72}
73
[227]74bool browsefilterclass::init (ostream &logout) {
75 outconvertclass text_t2ascii;
76
77 if (!filterclass::init(logout)) return false;
78
[31387]79 if (db_ptr == NULL || oaidb_ptr == NULL) {
[15680]80 // most likely a configuration problem
81 logout << text_t2ascii
82 << "configuration error: browsefilter contains a null dbclass\n\n";
83 return false;
84 }
85
[9937]86 if (indexstem.empty()) {
87 indexstem = collection;
88 }
[16310]89
[31388]90 db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
91 indexstem,db_ptr->getfileextension());
[15558]92 if (!file_exists(db_filename)) {
[227]93 logout << text_t2ascii
[15558]94 << "warning: database \"" << db_filename << "\" does not exist\n\n";
95 // return false;
[227]96 }
97
[31388]98 oaidb_filename = resolve_oaidb_filename(gsdlhome, dbhome, collecthome, collection,
99 oaidb_ptr->getfileextension());
[31387]100
[227]101 return true;
102}
103
104void browsefilterclass::filter (const FilterRequest_t &request,
105 FilterResponse_t &response,
106 comerror_t &err, ostream &logout) {
[238]107 int numDocs = 0;
[227]108 outconvertclass text_t2ascii;
109
110 response.clear ();
111 err = noError;
112
113 // get the browse parameters
114 int startresults = filterOptions["StartResults"].defaultValue.getint();
115 int endresults = filterOptions["EndResults"].defaultValue.getint();
116 text_t parentnode = filterOptions["ParentNode"].defaultValue;
117 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
118 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
119 while (options_here != options_end) {
120 if ((*options_here).name == "StartResults")
121 startresults = (*options_here).value.getint();
122 else if ((*options_here).name == "EndResults")
123 endresults = (*options_here).value.getint();
124 else if ((*options_here).name == "ParentNode")
125 parentnode = (*options_here).value;
126 else {
127 logout << text_t2ascii
128 << "warning: unknown browsefilter option \""
129 << (*options_here).name
130 << "\" ignored.\n\n";
131 }
132
[9620]133 ++options_here;
[227]134 }
135
[31387]136 // if we're only working on oai, open oai_db, no need to work with index_db in browsefilter.cpp
137 // (but source.cpp uses both oai-inf.db and index.db to get metadata for OAI request)
138 // If we can't open the oai-inf db, this can be because it didn't exist in older versions of GS
139 // in that case, proceed as usual, using the index db.
140 if((request.filterResultOptions & FROAI)) { // OAI request
141 bool success = false;
142
143 if(parentnode == "oai") { // doing an OAI listidentifiers request
144
[31903]145 // open up the oai-inf db if it exists, and return all IDs *except* the special OID=OAI_EARLIESTTIMESTAMP_OID
146 // if oai-inf db doesn't exist, proceed as usual
[31387]147 success = get_oaiinf_db_entries(response, err, logout); //adds any stuff in oai-inf db for the current OID to resultdoc.metadata
148
149 response.numDocs = response.docInfo.size();
150 response.isApprox = Exact;
151 }
152 if (success) return; // oai request successfully completed with oai-inf.db, no need to open index_db
153 }
154
155 // Since we're here, it means we're not doing anything oai (or oai-inf.db did not exist/open)
156 // So we don't need to work with oai_db. Instead, work with index_db:
157
158 if (db_ptr == NULL) {
159 // most likely a configuration problem
160 logout << text_t2ascii
161 << "configuration error: browsefilter contains a null index dbclass\n\n";
162 err = configurationError;
163 return;
164 }
165
166 // open the database
167 db_ptr->setlogout(&logout);
168 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
169 // most likely a system problem (we have already checked that the database exists)
170 logout << text_t2ascii
171 << "system problem: open on database \""
172 << db_filename << "\" failed\n\n";
173 err = systemProblem;
174 return;
175 }
176
[259]177 infodbclass info;
178
179 // translate any ".fc", ".pr" etc. stuff in the parentnode
[15558]180 parentnode = db_ptr->translate_OID (parentnode, info);
[259]181
[227]182 // adjust topmost browsing node
183 if (parentnode.empty()) parentnode = "browse";
184
185 // get the node
186 if ((request.filterResultOptions & FROID) ||
187 (request.filterResultOptions & FRmetadata)) {
[15558]188 if (!db_ptr->getinfo(parentnode, info)) {
[31387]189 // didn't find the node in index db
[227]190 logout << text_t2ascii
191 << "warning: lookup for node \"" << parentnode
192 << "\" failed for browsefilter.\n\n";
193 } else {
194 // found the node
195
196 // replace " with the parent node name and split the contains string
197 // into the result set
[249]198 text_tarray resultset;
[227]199 text_t tmptext;
200 text_t &contains = info["contains"];
201 text_t::iterator contains_here = contains.begin();
202 text_t::iterator contains_end = contains.end();
203 while (contains_here != contains_end) {
204 if (*contains_here == '"') tmptext += parentnode;
205 else if (*contains_here == ';') {
[249]206 if (!tmptext.empty()) resultset.push_back (tmptext);
[227]207 tmptext.clear();
208 } else tmptext.push_back(*contains_here);
209
[9620]210 ++contains_here;
[227]211 }
212 // insert the last result in the set
[249]213 if (!tmptext.empty()) resultset.push_back (tmptext);
[227]214
[829]215 text_tarray offset_resultset;
216 text_t &md_type = info["mdtype"];
217 if (!md_type.empty())
218 {
219 text_t &md_offset = info["mdoffset"];
220 if (!md_offset.empty())
221 {
222 text_t offsettext;
223
224 text_t::iterator offset_here = md_offset.begin();
225 text_t::iterator offset_end = md_offset.end();
226 while (offset_here != offset_end)
227 {
228 if (*offset_here == ';')
229 {
230 if (offsettext.empty())
231 {
232 offset_resultset.push_back ("0");
233 }
234 else
235 {
236 offset_resultset.push_back (offsettext);
237 }
238 offsettext.clear();
239 }
240 else
241 {
242 offsettext.push_back(*offset_here);
243 }
244
[9620]245 ++offset_here;
[829]246 }
247 // insert the last result in the set
[1702]248 if (offsettext.empty())
249 {
250 offset_resultset.push_back ("0");
251 }
252 else
253 {
254 offset_resultset.push_back (offsettext);
255 }
[829]256 }
257 else
258 {
259 // add 0 offset for each 'contains' entry
260 text_tarray::iterator result_here = resultset.begin();
261 text_tarray::iterator result_end = resultset.end();
262 while (result_here != result_end) {
263 offset_resultset.push_back("0");
[9620]264 ++result_here;
[829]265 }
266 }
267
268 // do an intersection with the input set
269 if (!request.docSet.empty()) {
270
271 text_tarray intersect_resultset;
272 text_tarray intersect_offset_resultset;
273
274 text_tarray::const_iterator resultset_here = resultset.begin();
275 text_tarray::const_iterator resultset_end = resultset.end();
276 text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
277
278 while (resultset_here != resultset_end) {
279 if (in_set (request.docSet, *resultset_here))
280 {
281 intersect_resultset.push_back (*resultset_here);
282 intersect_offset_resultset.push_back (*offset_resultset_here);
283 }
[9620]284 ++resultset_here;
285 ++offset_resultset_here;
[829]286 }
287 resultset = intersect_resultset;
288 offset_resultset = intersect_offset_resultset;
289 }
290 }
291 else
292 {
293 // do an intersection with the input set
294 if (!request.docSet.empty()) {
295 intersect (resultset, request.docSet);
296 }
[1702]297
298 // add 0 offset for each 'contains' entry
299 text_tarray::iterator result_here = resultset.begin();
300 text_tarray::iterator result_end = resultset.end();
301 while (result_here != result_end) {
302 offset_resultset.push_back("0");
[9620]303 ++result_here;
[1702]304 }
[829]305 }
306
[227]307 // create the response
[238]308 numDocs = resultset.size();
[227]309 int resultnum = 1;
310 ResultDocInfo_t resultdoc;
[249]311 text_tarray::iterator result_here = resultset.begin();
312 text_tarray::iterator result_end = resultset.end();
[829]313 text_tarray::iterator offset_result_here = offset_resultset.begin();
[227]314
315 while (result_here != result_end) {
[1256]316 // if endresults is -1 get all results
317 if ((endresults != -1) && (resultnum > endresults)) break;
[227]318 if (resultnum >= startresults) {
319 resultdoc.OID = (*result_here);
[829]320 if (!md_type.empty())
321 {
322 resultdoc.classifier_metadata_type = md_type;
323 resultdoc.classifier_metadata_offset = offset_result_here->getint();
324 }
[227]325 response.docInfo.push_back(resultdoc);
326 }
327
[9620]328 ++resultnum;
329 ++result_here;
330 if (!md_type.empty()) ++offset_result_here;
[227]331 }
332 }
333 }
334
[15558]335 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
[238]336 response.numDocs = numDocs;
[398]337 response.isApprox = Exact;
[227]338}
[31387]339
340bool browsefilterclass::get_oaiinf_db_entries(FilterResponse_t &response,
341 comerror_t &err, ostream &logout)
342{
343 outconvertclass text_t2ascii;
344
345 //logout << text_t2ascii << "browsefilterclass::get_oaiinf_db_entries\n";
346
347 // ONLY if we're doing any OAI stuff (FROAI will be set then) will we be here
348 // So next try to open the oai-inf db if it exists for this collection
349
350
351 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
352
353 logout << text_t2ascii
354 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
355 return false;
356
357 } else { // let's try opening the oaidb file
358 oaidb_ptr->setlogout(&logout);
359 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
360 // most likely a system problem (we have already checked that the database exists just above)
361 logout << text_t2ascii
362 << "system problem: open on database \""
363 << oaidb_filename << "\" failed\n\n";
364 err = systemProblem;
365 return false;
366 } // now we've opened the oai-inf db file successfully
367 }
368
369 infodbclass oai_info;
370 ResultDocInfo_t resultdoc;
371
372 text_tarray keys = oaidb_ptr->getkeys();
373
374 text_tarray::iterator key_here = keys.begin();
375 text_tarray::iterator key_end = keys.end();
376 while (key_here != key_end) {
377
378 resultdoc.OID = (*key_here);
[31903]379
380 // OAI_EARLIESTTIMESTAMP_OID is the OID of a special record that we'll ignore
381 // here in browsefilter.cpp, since it's not a doc.
382 // When the *metadata* for this special OID is requested, source.cpp will handle it
383 if(resultdoc.OID == OAI_EARLIESTTIMESTAMP_OID) {
384 ++key_here;
385 continue;
386 }
387
[31387]388 if(!oaidb_ptr->getinfo(resultdoc.OID, oai_info)) {
389 logout << text_t2ascii
390 << "warning: lookup for node \"" << resultdoc.OID
391 << "\" in etc/oai-inf db failed for browsefilter.\n\n";
[31394]392 }
393 // We don't need to get the oai metadata from oai-inf.db at this stage. That will be
394 // handled by a separate metadata request. See collectserver::filter() and source.cpp's get_oai_metadata().
395 /*
396 else {
[31387]397 //logout << text_t2ascii << "@@@@ found node \"" << resultdoc.OID << "\" in etc/oai-inf db.\n\n";
398
399 resultdoc.metadata["oaiinf.status"].isRef = false;
400 resultdoc.metadata["oaiinf.status"].values.push_back(oai_info["status"]);
401 resultdoc.metadata["oaiinf.timestamp"].isRef = false;
402 resultdoc.metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
403 resultdoc.metadata["oaiinf.datestamp"].isRef = false;
404 resultdoc.metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
405 }
[31394]406 */
[31387]407
408 response.docInfo.push_back(resultdoc);
409 ++key_here;
410
411 }
412
413 // we're done with oai-inf db
414
415 oaidb_ptr->closedatabase(); // don't leave files open
416
417 return true;
418}
Note: See TracBrowser for help on using the repository browser.