source: main/trunk/greenstone2/runtime-src/src/colservr/browsefilter.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1/**********************************************************************
2 *
3 * browsefilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "browsefilter.h"
27#include "fileutil.h"
28#include <assert.h>
29
30
31browsefilterclass::browsefilterclass () {
32 db_ptr = NULL;
33 oaidb_ptr = NULL;
34
35 // -- onePerQuery StartResults integer
36 FilterOption_t filtopt;
37 filtopt.name = "StartResults";
38 filtopt.type = FilterOption_t::integert;
39 filtopt.repeatable = FilterOption_t::onePerQuery;
40 filtopt.defaultValue = "1";
41 filtopt.validValues.push_back("1");
42 filtopt.validValues.push_back("10000");
43 filterOptions["StartResults"] = filtopt;
44
45 // -- onePerQuery EndResults integer
46 filtopt.clear();
47 filtopt.name = "EndResults";
48 filtopt.type = FilterOption_t::integert;
49 filtopt.repeatable = FilterOption_t::onePerQuery;
50 filtopt.defaultValue = "-1";
51 filtopt.validValues.push_back("-1");
52 filtopt.validValues.push_back("10000");
53 filterOptions["EndResults"] = filtopt;
54
55 // -- onePerQuery ParentNode string ("" will return the browsing available)
56 filtopt.clear();
57 filtopt.name = "ParentNode";
58 filtopt.type = FilterOption_t::stringt;
59 filtopt.repeatable = FilterOption_t::onePerQuery;
60 filtopt.defaultValue = g_EmptyText;
61 filterOptions["ParentNode"] = filtopt;
62}
63
64browsefilterclass::~browsefilterclass () {}
65
66void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
67 filterclass::configure (key, cfgline);
68 if (key == "indexstem") {
69 indexstem = cfgline[0];
70 }
71}
72
73bool browsefilterclass::init (ostream &logout) {
74 outconvertclass text_t2ascii;
75
76 if (!filterclass::init(logout)) return false;
77
78 if (db_ptr == NULL || oaidb_ptr == NULL) {
79 // most likely a configuration problem
80 logout << text_t2ascii
81 << "configuration error: browsefilter contains a null dbclass\n\n";
82 return false;
83 }
84
85 if (indexstem.empty()) {
86 indexstem = collection;
87 }
88
89 db_filename = resolve_db_filename(indexstem,db_ptr->getfileextension());
90 if (!file_exists(db_filename)) {
91 logout << text_t2ascii
92 << "warning: database \"" << db_filename << "\" does not exist\n\n";
93 // return false;
94 }
95
96 oaidb_filename = resolve_oaidb_filename(oaidb_ptr->getfileextension());
97
98 return true;
99}
100
101void browsefilterclass::filter (const FilterRequest_t &request,
102 FilterResponse_t &response,
103 comerror_t &err, ostream &logout) {
104 int numDocs = 0;
105 outconvertclass text_t2ascii;
106
107 response.clear ();
108 err = noError;
109
110 // get the browse parameters
111 int startresults = filterOptions["StartResults"].defaultValue.getint();
112 int endresults = filterOptions["EndResults"].defaultValue.getint();
113 text_t parentnode = filterOptions["ParentNode"].defaultValue;
114 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
115 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
116 while (options_here != options_end) {
117 if ((*options_here).name == "StartResults")
118 startresults = (*options_here).value.getint();
119 else if ((*options_here).name == "EndResults")
120 endresults = (*options_here).value.getint();
121 else if ((*options_here).name == "ParentNode")
122 parentnode = (*options_here).value;
123 else {
124 logout << text_t2ascii
125 << "warning: unknown browsefilter option \""
126 << (*options_here).name
127 << "\" ignored.\n\n";
128 }
129
130 ++options_here;
131 }
132
133 // if we're only working on oai, open oai_db, no need to work with index_db in browsefilter.cpp
134 // (but source.cpp uses both oai-inf.db and index.db to get metadata for OAI request)
135 // If we can't open the oai-inf db, this can be because it didn't exist in older versions of GS
136 // in that case, proceed as usual, using the index db.
137 if((request.filterResultOptions & FROAI)) { // OAI request
138 bool success = false;
139
140 if(parentnode == "oai") { // doing an OAI listidentifiers request
141
142 // open up the oai-inf db, if it exists, and return all IDs
143 // if it doesn't exist, proceed as usual
144 success = get_oaiinf_db_entries(response, err, logout); //adds any stuff in oai-inf db for the current OID to resultdoc.metadata
145
146 response.numDocs = response.docInfo.size();
147 response.isApprox = Exact;
148 }
149 if (success) return; // oai request successfully completed with oai-inf.db, no need to open index_db
150 }
151
152 // Since we're here, it means we're not doing anything oai (or oai-inf.db did not exist/open)
153 // So we don't need to work with oai_db. Instead, work with index_db:
154
155 if (db_ptr == NULL) {
156 // most likely a configuration problem
157 logout << text_t2ascii
158 << "configuration error: browsefilter contains a null index dbclass\n\n";
159 err = configurationError;
160 return;
161 }
162
163 // open the database
164 db_ptr->setlogout(&logout);
165 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
166 // most likely a system problem (we have already checked that the database exists)
167 logout << text_t2ascii
168 << "system problem: open on database \""
169 << db_filename << "\" failed\n\n";
170 err = systemProblem;
171 return;
172 }
173
174 infodbclass info;
175
176 // translate any ".fc", ".pr" etc. stuff in the parentnode
177 parentnode = db_ptr->translate_OID (parentnode, info);
178
179 // adjust topmost browsing node
180 if (parentnode.empty()) parentnode = "browse";
181
182 // get the node
183 if ((request.filterResultOptions & FROID) ||
184 (request.filterResultOptions & FRmetadata)) {
185 if (!db_ptr->getinfo(parentnode, info)) {
186 // didn't find the node in index db
187 logout << text_t2ascii
188 << "warning: lookup for node \"" << parentnode
189 << "\" failed for browsefilter.\n\n";
190 } else {
191 // found the node
192
193 // replace " with the parent node name and split the contains string
194 // into the result set
195 text_tarray resultset;
196 text_t tmptext;
197 text_t &contains = info["contains"];
198 text_t::iterator contains_here = contains.begin();
199 text_t::iterator contains_end = contains.end();
200 while (contains_here != contains_end) {
201 if (*contains_here == '"') tmptext += parentnode;
202 else if (*contains_here == ';') {
203 if (!tmptext.empty()) resultset.push_back (tmptext);
204 tmptext.clear();
205 } else tmptext.push_back(*contains_here);
206
207 ++contains_here;
208 }
209 // insert the last result in the set
210 if (!tmptext.empty()) resultset.push_back (tmptext);
211
212 text_tarray offset_resultset;
213 text_t &md_type = info["mdtype"];
214 if (!md_type.empty())
215 {
216 text_t &md_offset = info["mdoffset"];
217 if (!md_offset.empty())
218 {
219 text_t offsettext;
220
221 text_t::iterator offset_here = md_offset.begin();
222 text_t::iterator offset_end = md_offset.end();
223 while (offset_here != offset_end)
224 {
225 if (*offset_here == ';')
226 {
227 if (offsettext.empty())
228 {
229 offset_resultset.push_back ("0");
230 }
231 else
232 {
233 offset_resultset.push_back (offsettext);
234 }
235 offsettext.clear();
236 }
237 else
238 {
239 offsettext.push_back(*offset_here);
240 }
241
242 ++offset_here;
243 }
244 // insert the last result in the set
245 if (offsettext.empty())
246 {
247 offset_resultset.push_back ("0");
248 }
249 else
250 {
251 offset_resultset.push_back (offsettext);
252 }
253 }
254 else
255 {
256 // add 0 offset for each 'contains' entry
257 text_tarray::iterator result_here = resultset.begin();
258 text_tarray::iterator result_end = resultset.end();
259 while (result_here != result_end) {
260 offset_resultset.push_back("0");
261 ++result_here;
262 }
263 }
264
265 // do an intersection with the input set
266 if (!request.docSet.empty()) {
267
268 text_tarray intersect_resultset;
269 text_tarray intersect_offset_resultset;
270
271 text_tarray::const_iterator resultset_here = resultset.begin();
272 text_tarray::const_iterator resultset_end = resultset.end();
273 text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
274
275 while (resultset_here != resultset_end) {
276 if (in_set (request.docSet, *resultset_here))
277 {
278 intersect_resultset.push_back (*resultset_here);
279 intersect_offset_resultset.push_back (*offset_resultset_here);
280 }
281 ++resultset_here;
282 ++offset_resultset_here;
283 }
284 resultset = intersect_resultset;
285 offset_resultset = intersect_offset_resultset;
286 }
287 }
288 else
289 {
290 // do an intersection with the input set
291 if (!request.docSet.empty()) {
292 intersect (resultset, request.docSet);
293 }
294
295 // add 0 offset for each 'contains' entry
296 text_tarray::iterator result_here = resultset.begin();
297 text_tarray::iterator result_end = resultset.end();
298 while (result_here != result_end) {
299 offset_resultset.push_back("0");
300 ++result_here;
301 }
302 }
303
304 // create the response
305 numDocs = resultset.size();
306 int resultnum = 1;
307 ResultDocInfo_t resultdoc;
308 text_tarray::iterator result_here = resultset.begin();
309 text_tarray::iterator result_end = resultset.end();
310 text_tarray::iterator offset_result_here = offset_resultset.begin();
311
312 while (result_here != result_end) {
313 // if endresults is -1 get all results
314 if ((endresults != -1) && (resultnum > endresults)) break;
315 if (resultnum >= startresults) {
316 resultdoc.OID = (*result_here);
317 if (!md_type.empty())
318 {
319 resultdoc.classifier_metadata_type = md_type;
320 resultdoc.classifier_metadata_offset = offset_result_here->getint();
321 }
322 response.docInfo.push_back(resultdoc);
323 }
324
325 ++resultnum;
326 ++result_here;
327 if (!md_type.empty()) ++offset_result_here;
328 }
329 }
330 }
331
332 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
333 response.numDocs = numDocs;
334 response.isApprox = Exact;
335}
336
337bool browsefilterclass::get_oaiinf_db_entries(FilterResponse_t &response,
338 comerror_t &err, ostream &logout)
339{
340 outconvertclass text_t2ascii;
341
342 //logout << text_t2ascii << "browsefilterclass::get_oaiinf_db_entries\n";
343
344 // ONLY if we're doing any OAI stuff (FROAI will be set then) will we be here
345 // So next try to open the oai-inf db if it exists for this collection
346
347
348 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
349
350 logout << text_t2ascii
351 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
352 return false;
353
354 } else { // let's try opening the oaidb file
355 oaidb_ptr->setlogout(&logout);
356 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
357 // most likely a system problem (we have already checked that the database exists just above)
358 logout << text_t2ascii
359 << "system problem: open on database \""
360 << oaidb_filename << "\" failed\n\n";
361 err = systemProblem;
362 return false;
363 } // now we've opened the oai-inf db file successfully
364 }
365
366 infodbclass oai_info;
367 ResultDocInfo_t resultdoc;
368
369 text_tarray keys = oaidb_ptr->getkeys();
370
371 text_tarray::iterator key_here = keys.begin();
372 text_tarray::iterator key_end = keys.end();
373 while (key_here != key_end) {
374
375 resultdoc.OID = (*key_here);
376
377 if(!oaidb_ptr->getinfo(resultdoc.OID, oai_info)) {
378 logout << text_t2ascii
379 << "warning: lookup for node \"" << resultdoc.OID
380 << "\" in etc/oai-inf db failed for browsefilter.\n\n";
381 } else {
382 //logout << text_t2ascii << "@@@@ found node \"" << resultdoc.OID << "\" in etc/oai-inf db.\n\n";
383
384 resultdoc.metadata["oaiinf.status"].isRef = false;
385 resultdoc.metadata["oaiinf.status"].values.push_back(oai_info["status"]);
386 resultdoc.metadata["oaiinf.timestamp"].isRef = false;
387 resultdoc.metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
388 resultdoc.metadata["oaiinf.datestamp"].isRef = false;
389 resultdoc.metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
390 }
391
392 response.docInfo.push_back(resultdoc);
393 ++key_here;
394
395 }
396
397 // we're done with oai-inf db
398
399 oaidb_ptr->closedatabase(); // don't leave files open
400
401 return true;
402}
Note: See TracBrowser for help on using the repository browser.