source: main/trunk/greenstone2/runtime-src/src/colservr/browsefilter.cpp@ 31388

Last change on this file since 31388 was 31388, checked in by ak19, 7 years ago

Second commit to do with implementing OAI deletion policy for GS2. This commit is only loosely related, as it shifts functions duplicated in source.h and filter.h (and cpp files) into the new colserver.h and cpp files for sharing.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.9 KB
Line 
1/**********************************************************************
2 *
3 * browsefilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "browsefilter.h"
27#include "colservertools.h"
28#include "fileutil.h"
29#include <assert.h>
30
31
32browsefilterclass::browsefilterclass () {
33 db_ptr = NULL;
34 oaidb_ptr = NULL;
35
36 // -- onePerQuery StartResults integer
37 FilterOption_t filtopt;
38 filtopt.name = "StartResults";
39 filtopt.type = FilterOption_t::integert;
40 filtopt.repeatable = FilterOption_t::onePerQuery;
41 filtopt.defaultValue = "1";
42 filtopt.validValues.push_back("1");
43 filtopt.validValues.push_back("10000");
44 filterOptions["StartResults"] = filtopt;
45
46 // -- onePerQuery EndResults integer
47 filtopt.clear();
48 filtopt.name = "EndResults";
49 filtopt.type = FilterOption_t::integert;
50 filtopt.repeatable = FilterOption_t::onePerQuery;
51 filtopt.defaultValue = "-1";
52 filtopt.validValues.push_back("-1");
53 filtopt.validValues.push_back("10000");
54 filterOptions["EndResults"] = filtopt;
55
56 // -- onePerQuery ParentNode string ("" will return the browsing available)
57 filtopt.clear();
58 filtopt.name = "ParentNode";
59 filtopt.type = FilterOption_t::stringt;
60 filtopt.repeatable = FilterOption_t::onePerQuery;
61 filtopt.defaultValue = g_EmptyText;
62 filterOptions["ParentNode"] = filtopt;
63}
64
65browsefilterclass::~browsefilterclass () {}
66
67void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
68 filterclass::configure (key, cfgline);
69 if (key == "indexstem") {
70 indexstem = cfgline[0];
71 }
72}
73
74bool browsefilterclass::init (ostream &logout) {
75 outconvertclass text_t2ascii;
76
77 if (!filterclass::init(logout)) return false;
78
79 if (db_ptr == NULL || oaidb_ptr == NULL) {
80 // most likely a configuration problem
81 logout << text_t2ascii
82 << "configuration error: browsefilter contains a null dbclass\n\n";
83 return false;
84 }
85
86 if (indexstem.empty()) {
87 indexstem = collection;
88 }
89
90 db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
91 indexstem,db_ptr->getfileextension());
92 if (!file_exists(db_filename)) {
93 logout << text_t2ascii
94 << "warning: database \"" << db_filename << "\" does not exist\n\n";
95 // return false;
96 }
97
98 oaidb_filename = resolve_oaidb_filename(gsdlhome, dbhome, collecthome, collection,
99 oaidb_ptr->getfileextension());
100
101 return true;
102}
103
104void browsefilterclass::filter (const FilterRequest_t &request,
105 FilterResponse_t &response,
106 comerror_t &err, ostream &logout) {
107 int numDocs = 0;
108 outconvertclass text_t2ascii;
109
110 response.clear ();
111 err = noError;
112
113 // get the browse parameters
114 int startresults = filterOptions["StartResults"].defaultValue.getint();
115 int endresults = filterOptions["EndResults"].defaultValue.getint();
116 text_t parentnode = filterOptions["ParentNode"].defaultValue;
117 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
118 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
119 while (options_here != options_end) {
120 if ((*options_here).name == "StartResults")
121 startresults = (*options_here).value.getint();
122 else if ((*options_here).name == "EndResults")
123 endresults = (*options_here).value.getint();
124 else if ((*options_here).name == "ParentNode")
125 parentnode = (*options_here).value;
126 else {
127 logout << text_t2ascii
128 << "warning: unknown browsefilter option \""
129 << (*options_here).name
130 << "\" ignored.\n\n";
131 }
132
133 ++options_here;
134 }
135
136 // if we're only working on oai, open oai_db, no need to work with index_db in browsefilter.cpp
137 // (but source.cpp uses both oai-inf.db and index.db to get metadata for OAI request)
138 // If we can't open the oai-inf db, this can be because it didn't exist in older versions of GS
139 // in that case, proceed as usual, using the index db.
140 if((request.filterResultOptions & FROAI)) { // OAI request
141 bool success = false;
142
143 if(parentnode == "oai") { // doing an OAI listidentifiers request
144
145 // open up the oai-inf db, if it exists, and return all IDs
146 // if it doesn't exist, proceed as usual
147 success = get_oaiinf_db_entries(response, err, logout); //adds any stuff in oai-inf db for the current OID to resultdoc.metadata
148
149 response.numDocs = response.docInfo.size();
150 response.isApprox = Exact;
151 }
152 if (success) return; // oai request successfully completed with oai-inf.db, no need to open index_db
153 }
154
155 // Since we're here, it means we're not doing anything oai (or oai-inf.db did not exist/open)
156 // So we don't need to work with oai_db. Instead, work with index_db:
157
158 if (db_ptr == NULL) {
159 // most likely a configuration problem
160 logout << text_t2ascii
161 << "configuration error: browsefilter contains a null index dbclass\n\n";
162 err = configurationError;
163 return;
164 }
165
166 // open the database
167 db_ptr->setlogout(&logout);
168 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
169 // most likely a system problem (we have already checked that the database exists)
170 logout << text_t2ascii
171 << "system problem: open on database \""
172 << db_filename << "\" failed\n\n";
173 err = systemProblem;
174 return;
175 }
176
177 infodbclass info;
178
179 // translate any ".fc", ".pr" etc. stuff in the parentnode
180 parentnode = db_ptr->translate_OID (parentnode, info);
181
182 // adjust topmost browsing node
183 if (parentnode.empty()) parentnode = "browse";
184
185 // get the node
186 if ((request.filterResultOptions & FROID) ||
187 (request.filterResultOptions & FRmetadata)) {
188 if (!db_ptr->getinfo(parentnode, info)) {
189 // didn't find the node in index db
190 logout << text_t2ascii
191 << "warning: lookup for node \"" << parentnode
192 << "\" failed for browsefilter.\n\n";
193 } else {
194 // found the node
195
196 // replace " with the parent node name and split the contains string
197 // into the result set
198 text_tarray resultset;
199 text_t tmptext;
200 text_t &contains = info["contains"];
201 text_t::iterator contains_here = contains.begin();
202 text_t::iterator contains_end = contains.end();
203 while (contains_here != contains_end) {
204 if (*contains_here == '"') tmptext += parentnode;
205 else if (*contains_here == ';') {
206 if (!tmptext.empty()) resultset.push_back (tmptext);
207 tmptext.clear();
208 } else tmptext.push_back(*contains_here);
209
210 ++contains_here;
211 }
212 // insert the last result in the set
213 if (!tmptext.empty()) resultset.push_back (tmptext);
214
215 text_tarray offset_resultset;
216 text_t &md_type = info["mdtype"];
217 if (!md_type.empty())
218 {
219 text_t &md_offset = info["mdoffset"];
220 if (!md_offset.empty())
221 {
222 text_t offsettext;
223
224 text_t::iterator offset_here = md_offset.begin();
225 text_t::iterator offset_end = md_offset.end();
226 while (offset_here != offset_end)
227 {
228 if (*offset_here == ';')
229 {
230 if (offsettext.empty())
231 {
232 offset_resultset.push_back ("0");
233 }
234 else
235 {
236 offset_resultset.push_back (offsettext);
237 }
238 offsettext.clear();
239 }
240 else
241 {
242 offsettext.push_back(*offset_here);
243 }
244
245 ++offset_here;
246 }
247 // insert the last result in the set
248 if (offsettext.empty())
249 {
250 offset_resultset.push_back ("0");
251 }
252 else
253 {
254 offset_resultset.push_back (offsettext);
255 }
256 }
257 else
258 {
259 // add 0 offset for each 'contains' entry
260 text_tarray::iterator result_here = resultset.begin();
261 text_tarray::iterator result_end = resultset.end();
262 while (result_here != result_end) {
263 offset_resultset.push_back("0");
264 ++result_here;
265 }
266 }
267
268 // do an intersection with the input set
269 if (!request.docSet.empty()) {
270
271 text_tarray intersect_resultset;
272 text_tarray intersect_offset_resultset;
273
274 text_tarray::const_iterator resultset_here = resultset.begin();
275 text_tarray::const_iterator resultset_end = resultset.end();
276 text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
277
278 while (resultset_here != resultset_end) {
279 if (in_set (request.docSet, *resultset_here))
280 {
281 intersect_resultset.push_back (*resultset_here);
282 intersect_offset_resultset.push_back (*offset_resultset_here);
283 }
284 ++resultset_here;
285 ++offset_resultset_here;
286 }
287 resultset = intersect_resultset;
288 offset_resultset = intersect_offset_resultset;
289 }
290 }
291 else
292 {
293 // do an intersection with the input set
294 if (!request.docSet.empty()) {
295 intersect (resultset, request.docSet);
296 }
297
298 // add 0 offset for each 'contains' entry
299 text_tarray::iterator result_here = resultset.begin();
300 text_tarray::iterator result_end = resultset.end();
301 while (result_here != result_end) {
302 offset_resultset.push_back("0");
303 ++result_here;
304 }
305 }
306
307 // create the response
308 numDocs = resultset.size();
309 int resultnum = 1;
310 ResultDocInfo_t resultdoc;
311 text_tarray::iterator result_here = resultset.begin();
312 text_tarray::iterator result_end = resultset.end();
313 text_tarray::iterator offset_result_here = offset_resultset.begin();
314
315 while (result_here != result_end) {
316 // if endresults is -1 get all results
317 if ((endresults != -1) && (resultnum > endresults)) break;
318 if (resultnum >= startresults) {
319 resultdoc.OID = (*result_here);
320 if (!md_type.empty())
321 {
322 resultdoc.classifier_metadata_type = md_type;
323 resultdoc.classifier_metadata_offset = offset_result_here->getint();
324 }
325 response.docInfo.push_back(resultdoc);
326 }
327
328 ++resultnum;
329 ++result_here;
330 if (!md_type.empty()) ++offset_result_here;
331 }
332 }
333 }
334
335 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
336 response.numDocs = numDocs;
337 response.isApprox = Exact;
338}
339
340bool browsefilterclass::get_oaiinf_db_entries(FilterResponse_t &response,
341 comerror_t &err, ostream &logout)
342{
343 outconvertclass text_t2ascii;
344
345 //logout << text_t2ascii << "browsefilterclass::get_oaiinf_db_entries\n";
346
347 // ONLY if we're doing any OAI stuff (FROAI will be set then) will we be here
348 // So next try to open the oai-inf db if it exists for this collection
349
350
351 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
352
353 logout << text_t2ascii
354 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
355 return false;
356
357 } else { // let's try opening the oaidb file
358 oaidb_ptr->setlogout(&logout);
359 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
360 // most likely a system problem (we have already checked that the database exists just above)
361 logout << text_t2ascii
362 << "system problem: open on database \""
363 << oaidb_filename << "\" failed\n\n";
364 err = systemProblem;
365 return false;
366 } // now we've opened the oai-inf db file successfully
367 }
368
369 infodbclass oai_info;
370 ResultDocInfo_t resultdoc;
371
372 text_tarray keys = oaidb_ptr->getkeys();
373
374 text_tarray::iterator key_here = keys.begin();
375 text_tarray::iterator key_end = keys.end();
376 while (key_here != key_end) {
377
378 resultdoc.OID = (*key_here);
379
380 if(!oaidb_ptr->getinfo(resultdoc.OID, oai_info)) {
381 logout << text_t2ascii
382 << "warning: lookup for node \"" << resultdoc.OID
383 << "\" in etc/oai-inf db failed for browsefilter.\n\n";
384 } else {
385 //logout << text_t2ascii << "@@@@ found node \"" << resultdoc.OID << "\" in etc/oai-inf db.\n\n";
386
387 resultdoc.metadata["oaiinf.status"].isRef = false;
388 resultdoc.metadata["oaiinf.status"].values.push_back(oai_info["status"]);
389 resultdoc.metadata["oaiinf.timestamp"].isRef = false;
390 resultdoc.metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
391 resultdoc.metadata["oaiinf.datestamp"].isRef = false;
392 resultdoc.metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
393 }
394
395 response.docInfo.push_back(resultdoc);
396 ++key_here;
397
398 }
399
400 // we're done with oai-inf db
401
402 oaidb_ptr->closedatabase(); // don't leave files open
403
404 return true;
405}
Note: See TracBrowser for help on using the repository browser.