source: main/trunk/greenstone2/runtime-src/src/colservr/browsefilter.cpp@ 31394

Last change on this file since 31394 was 31394, checked in by ak19, 7 years ago

Further commit to do with implementing GS2 server side of OAI deletion policy: 1. Forgot to svn add new files! 2. browsefilter code does not need to get the oai metadata, removed this duplicate code, as that part is handled in source.cpp's get_oai_metadata, which is the stage where the oai metadata is actually required.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.1 KB
Line 
1/**********************************************************************
2 *
3 * browsefilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "browsefilter.h"
27#include "colservertools.h"
28#include "fileutil.h"
29#include <assert.h>
30
31
32browsefilterclass::browsefilterclass () {
33 db_ptr = NULL;
34 oaidb_ptr = NULL;
35
36 // -- onePerQuery StartResults integer
37 FilterOption_t filtopt;
38 filtopt.name = "StartResults";
39 filtopt.type = FilterOption_t::integert;
40 filtopt.repeatable = FilterOption_t::onePerQuery;
41 filtopt.defaultValue = "1";
42 filtopt.validValues.push_back("1");
43 filtopt.validValues.push_back("10000");
44 filterOptions["StartResults"] = filtopt;
45
46 // -- onePerQuery EndResults integer
47 filtopt.clear();
48 filtopt.name = "EndResults";
49 filtopt.type = FilterOption_t::integert;
50 filtopt.repeatable = FilterOption_t::onePerQuery;
51 filtopt.defaultValue = "-1";
52 filtopt.validValues.push_back("-1");
53 filtopt.validValues.push_back("10000");
54 filterOptions["EndResults"] = filtopt;
55
56 // -- onePerQuery ParentNode string ("" will return the browsing available)
57 filtopt.clear();
58 filtopt.name = "ParentNode";
59 filtopt.type = FilterOption_t::stringt;
60 filtopt.repeatable = FilterOption_t::onePerQuery;
61 filtopt.defaultValue = g_EmptyText;
62 filterOptions["ParentNode"] = filtopt;
63}
64
65browsefilterclass::~browsefilterclass () {}
66
67void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
68 filterclass::configure (key, cfgline);
69 if (key == "indexstem") {
70 indexstem = cfgline[0];
71 }
72}
73
74bool browsefilterclass::init (ostream &logout) {
75 outconvertclass text_t2ascii;
76
77 if (!filterclass::init(logout)) return false;
78
79 if (db_ptr == NULL || oaidb_ptr == NULL) {
80 // most likely a configuration problem
81 logout << text_t2ascii
82 << "configuration error: browsefilter contains a null dbclass\n\n";
83 return false;
84 }
85
86 if (indexstem.empty()) {
87 indexstem = collection;
88 }
89
90 db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
91 indexstem,db_ptr->getfileextension());
92 if (!file_exists(db_filename)) {
93 logout << text_t2ascii
94 << "warning: database \"" << db_filename << "\" does not exist\n\n";
95 // return false;
96 }
97
98 oaidb_filename = resolve_oaidb_filename(gsdlhome, dbhome, collecthome, collection,
99 oaidb_ptr->getfileextension());
100
101 return true;
102}
103
104void browsefilterclass::filter (const FilterRequest_t &request,
105 FilterResponse_t &response,
106 comerror_t &err, ostream &logout) {
107 int numDocs = 0;
108 outconvertclass text_t2ascii;
109
110 response.clear ();
111 err = noError;
112
113 // get the browse parameters
114 int startresults = filterOptions["StartResults"].defaultValue.getint();
115 int endresults = filterOptions["EndResults"].defaultValue.getint();
116 text_t parentnode = filterOptions["ParentNode"].defaultValue;
117 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
118 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
119 while (options_here != options_end) {
120 if ((*options_here).name == "StartResults")
121 startresults = (*options_here).value.getint();
122 else if ((*options_here).name == "EndResults")
123 endresults = (*options_here).value.getint();
124 else if ((*options_here).name == "ParentNode")
125 parentnode = (*options_here).value;
126 else {
127 logout << text_t2ascii
128 << "warning: unknown browsefilter option \""
129 << (*options_here).name
130 << "\" ignored.\n\n";
131 }
132
133 ++options_here;
134 }
135
136 // if we're only working on oai, open oai_db, no need to work with index_db in browsefilter.cpp
137 // (but source.cpp uses both oai-inf.db and index.db to get metadata for OAI request)
138 // If we can't open the oai-inf db, this can be because it didn't exist in older versions of GS
139 // in that case, proceed as usual, using the index db.
140 if((request.filterResultOptions & FROAI)) { // OAI request
141 bool success = false;
142
143 if(parentnode == "oai") { // doing an OAI listidentifiers request
144
145 // open up the oai-inf db, if it exists, and return all IDs
146 // if it doesn't exist, proceed as usual
147 success = get_oaiinf_db_entries(response, err, logout); //adds any stuff in oai-inf db for the current OID to resultdoc.metadata
148
149 response.numDocs = response.docInfo.size();
150 response.isApprox = Exact;
151 }
152 if (success) return; // oai request successfully completed with oai-inf.db, no need to open index_db
153 }
154
155 // Since we're here, it means we're not doing anything oai (or oai-inf.db did not exist/open)
156 // So we don't need to work with oai_db. Instead, work with index_db:
157
158 if (db_ptr == NULL) {
159 // most likely a configuration problem
160 logout << text_t2ascii
161 << "configuration error: browsefilter contains a null index dbclass\n\n";
162 err = configurationError;
163 return;
164 }
165
166 // open the database
167 db_ptr->setlogout(&logout);
168 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
169 // most likely a system problem (we have already checked that the database exists)
170 logout << text_t2ascii
171 << "system problem: open on database \""
172 << db_filename << "\" failed\n\n";
173 err = systemProblem;
174 return;
175 }
176
177 infodbclass info;
178
179 // translate any ".fc", ".pr" etc. stuff in the parentnode
180 parentnode = db_ptr->translate_OID (parentnode, info);
181
182 // adjust topmost browsing node
183 if (parentnode.empty()) parentnode = "browse";
184
185 // get the node
186 if ((request.filterResultOptions & FROID) ||
187 (request.filterResultOptions & FRmetadata)) {
188 if (!db_ptr->getinfo(parentnode, info)) {
189 // didn't find the node in index db
190 logout << text_t2ascii
191 << "warning: lookup for node \"" << parentnode
192 << "\" failed for browsefilter.\n\n";
193 } else {
194 // found the node
195
196 // replace " with the parent node name and split the contains string
197 // into the result set
198 text_tarray resultset;
199 text_t tmptext;
200 text_t &contains = info["contains"];
201 text_t::iterator contains_here = contains.begin();
202 text_t::iterator contains_end = contains.end();
203 while (contains_here != contains_end) {
204 if (*contains_here == '"') tmptext += parentnode;
205 else if (*contains_here == ';') {
206 if (!tmptext.empty()) resultset.push_back (tmptext);
207 tmptext.clear();
208 } else tmptext.push_back(*contains_here);
209
210 ++contains_here;
211 }
212 // insert the last result in the set
213 if (!tmptext.empty()) resultset.push_back (tmptext);
214
215 text_tarray offset_resultset;
216 text_t &md_type = info["mdtype"];
217 if (!md_type.empty())
218 {
219 text_t &md_offset = info["mdoffset"];
220 if (!md_offset.empty())
221 {
222 text_t offsettext;
223
224 text_t::iterator offset_here = md_offset.begin();
225 text_t::iterator offset_end = md_offset.end();
226 while (offset_here != offset_end)
227 {
228 if (*offset_here == ';')
229 {
230 if (offsettext.empty())
231 {
232 offset_resultset.push_back ("0");
233 }
234 else
235 {
236 offset_resultset.push_back (offsettext);
237 }
238 offsettext.clear();
239 }
240 else
241 {
242 offsettext.push_back(*offset_here);
243 }
244
245 ++offset_here;
246 }
247 // insert the last result in the set
248 if (offsettext.empty())
249 {
250 offset_resultset.push_back ("0");
251 }
252 else
253 {
254 offset_resultset.push_back (offsettext);
255 }
256 }
257 else
258 {
259 // add 0 offset for each 'contains' entry
260 text_tarray::iterator result_here = resultset.begin();
261 text_tarray::iterator result_end = resultset.end();
262 while (result_here != result_end) {
263 offset_resultset.push_back("0");
264 ++result_here;
265 }
266 }
267
268 // do an intersection with the input set
269 if (!request.docSet.empty()) {
270
271 text_tarray intersect_resultset;
272 text_tarray intersect_offset_resultset;
273
274 text_tarray::const_iterator resultset_here = resultset.begin();
275 text_tarray::const_iterator resultset_end = resultset.end();
276 text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
277
278 while (resultset_here != resultset_end) {
279 if (in_set (request.docSet, *resultset_here))
280 {
281 intersect_resultset.push_back (*resultset_here);
282 intersect_offset_resultset.push_back (*offset_resultset_here);
283 }
284 ++resultset_here;
285 ++offset_resultset_here;
286 }
287 resultset = intersect_resultset;
288 offset_resultset = intersect_offset_resultset;
289 }
290 }
291 else
292 {
293 // do an intersection with the input set
294 if (!request.docSet.empty()) {
295 intersect (resultset, request.docSet);
296 }
297
298 // add 0 offset for each 'contains' entry
299 text_tarray::iterator result_here = resultset.begin();
300 text_tarray::iterator result_end = resultset.end();
301 while (result_here != result_end) {
302 offset_resultset.push_back("0");
303 ++result_here;
304 }
305 }
306
307 // create the response
308 numDocs = resultset.size();
309 int resultnum = 1;
310 ResultDocInfo_t resultdoc;
311 text_tarray::iterator result_here = resultset.begin();
312 text_tarray::iterator result_end = resultset.end();
313 text_tarray::iterator offset_result_here = offset_resultset.begin();
314
315 while (result_here != result_end) {
316 // if endresults is -1 get all results
317 if ((endresults != -1) && (resultnum > endresults)) break;
318 if (resultnum >= startresults) {
319 resultdoc.OID = (*result_here);
320 if (!md_type.empty())
321 {
322 resultdoc.classifier_metadata_type = md_type;
323 resultdoc.classifier_metadata_offset = offset_result_here->getint();
324 }
325 response.docInfo.push_back(resultdoc);
326 }
327
328 ++resultnum;
329 ++result_here;
330 if (!md_type.empty()) ++offset_result_here;
331 }
332 }
333 }
334
335 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
336 response.numDocs = numDocs;
337 response.isApprox = Exact;
338}
339
340bool browsefilterclass::get_oaiinf_db_entries(FilterResponse_t &response,
341 comerror_t &err, ostream &logout)
342{
343 outconvertclass text_t2ascii;
344
345 //logout << text_t2ascii << "browsefilterclass::get_oaiinf_db_entries\n";
346
347 // ONLY if we're doing any OAI stuff (FROAI will be set then) will we be here
348 // So next try to open the oai-inf db if it exists for this collection
349
350
351 if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
352
353 logout << text_t2ascii
354 << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";
355 return false;
356
357 } else { // let's try opening the oaidb file
358 oaidb_ptr->setlogout(&logout);
359 if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
360 // most likely a system problem (we have already checked that the database exists just above)
361 logout << text_t2ascii
362 << "system problem: open on database \""
363 << oaidb_filename << "\" failed\n\n";
364 err = systemProblem;
365 return false;
366 } // now we've opened the oai-inf db file successfully
367 }
368
369 infodbclass oai_info;
370 ResultDocInfo_t resultdoc;
371
372 text_tarray keys = oaidb_ptr->getkeys();
373
374 text_tarray::iterator key_here = keys.begin();
375 text_tarray::iterator key_end = keys.end();
376 while (key_here != key_end) {
377
378 resultdoc.OID = (*key_here);
379
380 if(!oaidb_ptr->getinfo(resultdoc.OID, oai_info)) {
381 logout << text_t2ascii
382 << "warning: lookup for node \"" << resultdoc.OID
383 << "\" in etc/oai-inf db failed for browsefilter.\n\n";
384 }
385 // We don't need to get the oai metadata from oai-inf.db at this stage. That will be
386 // handled by a separate metadata request. See collectserver::filter() and source.cpp's get_oai_metadata().
387 /*
388 else {
389 //logout << text_t2ascii << "@@@@ found node \"" << resultdoc.OID << "\" in etc/oai-inf db.\n\n";
390
391 resultdoc.metadata["oaiinf.status"].isRef = false;
392 resultdoc.metadata["oaiinf.status"].values.push_back(oai_info["status"]);
393 resultdoc.metadata["oaiinf.timestamp"].isRef = false;
394 resultdoc.metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
395 resultdoc.metadata["oaiinf.datestamp"].isRef = false;
396 resultdoc.metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
397 }
398 */
399
400 response.docInfo.push_back(resultdoc);
401 ++key_here;
402
403 }
404
405 // we're done with oai-inf db
406
407 oaidb_ptr->closedatabase(); // don't leave files open
408
409 return true;
410}
Note: See TracBrowser for help on using the repository browser.