root/main/trunk/greenstone2/runtime-src/src/colservr/browsefilter.cpp @ 31903

Revision 31903, 13.5 KB (checked in by ak19, 3 years ago)

I hope these are all the changes necessary on the runtime side of GS2 to get the OAI server validation working for GS2: instead of working out the earliest datetime stamp of the OAI repository by comparing the builddate in index/build.cfg of each OAI collection and selecting the earliest, the oai-inf.db is now storing the special earliesttimestamp record. The timestamp of this record represents its collection's earliest timestamp. And the earliest of these among all OAI collections is now the earliest datetime of the OAI repository.

  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * browsefilter.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "browsefilter.h"
27#include "colservertools.h"
28#include "fileutil.h"
29#include <assert.h>
30
31
32browsefilterclass::browsefilterclass () {
33  db_ptr = NULL;
34  oaidb_ptr = NULL;
35
36  // -- onePerQuery StartResults   integer
37  FilterOption_t filtopt;
38  filtopt.name = "StartResults";
39  filtopt.type = FilterOption_t::integert;
40  filtopt.repeatable = FilterOption_t::onePerQuery;
41  filtopt.defaultValue = "1";
42  filtopt.validValues.push_back("1");
43  filtopt.validValues.push_back("10000");
44  filterOptions["StartResults"] = filtopt;
45
46  // -- onePerQuery EndResults     integer
47  filtopt.clear();
48  filtopt.name = "EndResults";
49  filtopt.type = FilterOption_t::integert;
50  filtopt.repeatable = FilterOption_t::onePerQuery;
51  filtopt.defaultValue = "-1";
52  filtopt.validValues.push_back("-1");
53  filtopt.validValues.push_back("10000");
54  filterOptions["EndResults"] = filtopt;
55
56  // -- onePerQuery ParentNode     string ("" will return the browsing available)
57  filtopt.clear();
58  filtopt.name = "ParentNode";
59  filtopt.type = FilterOption_t::stringt;
60  filtopt.repeatable = FilterOption_t::onePerQuery;
61  filtopt.defaultValue = g_EmptyText;
62  filterOptions["ParentNode"] = filtopt;
63}
64
65browsefilterclass::~browsefilterclass () {}
66
67void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
68  filterclass::configure (key, cfgline);
69  if (key == "indexstem") {
70    indexstem = cfgline[0];
71  }
72}
73
74bool browsefilterclass::init (ostream &logout) {
75  outconvertclass text_t2ascii;
76
77  if (!filterclass::init(logout)) return false;
78
79  if (db_ptr == NULL || oaidb_ptr == NULL) {
80    // most likely a configuration problem
81    logout << text_t2ascii
82       << "configuration error: browsefilter contains a null dbclass\n\n";
83    return false;
84  }
85
86  if (indexstem.empty()) {
87    indexstem = collection;
88  }
89
90  db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
91                    indexstem,db_ptr->getfileextension());
92  if (!file_exists(db_filename)) {
93    logout << text_t2ascii
94       << "warning: database \"" << db_filename << "\" does not exist\n\n";
95    //    return false;
96  }
97
98  oaidb_filename = resolve_oaidb_filename(gsdlhome, dbhome, collecthome, collection,
99                      oaidb_ptr->getfileextension());
100
101  return true;
102}
103
104void browsefilterclass::filter (const FilterRequest_t &request,
105                FilterResponse_t &response,
106                comerror_t &err, ostream &logout) {
107  int numDocs = 0;
108  outconvertclass text_t2ascii;
109
110  response.clear ();
111  err = noError;
112
113  // get the browse parameters
114  int startresults = filterOptions["StartResults"].defaultValue.getint();
115  int endresults = filterOptions["EndResults"].defaultValue.getint();
116  text_t parentnode = filterOptions["ParentNode"].defaultValue;
117  OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
118  OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
119  while (options_here != options_end) {
120    if ((*options_here).name == "StartResults")
121      startresults = (*options_here).value.getint();
122    else if ((*options_here).name == "EndResults")
123      endresults = (*options_here).value.getint();
124    else if ((*options_here).name == "ParentNode")
125      parentnode = (*options_here).value;
126    else {
127      logout << text_t2ascii
128         << "warning: unknown browsefilter option \""
129         << (*options_here).name
130         << "\" ignored.\n\n";
131    }
132
133    ++options_here;
134  }
135
136  // if we're only working on oai, open oai_db, no need to work with index_db in browsefilter.cpp
137  // (but source.cpp uses both oai-inf.db and index.db to get metadata for OAI request)
138  // If we can't open the oai-inf db, this can be because it didn't exist in older versions of GS
139  // in that case, proceed as usual, using the index db.
140  if((request.filterResultOptions & FROAI)) { // OAI request   
141    bool success = false;
142
143    if(parentnode == "oai") { // doing an OAI listidentifiers request
144
145      // open up the oai-inf db if it exists, and return all IDs *except* the special OID=OAI_EARLIESTTIMESTAMP_OID
146      // if oai-inf db doesn't exist, proceed as usual
147      success = get_oaiinf_db_entries(response, err, logout); //adds any stuff in oai-inf db for the current OID to resultdoc.metadata
148
149      response.numDocs = response.docInfo.size();
150      response.isApprox = Exact;
151    }
152    if (success) return; // oai request successfully completed with oai-inf.db, no need to open index_db
153  }
154
155  // Since we're here, it means we're not doing anything oai (or oai-inf.db did not exist/open)
156  // So we don't need to work with oai_db. Instead, work with index_db:
157
158  if (db_ptr == NULL) {
159    // most likely a configuration problem
160    logout << text_t2ascii
161       << "configuration error: browsefilter contains a null index dbclass\n\n";
162    err = configurationError;
163    return;
164  }
165
166  // open the database
167  db_ptr->setlogout(&logout);
168  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
169    // most likely a system problem (we have already checked that the database exists)
170    logout << text_t2ascii
171       << "system problem: open on database \""
172       << db_filename << "\" failed\n\n";
173    err = systemProblem;
174    return;
175  }
176
177  infodbclass info;
178
179  // translate any ".fc", ".pr" etc. stuff in the parentnode
180  parentnode = db_ptr->translate_OID (parentnode, info);
181
182  // adjust topmost browsing node
183  if (parentnode.empty()) parentnode = "browse";
184
185  // get the node
186  if ((request.filterResultOptions & FROID) ||
187      (request.filterResultOptions & FRmetadata)) {
188    if (!db_ptr->getinfo(parentnode, info)) {
189      // didn't find the node in index db
190      logout << text_t2ascii
191         << "warning: lookup for node \"" << parentnode
192         << "\" failed for browsefilter.\n\n";
193    } else {
194      // found the node
195
196      // replace " with the parent node name and split the contains string
197      // into the result set
198      text_tarray resultset;
199      text_t tmptext;
200      text_t &contains = info["contains"];
201      text_t::iterator contains_here = contains.begin();
202      text_t::iterator contains_end = contains.end();
203      while (contains_here != contains_end) {
204    if (*contains_here == '"') tmptext += parentnode;
205    else if (*contains_here == ';') {
206      if (!tmptext.empty()) resultset.push_back (tmptext);
207      tmptext.clear();
208    } else tmptext.push_back(*contains_here);
209   
210    ++contains_here;
211      }
212      // insert the last result in the set
213      if (!tmptext.empty()) resultset.push_back (tmptext);
214
215      text_tarray offset_resultset;
216      text_t &md_type = info["mdtype"];
217      if (!md_type.empty())
218    {
219      text_t &md_offset = info["mdoffset"];
220      if (!md_offset.empty())
221        {
222          text_t offsettext;
223
224          text_t::iterator offset_here = md_offset.begin();
225          text_t::iterator offset_end = md_offset.end();
226          while (offset_here != offset_end)
227        {
228          if (*offset_here == ';')
229            {
230              if (offsettext.empty())
231            {
232              offset_resultset.push_back ("0");
233            }
234              else
235            {
236              offset_resultset.push_back (offsettext);
237            }
238              offsettext.clear();
239            }
240          else
241            {
242              offsettext.push_back(*offset_here);
243            }
244         
245          ++offset_here;
246        }
247          // insert the last result in the set
248          if (offsettext.empty())
249        {
250          offset_resultset.push_back ("0");
251        }
252          else
253        {
254          offset_resultset.push_back (offsettext);
255        }
256        }
257      else
258        {
259          // add 0 offset for each 'contains' entry
260          text_tarray::iterator result_here = resultset.begin();
261          text_tarray::iterator result_end = resultset.end();
262          while (result_here != result_end) {
263        offset_resultset.push_back("0");
264        ++result_here;
265          }
266        }
267
268      // do an intersection with the input set
269      if (!request.docSet.empty()) {
270
271        text_tarray intersect_resultset;
272        text_tarray intersect_offset_resultset;
273       
274        text_tarray::const_iterator resultset_here = resultset.begin();
275        text_tarray::const_iterator resultset_end = resultset.end();
276        text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
277       
278        while (resultset_here != resultset_end) {
279          if (in_set (request.docSet, *resultset_here))
280        {
281          intersect_resultset.push_back (*resultset_here);
282          intersect_offset_resultset.push_back (*offset_resultset_here);
283        }
284          ++resultset_here;
285          ++offset_resultset_here;
286        }
287        resultset = intersect_resultset;
288        offset_resultset = intersect_offset_resultset;
289      }
290    }
291      else
292    {
293      // do an intersection with the input set
294      if (!request.docSet.empty()) {
295        intersect (resultset, request.docSet);
296      }
297
298      // add 0 offset for each 'contains' entry
299      text_tarray::iterator result_here = resultset.begin();
300      text_tarray::iterator result_end = resultset.end();
301      while (result_here != result_end) {
302        offset_resultset.push_back("0");
303        ++result_here;
304      }
305    }
306
307      // create the response
308      numDocs = resultset.size();
309      int resultnum = 1;
310      ResultDocInfo_t resultdoc;
311      text_tarray::iterator result_here = resultset.begin();
312      text_tarray::iterator result_end = resultset.end();
313      text_tarray::iterator offset_result_here = offset_resultset.begin();
314
315      while (result_here != result_end) {
316    // if endresults is -1 get all results
317    if ((endresults != -1) && (resultnum > endresults)) break;
318    if (resultnum >= startresults) {
319      resultdoc.OID = (*result_here);
320      if (!md_type.empty())
321        {
322          resultdoc.classifier_metadata_type = md_type;
323          resultdoc.classifier_metadata_offset = offset_result_here->getint();
324        }
325      response.docInfo.push_back(resultdoc);
326    }
327
328    ++resultnum;
329    ++result_here;
330    if (!md_type.empty()) ++offset_result_here;
331      }
332    }
333  }
334
335  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
336  response.numDocs = numDocs;
337  response.isApprox = Exact;
338}
339
340bool browsefilterclass::get_oaiinf_db_entries(FilterResponse_t &response,
341                      comerror_t &err, ostream &logout)
342{
343  outconvertclass text_t2ascii;
344 
345  //logout << text_t2ascii << "browsefilterclass::get_oaiinf_db_entries\n";   
346
347  // ONLY if we're doing any OAI stuff (FROAI will be set then) will we be here
348  // So next try to open the oai-inf db if it exists for this collection
349
350 
351  if (!file_exists(oaidb_filename)) { // if the oaidb file doesn't even exist, let's not bother with oaidb
352   
353    logout << text_t2ascii
354       << "warning: collection's oai-inf database \"" << oaidb_filename << "\" does not exist\n\n";   
355    return false;
356
357  } else { // let's try opening the oaidb file
358    oaidb_ptr->setlogout(&logout);
359    if (!oaidb_ptr->opendatabase (oaidb_filename, DB_READER, 100, false)) {
360      // most likely a system problem (we have already checked that the database exists just above)
361      logout << text_t2ascii
362         << "system problem: open on database \""
363         << oaidb_filename << "\" failed\n\n";
364      err = systemProblem;
365      return false;
366    } // now we've opened the oai-inf db file successfully
367  }
368
369  infodbclass oai_info;
370  ResultDocInfo_t resultdoc;
371
372  text_tarray keys = oaidb_ptr->getkeys();
373
374  text_tarray::iterator key_here = keys.begin();
375  text_tarray::iterator key_end = keys.end();
376  while (key_here != key_end) {   
377   
378    resultdoc.OID = (*key_here);
379   
380    // OAI_EARLIESTTIMESTAMP_OID is the OID of a special record that we'll ignore
381    // here in browsefilter.cpp, since it's not a doc.
382    // When the *metadata* for this special OID is requested, source.cpp will handle it
383    if(resultdoc.OID == OAI_EARLIESTTIMESTAMP_OID) {
384      ++key_here;
385      continue;
386    }
387
388    if(!oaidb_ptr->getinfo(resultdoc.OID, oai_info)) {
389      logout << text_t2ascii
390         << "warning: lookup for node \"" << resultdoc.OID
391         << "\" in etc/oai-inf db failed for browsefilter.\n\n";
392    }
393    // We don't need to get the oai metadata from oai-inf.db at this stage. That will be
394    // handled by a separate metadata request. See collectserver::filter() and source.cpp's get_oai_metadata().
395    /*
396    else {
397      //logout << text_t2ascii << "@@@@ found node \"" << resultdoc.OID << "\" in etc/oai-inf db.\n\n";
398     
399      resultdoc.metadata["oaiinf.status"].isRef = false;
400      resultdoc.metadata["oaiinf.status"].values.push_back(oai_info["status"]);
401      resultdoc.metadata["oaiinf.timestamp"].isRef = false;
402      resultdoc.metadata["oaiinf.timestamp"].values.push_back(oai_info["timestamp"]);
403      resultdoc.metadata["oaiinf.datestamp"].isRef = false;
404      resultdoc.metadata["oaiinf.datestamp"].values.push_back(oai_info["datestamp"]);
405    }
406    */
407   
408    response.docInfo.push_back(resultdoc);
409    ++key_here;   
410   
411  }
412
413  // we're done with oai-inf db
414
415  oaidb_ptr->closedatabase(); // don't leave files open
416
417  return true;
418}
Note: See TracBrowser for help on using the browser.