root/gsdl/trunk/src/colservr/queryfilter.cpp @ 15757

Revision 15680, 15.5 KB (checked in by mdewsnip, 12 years ago)

(Adding new DB support) Now uses the new dbclass::getfileextension() function to avoid having GDBM-specific filename extension code scattered around.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28
29
30// translate will return true if successful
31bool queryfilterclass::translate (dbclass *db_ptr, int docnum, text_t &trans_OID) {
32  infodbclass info;
33
34  trans_OID.clear();
35
36  // get the info
37  if (db_ptr == NULL) return false;
38  if (!db_ptr->getinfo(docnum, info)) return false;
39
40  // translate
41  if (info["section"].empty()) return false;
42
43  trans_OID = info["section"];
44  return true;
45}
46
47
48// whether document results are needed
49bool queryfilterclass::need_matching_docs (int filterResultOptions) {
50  return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
51      (filterResultOptions & FRmetadata));
52}
53
54// whether term information is needed
55bool queryfilterclass::need_term_info (int filterResultOptions) {
56  return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
57}
58
59/////////////////////////////////
60// functions for queryfilterclass
61/////////////////////////////////
62
63// get the query parameters
64void queryfilterclass::parse_query_params (const FilterRequest_t &request,
65                       vector<queryparamclass> &query_params,
66                       int &startresults, int &endresults,
67                       text_t &phrasematch, ostream &logout) {
68  outconvertclass text_t2ascii;
69
70  // set defaults for the return parameters
71  query_params.erase(query_params.begin(), query_params.end());
72  startresults = filterOptions["StartResults"].defaultValue.getint();
73  endresults = filterOptions["EndResults"].defaultValue.getint();
74  phrasematch = filterOptions["PhraseMatch"].defaultValue;
75
76  // set defaults for query parameters
77  queryparamclass query;
78  query.combinequery = "or"; // first one must be "or"
79  query.collection = collection;
80  query.index = filterOptions["Index"].defaultValue;
81  query.subcollection = filterOptions["Subcollection"].defaultValue;
82  query.language = filterOptions["Language"].defaultValue;
83  query.querystring.clear();
84  query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
85  query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
86  query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
87  query.stemming = (filterOptions["Stem"].defaultValue == "true");
88  query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
89  query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
90  query.level = filterOptions["Level"].defaultValue;
91  query.filterstring = filterOptions["FilterString"].defaultValue;  // Lucene specific
92  query.sortfield = filterOptions["SortField"].defaultValue;  // Lucene specific
93  query.fuzziness = filterOptions["Fuzziness"].defaultValue;  // Lucene specific
94  query.maxnumeric = maxnumeric;
95  OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
96  OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
97  while (options_here != options_end) {
98    if ((*options_here).name == "CombineQuery") {
99      // add this query
100     
101      // "all", needed when combining queries where the document results are needed
102      if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
103      query_params.push_back (query);
104
105      // start on next query
106      query.clear();
107      query.combinequery = (*options_here).value;
108
109      // set defaults for query parameters
110      query.collection = collection;
111      query.index = filterOptions["Index"].defaultValue;
112      query.subcollection = filterOptions["Subcollection"].defaultValue;
113      query.language = filterOptions["Language"].defaultValue;
114      query.querystring.clear();
115      query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
116      query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
117      query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
118      query.stemming = (filterOptions["Stem"].defaultValue == "true");
119      query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
120      query.level = filterOptions["Level"].defaultValue;
121      query.filterstring = filterOptions["FilterString"].defaultValue;  // Lucene specific
122      query.sortfield = filterOptions["SortField"].defaultValue;  // Lucene specific
123      query.fuzziness = filterOptions["Fuzziness"].defaultValue;  // Lucene specific
124      query.maxnumeric = maxnumeric;
125      // "all", needed when combining queries where the document results are needed
126      if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
127      else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
128     
129    } else if ((*options_here).name == "StartResults") {
130      startresults = (*options_here).value.getint();
131    } else if ((*options_here).name == "EndResults") {
132      endresults = (*options_here).value.getint();
133    } else if ((*options_here).name == "QueryType") {
134      query.search_type = ((*options_here).value == "ranked");
135    } else if ((*options_here).name == "MatchMode") {
136      query.match_mode = ((*options_here).value == "all");
137      if (query.match_mode == 1) query.maxdocs = -1;
138    } else if ((*options_here).name == "Term") {
139      query.querystring = (*options_here).value;
140    } else if ((*options_here).name == "Casefold") {
141      query.casefolding = ((*options_here).value == "true");
142    } else if ((*options_here).name == "Stem") {
143      query.stemming = ((*options_here).value == "true");
144    } else if ((*options_here).name == "AccentFold") {
145      query.accentfolding = ((*options_here).value == "true");
146    } else if ((*options_here).name == "Index"&& (*options_here).value !="") {
147      query.index = (*options_here).value;
148    } else if ((*options_here).name == "Subcollection") {
149      query.subcollection = (*options_here).value;
150    } else if ((*options_here).name == "Language") {
151      query.language = (*options_here).value;
152    } else if ((*options_here).name == "Maxdocs") {
153      query.maxdocs = (*options_here).value.getint();
154    } else if ((*options_here).name == "PhraseMatch") {
155      phrasematch = (*options_here).value;
156    } else if ((*options_here).name == "Level") {
157      query.level = (*options_here).value;
158    } else if ((*options_here).name == "FilterString") {
159      query.filterstring = (*options_here).value;
160    } else if ((*options_here).name == "SortField") {
161      query.sortfield = (*options_here).value;
162    } else if ((*options_here).name == "Fuzziness") {
163      query.fuzziness = (*options_here).value;
164    } else {
165      logout << text_t2ascii
166         << "warning: unknown queryfilter option \""
167         << (*options_here).name
168         << "\" ignored.\n\n";
169    }
170
171    ++options_here;
172  }
173
174  // Store the start and end results in the query too, as lucene now needs to
175  // pass them through to the Java
176  query.startresults = startresults;
177  query.endresults = endresults;
178
179  // add the last query
180  query_params.push_back (query);
181}
182
183
184
185
186queryfilterclass::queryfilterclass () {
187  db_ptr = NULL;
188  textsearchptr = NULL;
189  maxnumeric = 4;
190
191  FilterOption_t filtopt;
192  filtopt.name = "CombineQuery";
193  filtopt.type = FilterOption_t::enumeratedt;
194  filtopt.repeatable = FilterOption_t::onePerQuery;
195  filtopt.defaultValue = "and";
196  filtopt.validValues.push_back("and");
197  filtopt.validValues.push_back("or");
198  filtopt.validValues.push_back("not");
199  filterOptions["CombineQuery"] = filtopt;
200
201  // -- onePerQuery StartResults   integer
202  filtopt.clear();
203  filtopt.name = "StartResults";
204  filtopt.type = FilterOption_t::integert;
205  filtopt.repeatable = FilterOption_t::onePerQuery;
206  filtopt.defaultValue = "1";
207  filtopt.validValues.push_back("1");
208  filtopt.validValues.push_back("1000");
209  filterOptions["StartResults"] = filtopt;
210
211  // -- onePerQuery EndResults     integer
212  filtopt.clear();
213  filtopt.name = "EndResults";
214  filtopt.type = FilterOption_t::integert;
215  filtopt.repeatable = FilterOption_t::onePerQuery;
216  filtopt.defaultValue = "10";
217  filtopt.validValues.push_back("-1");
218  filtopt.validValues.push_back("1000");
219  filterOptions["EndResults"] = filtopt;
220
221  // -- onePerQuery QueryType      enumerated (boolean, ranked)
222  filtopt.clear();
223  filtopt.name = "QueryType";
224  filtopt.type = FilterOption_t::enumeratedt;
225  filtopt.repeatable = FilterOption_t::onePerQuery;
226  filtopt.defaultValue = "ranked";
227  filtopt.validValues.push_back("boolean");
228  filtopt.validValues.push_back("ranked");
229  filterOptions["QueryType"] = filtopt;
230
231  // -- onePerQuery MatchMode      enumerated (some, all)
232  filtopt.clear();
233  filtopt.name = "MatchMode";
234  filtopt.type = FilterOption_t::enumeratedt;
235  filtopt.repeatable = FilterOption_t::onePerQuery;
236  filtopt.defaultValue = "some";
237  filtopt.validValues.push_back("some");
238  filtopt.validValues.push_back("all");
239  filterOptions["MatchMode"] = filtopt;
240
241  // -- onePerTerm  Term           string ???
242  filtopt.clear();
243  filtopt.name = "Term";
244  filtopt.type = FilterOption_t::stringt;
245  filtopt.repeatable = FilterOption_t::onePerTerm;
246  filtopt.defaultValue = "";
247  filterOptions["Term"] = filtopt;
248
249  // -- onePerTerm  Casefold       boolean
250  filtopt.clear();
251  filtopt.name = "Casefold";
252  filtopt.type = FilterOption_t::booleant;
253  filtopt.repeatable = FilterOption_t::onePerTerm;
254  filtopt.defaultValue = "true";
255  filtopt.validValues.push_back("false");
256  filtopt.validValues.push_back("true");
257  filterOptions["Casefold"] = filtopt;
258
259  // -- onePerTerm  Stem           boolean
260  filtopt.clear();
261  filtopt.name = "Stem";
262  filtopt.type = FilterOption_t::booleant;
263  filtopt.repeatable = FilterOption_t::onePerTerm;
264  filtopt.defaultValue = "false";
265  filtopt.validValues.push_back("false");
266  filtopt.validValues.push_back("true");
267  filterOptions["Stem"] = filtopt;
268
269  // -- onePerTerm  AccentFold           boolean
270  filtopt.clear();
271  filtopt.name = "AccentFold";
272  filtopt.type = FilterOption_t::booleant;
273  filtopt.repeatable = FilterOption_t::onePerTerm;
274  filtopt.defaultValue = "false";
275  filtopt.validValues.push_back("false");
276  filtopt.validValues.push_back("true");
277  filterOptions["AccentFold"] = filtopt;
278 
279  // -- onePerTerm  Index          enumerated
280  filtopt.clear();
281  filtopt.name = "Index";
282  filtopt.type = FilterOption_t::enumeratedt;
283  filtopt.repeatable = FilterOption_t::onePerTerm;
284  filtopt.defaultValue = "";
285  filterOptions["Index"] = filtopt;
286
287  // -- onePerTerm  Subcollection  enumerated
288  filtopt.clear();
289  filtopt.name = "Subcollection";
290  filtopt.type = FilterOption_t::enumeratedt;
291  filtopt.repeatable = FilterOption_t::onePerTerm;
292  filtopt.defaultValue = "";
293  filterOptions["Subcollection"] = filtopt;
294
295  // -- onePerTerm  Language  enumerated
296  filtopt.clear();
297  filtopt.name = "Language";
298  filtopt.type = FilterOption_t::enumeratedt;
299  filtopt.repeatable = FilterOption_t::onePerTerm;
300  filtopt.defaultValue = "";
301  filterOptions["Language"] = filtopt;
302
303  // -- onePerQuery  Maxdocs  integer
304  filtopt.clear();
305  filtopt.name = "Maxdocs";
306  filtopt.type = FilterOption_t::integert;
307  filtopt.repeatable = FilterOption_t::onePerQuery;
308  filtopt.defaultValue = "200";
309  filtopt.validValues.push_back("-1");
310  filtopt.validValues.push_back("1000");
311  filterOptions["Maxdocs"] = filtopt;
312
313  // -- onePerQuery  PhraseMatch  enumerated
314  filtopt.clear();
315  filtopt.name = "PhraseMatch";
316  filtopt.type = FilterOption_t::enumeratedt;
317  filtopt.repeatable = FilterOption_t::onePerQuery;
318  filtopt.defaultValue = "some_phrases";
319  filtopt.validValues.push_back ("all_phrases");
320  filtopt.validValues.push_back ("some_phrases");
321  filtopt.validValues.push_back ("all_docs");
322  filterOptions["PhraseMatch"] = filtopt;
323}
324
325queryfilterclass::~queryfilterclass () {
326  // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
327}
328
329void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
330  filterclass::configure (key, cfgline);
331
332  if (key == "indexmap") {
333    indexmap.importmap (cfgline);
334   
335    // update the list of indexes in the filter information
336    text_tarray options;
337    indexmap.gettoarray (options);
338    filterOptions["Index"].validValues = options;
339
340  } else if (key == "defaultindex") {
341    indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
342
343  } else if (key == "subcollectionmap") {
344    subcollectionmap.importmap (cfgline);
345
346    // update the list of subcollections in the filter information
347    text_tarray options;
348    subcollectionmap.gettoarray (options);
349    filterOptions["Subcollection"].validValues = options;
350
351  } else if (key == "defaultsubcollection") {
352    subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
353
354  } else if (key == "languagemap") {
355    languagemap.importmap (cfgline);
356
357    // update the list of languages in the filter information
358    text_tarray options;
359    languagemap.gettoarray (options);
360    filterOptions["Language"].validValues = options;
361
362  } else if (key == "defaultlanguage") {
363    languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
364  } else if (key == "indexstem") {
365    indexstem = cfgline[0];
366  } else if (key == "maxnumeric") {
367    maxnumeric = cfgline[0].getint();
368  }
369 
370}
371
372bool queryfilterclass::init (ostream &logout) {
373  outconvertclass text_t2ascii;
374
375  if (!filterclass::init(logout)) return false;
376
377  if (filterOptions["Index"].defaultValue.empty()) {
378    // use first index in map as default if no default is set explicitly
379    text_tarray fromarray;
380    indexmap.getfromarray(fromarray);
381    if (fromarray.size()) {
382      filterOptions["Index"].defaultValue = fromarray[0];
383    }
384  }
385
386  if (filterOptions["Subcollection"].defaultValue.empty()) {
387    // use first subcollection in map as default if no default is set explicitly
388    text_tarray fromarray;
389    subcollectionmap.getfromarray(fromarray);
390    if (fromarray.size()) {
391      filterOptions["Subcollection"].defaultValue = fromarray[0];
392    }
393  }
394
395  if (filterOptions["Language"].defaultValue.empty()) {
396    // use first language in map as default if no default is set explicitly
397    text_tarray fromarray;
398    languagemap.getfromarray(fromarray);
399    if (fromarray.size()) {
400      filterOptions["Language"].defaultValue = fromarray[0];
401    }
402  }
403
404  if (db_ptr == NULL) {
405    // most likely a configuration problem
406    logout << text_t2ascii
407       << "configuration error: queryfilter contains a null dbclass\n\n";
408    return false;
409  }
410
411  // get the filename for the database and make sure it exists
412  if (indexstem.empty()) {
413    indexstem = collection;
414  }
415  db_filename = filename_cat(dbhome, "collect", collection, "index", "text", indexstem);
416  db_filename += db_ptr->getfileextension();
417  if (!file_exists(db_filename)) {
418    logout << text_t2ascii
419       << "warning: database \"" << db_filename << "\" does not exist\n\n";
420    //return false;
421  }
422
423  return true;
424}
425
Note: See TracBrowser for help on using the browser.