source: gsdl/trunk/src/colservr/queryfilter.cpp@ 15580

Last change on this file since 15580 was 15558, checked in by mdewsnip, 16 years ago

(Adding new DB support) Changed lots of "gdbm"s to "db"s, in preparation for adding new DB types.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28#include "gsdltools.h"
29#include <assert.h>
30
31
32// translate will return true if successful
33bool queryfilterclass::translate (dbclass *db_ptr, int docnum, text_t &trans_OID) {
34 infodbclass info;
35
36 trans_OID.clear();
37
38 // get the info
39 if (db_ptr == NULL) return false;
40 if (!db_ptr->getinfo(docnum, info)) return false;
41
42 // translate
43 if (info["section"].empty()) return false;
44
45 trans_OID = info["section"];
46 return true;
47}
48
49
50// whether document results are needed
51bool queryfilterclass::need_matching_docs (int filterResultOptions) {
52 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
53 (filterResultOptions & FRmetadata));
54}
55
56// whether term information is needed
57bool queryfilterclass::need_term_info (int filterResultOptions) {
58 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
59}
60
61/////////////////////////////////
62// functions for queryfilterclass
63/////////////////////////////////
64
65// get the query parameters
66void queryfilterclass::parse_query_params (const FilterRequest_t &request,
67 vector<queryparamclass> &query_params,
68 int &startresults, int &endresults,
69 text_t &phrasematch, ostream &logout) {
70 outconvertclass text_t2ascii;
71
72 // set defaults for the return parameters
73 query_params.erase(query_params.begin(), query_params.end());
74 startresults = filterOptions["StartResults"].defaultValue.getint();
75 endresults = filterOptions["EndResults"].defaultValue.getint();
76 phrasematch = filterOptions["PhraseMatch"].defaultValue;
77
78 // set defaults for query parameters
79 queryparamclass query;
80 query.combinequery = "or"; // first one must be "or"
81 query.collection = collection;
82 query.index = filterOptions["Index"].defaultValue;
83 query.subcollection = filterOptions["Subcollection"].defaultValue;
84 query.language = filterOptions["Language"].defaultValue;
85 query.querystring.clear();
86 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
87 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
88 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
89 query.stemming = (filterOptions["Stem"].defaultValue == "true");
90 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
91 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
92 query.level = filterOptions["Level"].defaultValue;
93 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
94 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
95 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
96 query.maxnumeric = maxnumeric;
97 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
98 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
99 while (options_here != options_end) {
100 if ((*options_here).name == "CombineQuery") {
101 // add this query
102
103 // "all", needed when combining queries where the document results are needed
104 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
105 query_params.push_back (query);
106
107 // start on next query
108 query.clear();
109 query.combinequery = (*options_here).value;
110
111 // set defaults for query parameters
112 query.collection = collection;
113 query.index = filterOptions["Index"].defaultValue;
114 query.subcollection = filterOptions["Subcollection"].defaultValue;
115 query.language = filterOptions["Language"].defaultValue;
116 query.querystring.clear();
117 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
118 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
119 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
120 query.stemming = (filterOptions["Stem"].defaultValue == "true");
121 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
122 query.level = filterOptions["Level"].defaultValue;
123 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
124 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
125 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
126 query.maxnumeric = maxnumeric;
127 // "all", needed when combining queries where the document results are needed
128 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
129 else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
130
131 } else if ((*options_here).name == "StartResults") {
132 startresults = (*options_here).value.getint();
133 } else if ((*options_here).name == "EndResults") {
134 endresults = (*options_here).value.getint();
135 } else if ((*options_here).name == "QueryType") {
136 query.search_type = ((*options_here).value == "ranked");
137 } else if ((*options_here).name == "MatchMode") {
138 query.match_mode = ((*options_here).value == "all");
139 if (query.match_mode == 1) query.maxdocs = -1;
140 } else if ((*options_here).name == "Term") {
141 query.querystring = (*options_here).value;
142 } else if ((*options_here).name == "Casefold") {
143 query.casefolding = ((*options_here).value == "true");
144 } else if ((*options_here).name == "Stem") {
145 query.stemming = ((*options_here).value == "true");
146 } else if ((*options_here).name == "AccentFold") {
147 query.accentfolding = ((*options_here).value == "true");
148 } else if ((*options_here).name == "Index"&& (*options_here).value !="") {
149 query.index = (*options_here).value;
150 } else if ((*options_here).name == "Subcollection") {
151 query.subcollection = (*options_here).value;
152 } else if ((*options_here).name == "Language") {
153 query.language = (*options_here).value;
154 } else if ((*options_here).name == "Maxdocs") {
155 query.maxdocs = (*options_here).value.getint();
156 } else if ((*options_here).name == "PhraseMatch") {
157 phrasematch = (*options_here).value;
158 } else if ((*options_here).name == "Level") {
159 query.level = (*options_here).value;
160 } else if ((*options_here).name == "FilterString") {
161 query.filterstring = (*options_here).value;
162 } else if ((*options_here).name == "SortField") {
163 query.sortfield = (*options_here).value;
164 } else if ((*options_here).name == "Fuzziness") {
165 query.fuzziness = (*options_here).value;
166 } else {
167 logout << text_t2ascii
168 << "warning: unknown queryfilter option \""
169 << (*options_here).name
170 << "\" ignored.\n\n";
171 }
172
173 ++options_here;
174 }
175
176 // Store the start and end results in the query too, as lucene now needs to
177 // pass them through to the Java
178 query.startresults = startresults;
179 query.endresults = endresults;
180
181 // add the last query
182 query_params.push_back (query);
183}
184
185
186
187
188queryfilterclass::queryfilterclass () {
189 db_ptr = NULL;
190 textsearchptr = NULL;
191 maxnumeric = 4;
192
193 FilterOption_t filtopt;
194 filtopt.name = "CombineQuery";
195 filtopt.type = FilterOption_t::enumeratedt;
196 filtopt.repeatable = FilterOption_t::onePerQuery;
197 filtopt.defaultValue = "and";
198 filtopt.validValues.push_back("and");
199 filtopt.validValues.push_back("or");
200 filtopt.validValues.push_back("not");
201 filterOptions["CombineQuery"] = filtopt;
202
203 // -- onePerQuery StartResults integer
204 filtopt.clear();
205 filtopt.name = "StartResults";
206 filtopt.type = FilterOption_t::integert;
207 filtopt.repeatable = FilterOption_t::onePerQuery;
208 filtopt.defaultValue = "1";
209 filtopt.validValues.push_back("1");
210 filtopt.validValues.push_back("1000");
211 filterOptions["StartResults"] = filtopt;
212
213 // -- onePerQuery EndResults integer
214 filtopt.clear();
215 filtopt.name = "EndResults";
216 filtopt.type = FilterOption_t::integert;
217 filtopt.repeatable = FilterOption_t::onePerQuery;
218 filtopt.defaultValue = "10";
219 filtopt.validValues.push_back("-1");
220 filtopt.validValues.push_back("1000");
221 filterOptions["EndResults"] = filtopt;
222
223 // -- onePerQuery QueryType enumerated (boolean, ranked)
224 filtopt.clear();
225 filtopt.name = "QueryType";
226 filtopt.type = FilterOption_t::enumeratedt;
227 filtopt.repeatable = FilterOption_t::onePerQuery;
228 filtopt.defaultValue = "ranked";
229 filtopt.validValues.push_back("boolean");
230 filtopt.validValues.push_back("ranked");
231 filterOptions["QueryType"] = filtopt;
232
233 // -- onePerQuery MatchMode enumerated (some, all)
234 filtopt.clear();
235 filtopt.name = "MatchMode";
236 filtopt.type = FilterOption_t::enumeratedt;
237 filtopt.repeatable = FilterOption_t::onePerQuery;
238 filtopt.defaultValue = "some";
239 filtopt.validValues.push_back("some");
240 filtopt.validValues.push_back("all");
241 filterOptions["MatchMode"] = filtopt;
242
243 // -- onePerTerm Term string ???
244 filtopt.clear();
245 filtopt.name = "Term";
246 filtopt.type = FilterOption_t::stringt;
247 filtopt.repeatable = FilterOption_t::onePerTerm;
248 filtopt.defaultValue = "";
249 filterOptions["Term"] = filtopt;
250
251 // -- onePerTerm Casefold boolean
252 filtopt.clear();
253 filtopt.name = "Casefold";
254 filtopt.type = FilterOption_t::booleant;
255 filtopt.repeatable = FilterOption_t::onePerTerm;
256 filtopt.defaultValue = "true";
257 filtopt.validValues.push_back("false");
258 filtopt.validValues.push_back("true");
259 filterOptions["Casefold"] = filtopt;
260
261 // -- onePerTerm Stem boolean
262 filtopt.clear();
263 filtopt.name = "Stem";
264 filtopt.type = FilterOption_t::booleant;
265 filtopt.repeatable = FilterOption_t::onePerTerm;
266 filtopt.defaultValue = "false";
267 filtopt.validValues.push_back("false");
268 filtopt.validValues.push_back("true");
269 filterOptions["Stem"] = filtopt;
270
271 // -- onePerTerm AccentFold boolean
272 filtopt.clear();
273 filtopt.name = "AccentFold";
274 filtopt.type = FilterOption_t::booleant;
275 filtopt.repeatable = FilterOption_t::onePerTerm;
276 filtopt.defaultValue = "false";
277 filtopt.validValues.push_back("false");
278 filtopt.validValues.push_back("true");
279 filterOptions["AccentFold"] = filtopt;
280
281 // -- onePerTerm Index enumerated
282 filtopt.clear();
283 filtopt.name = "Index";
284 filtopt.type = FilterOption_t::enumeratedt;
285 filtopt.repeatable = FilterOption_t::onePerTerm;
286 filtopt.defaultValue = "";
287 filterOptions["Index"] = filtopt;
288
289 // -- onePerTerm Subcollection enumerated
290 filtopt.clear();
291 filtopt.name = "Subcollection";
292 filtopt.type = FilterOption_t::enumeratedt;
293 filtopt.repeatable = FilterOption_t::onePerTerm;
294 filtopt.defaultValue = "";
295 filterOptions["Subcollection"] = filtopt;
296
297 // -- onePerTerm Language enumerated
298 filtopt.clear();
299 filtopt.name = "Language";
300 filtopt.type = FilterOption_t::enumeratedt;
301 filtopt.repeatable = FilterOption_t::onePerTerm;
302 filtopt.defaultValue = "";
303 filterOptions["Language"] = filtopt;
304
305 // -- onePerQuery Maxdocs integer
306 filtopt.clear();
307 filtopt.name = "Maxdocs";
308 filtopt.type = FilterOption_t::integert;
309 filtopt.repeatable = FilterOption_t::onePerQuery;
310 filtopt.defaultValue = "200";
311 filtopt.validValues.push_back("-1");
312 filtopt.validValues.push_back("1000");
313 filterOptions["Maxdocs"] = filtopt;
314
315 // -- onePerQuery PhraseMatch enumerated
316 filtopt.clear();
317 filtopt.name = "PhraseMatch";
318 filtopt.type = FilterOption_t::enumeratedt;
319 filtopt.repeatable = FilterOption_t::onePerQuery;
320 filtopt.defaultValue = "some_phrases";
321 filtopt.validValues.push_back ("all_phrases");
322 filtopt.validValues.push_back ("some_phrases");
323 filtopt.validValues.push_back ("all_docs");
324 filterOptions["PhraseMatch"] = filtopt;
325}
326
327queryfilterclass::~queryfilterclass () {
328 // don't delete db_ptr or mgsearchptr here, they'll
329 // be cleaned up by mggdbmsource
330}
331
332void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
333 filterclass::configure (key, cfgline);
334
335 if (key == "indexmap") {
336 indexmap.importmap (cfgline);
337
338 // update the list of indexes in the filter information
339 text_tarray options;
340 indexmap.gettoarray (options);
341 filterOptions["Index"].validValues = options;
342
343 } else if (key == "defaultindex") {
344 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
345
346 } else if (key == "subcollectionmap") {
347 subcollectionmap.importmap (cfgline);
348
349 // update the list of subcollections in the filter information
350 text_tarray options;
351 subcollectionmap.gettoarray (options);
352 filterOptions["Subcollection"].validValues = options;
353
354 } else if (key == "defaultsubcollection") {
355 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
356
357 } else if (key == "languagemap") {
358 languagemap.importmap (cfgline);
359
360 // update the list of languages in the filter information
361 text_tarray options;
362 languagemap.gettoarray (options);
363 filterOptions["Language"].validValues = options;
364
365 } else if (key == "defaultlanguage") {
366 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
367 } else if (key == "indexstem") {
368 indexstem = cfgline[0];
369 } else if (key == "maxnumeric") {
370 maxnumeric = cfgline[0].getint();
371 }
372
373}
374
375bool queryfilterclass::init (ostream &logout) {
376 outconvertclass text_t2ascii;
377
378 if (!filterclass::init(logout)) return false;
379
380 if (filterOptions["Index"].defaultValue.empty()) {
381 // use first index in map as default if no default is set explicitly
382 text_tarray fromarray;
383 indexmap.getfromarray(fromarray);
384 if (fromarray.size()) {
385 filterOptions["Index"].defaultValue = fromarray[0];
386 }
387 }
388
389 if (filterOptions["Subcollection"].defaultValue.empty()) {
390 // use first subcollection in map as default if no default is set explicitly
391 text_tarray fromarray;
392 subcollectionmap.getfromarray(fromarray);
393 if (fromarray.size()) {
394 filterOptions["Subcollection"].defaultValue = fromarray[0];
395 }
396 }
397
398 if (filterOptions["Language"].defaultValue.empty()) {
399 // use first language in map as default if no default is set explicitly
400 text_tarray fromarray;
401 languagemap.getfromarray(fromarray);
402 if (fromarray.size()) {
403 filterOptions["Language"].defaultValue = fromarray[0];
404 }
405 }
406
407 // get the filename for the database and make sure it exists
408 if (indexstem.empty()) {
409 indexstem = collection;
410 }
411 db_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", indexstem);
412
413 if (littleEndian()) db_filename += ".ldb";
414 else db_filename += ".bdb";
415
416 if (!file_exists(db_filename)) {
417 logout << text_t2ascii
418 << "warning: database \"" << db_filename << "\" does not exist\n\n";
419 //return false;
420 }
421
422 return true;
423}
424
Note: See TracBrowser for help on using the repository browser.