source: main/trunk/greenstone2/runtime-src/src/colservr/queryfilter.cpp@ 21772

Last change on this file since 21772 was 16445, checked in by mdewsnip, 16 years ago

Search result document numbers are now represented with a text_t rather than an int, in preparation for changing Lucene to return the Greenstone document OIDs directly rather than looking them up as a separate step. This is better for efficiency and is also required for incremental building.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28
29
30// translate will return true if successful
31bool queryfilterclass::translate (dbclass *db_ptr, text_t& docnum, text_t &trans_OID) {
32 infodbclass info;
33
34 trans_OID.clear();
35
36 // get the info
37 if (db_ptr == NULL) return false;
38 if (!db_ptr->getinfo(docnum, info)) return false;
39
40 // translate
41 if (info["section"].empty()) return false;
42
43 trans_OID = info["section"];
44 return true;
45}
46
47
48// whether document results are needed
49bool queryfilterclass::need_matching_docs (int filterResultOptions) {
50 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
51 (filterResultOptions & FRmetadata));
52}
53
54// whether term information is needed
55bool queryfilterclass::need_term_info (int filterResultOptions) {
56 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
57}
58
59/////////////////////////////////
60// functions for queryfilterclass
61/////////////////////////////////
62
63// get the query parameters
64void queryfilterclass::parse_query_params (const FilterRequest_t &request,
65 vector<queryparamclass> &query_params,
66 int &startresults, int &endresults,
67 text_t &phrasematch, ostream &logout) {
68 outconvertclass text_t2ascii;
69
70 // set defaults for the return parameters
71 query_params.erase(query_params.begin(), query_params.end());
72 startresults = filterOptions["StartResults"].defaultValue.getint();
73 endresults = filterOptions["EndResults"].defaultValue.getint();
74 phrasematch = filterOptions["PhraseMatch"].defaultValue;
75
76 // set defaults for query parameters
77 queryparamclass query;
78 query.combinequery = "or"; // first one must be "or"
79 query.collection = collection;
80 query.index = filterOptions["Index"].defaultValue;
81 query.subcollection = filterOptions["Subcollection"].defaultValue;
82 query.language = filterOptions["Language"].defaultValue;
83 query.querystring.clear();
84 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
85 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
86 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
87 query.stemming = (filterOptions["Stem"].defaultValue == "true");
88 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
89 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
90 query.level = filterOptions["Level"].defaultValue;
91 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
92 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
93 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
94 query.maxnumeric = maxnumeric;
95 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
96 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
97 while (options_here != options_end) {
98 if ((*options_here).name == "CombineQuery") {
99 // add this query
100
101 // "all", needed when combining queries where the document results are needed
102 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
103 query_params.push_back (query);
104
105 // start on next query
106 query.clear();
107 query.combinequery = (*options_here).value;
108
109 // set defaults for query parameters
110 query.collection = collection;
111 query.index = filterOptions["Index"].defaultValue;
112 query.subcollection = filterOptions["Subcollection"].defaultValue;
113 query.language = filterOptions["Language"].defaultValue;
114 query.querystring.clear();
115 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
116 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
117 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
118 query.stemming = (filterOptions["Stem"].defaultValue == "true");
119 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
120 query.level = filterOptions["Level"].defaultValue;
121 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
122 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
123 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
124 query.maxnumeric = maxnumeric;
125 // "all", needed when combining queries where the document results are needed
126 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
127 else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
128
129 } else if ((*options_here).name == "StartResults") {
130 startresults = (*options_here).value.getint();
131 } else if ((*options_here).name == "EndResults") {
132 endresults = (*options_here).value.getint();
133 } else if ((*options_here).name == "QueryType") {
134 query.search_type = ((*options_here).value == "ranked");
135 } else if ((*options_here).name == "MatchMode") {
136 query.match_mode = ((*options_here).value == "all");
137 if (query.match_mode == 1) query.maxdocs = -1;
138 } else if ((*options_here).name == "Term") {
139 query.querystring = (*options_here).value;
140 } else if ((*options_here).name == "Casefold") {
141 query.casefolding = ((*options_here).value == "true");
142 } else if ((*options_here).name == "Stem") {
143 query.stemming = ((*options_here).value == "true");
144 } else if ((*options_here).name == "AccentFold") {
145 query.accentfolding = ((*options_here).value == "true");
146 } else if ((*options_here).name == "Index"&& (*options_here).value !="") {
147 query.index = (*options_here).value;
148 } else if ((*options_here).name == "Subcollection") {
149 query.subcollection = (*options_here).value;
150 } else if ((*options_here).name == "Language") {
151 query.language = (*options_here).value;
152 } else if ((*options_here).name == "Maxdocs") {
153 query.maxdocs = (*options_here).value.getint();
154 } else if ((*options_here).name == "PhraseMatch") {
155 phrasematch = (*options_here).value;
156 } else if ((*options_here).name == "Level") {
157 query.level = (*options_here).value;
158 } else if ((*options_here).name == "FilterString") {
159 query.filterstring = (*options_here).value;
160 } else if ((*options_here).name == "SortField") {
161 query.sortfield = (*options_here).value;
162 } else if ((*options_here).name == "Fuzziness") {
163 query.fuzziness = (*options_here).value;
164 } else {
165 logout << text_t2ascii
166 << "warning: unknown queryfilter option \""
167 << (*options_here).name
168 << "\" ignored.\n\n";
169 }
170
171 ++options_here;
172 }
173
174 // Store the start and end results in the query too, as lucene now needs to
175 // pass them through to the Java
176 query.startresults = startresults;
177 query.endresults = endresults;
178
179 // add the last query
180 query_params.push_back (query);
181}
182
183
184
185
186queryfilterclass::queryfilterclass () {
187 db_ptr = NULL;
188 textsearchptr = NULL;
189 maxnumeric = 4;
190
191 FilterOption_t filtopt;
192 filtopt.name = "CombineQuery";
193 filtopt.type = FilterOption_t::enumeratedt;
194 filtopt.repeatable = FilterOption_t::onePerQuery;
195 filtopt.defaultValue = "and";
196 filtopt.validValues.push_back("and");
197 filtopt.validValues.push_back("or");
198 filtopt.validValues.push_back("not");
199 filterOptions["CombineQuery"] = filtopt;
200
201 // -- onePerQuery StartResults integer
202 filtopt.clear();
203 filtopt.name = "StartResults";
204 filtopt.type = FilterOption_t::integert;
205 filtopt.repeatable = FilterOption_t::onePerQuery;
206 filtopt.defaultValue = "1";
207 filtopt.validValues.push_back("1");
208 filtopt.validValues.push_back("1000");
209 filterOptions["StartResults"] = filtopt;
210
211 // -- onePerQuery EndResults integer
212 filtopt.clear();
213 filtopt.name = "EndResults";
214 filtopt.type = FilterOption_t::integert;
215 filtopt.repeatable = FilterOption_t::onePerQuery;
216 filtopt.defaultValue = "10";
217 filtopt.validValues.push_back("-1");
218 filtopt.validValues.push_back("1000");
219 filterOptions["EndResults"] = filtopt;
220
221 // -- onePerQuery QueryType enumerated (boolean, ranked)
222 filtopt.clear();
223 filtopt.name = "QueryType";
224 filtopt.type = FilterOption_t::enumeratedt;
225 filtopt.repeatable = FilterOption_t::onePerQuery;
226 filtopt.defaultValue = "ranked";
227 filtopt.validValues.push_back("boolean");
228 filtopt.validValues.push_back("ranked");
229 filterOptions["QueryType"] = filtopt;
230
231 // -- onePerQuery MatchMode enumerated (some, all)
232 filtopt.clear();
233 filtopt.name = "MatchMode";
234 filtopt.type = FilterOption_t::enumeratedt;
235 filtopt.repeatable = FilterOption_t::onePerQuery;
236 filtopt.defaultValue = "some";
237 filtopt.validValues.push_back("some");
238 filtopt.validValues.push_back("all");
239 filterOptions["MatchMode"] = filtopt;
240
241 // -- onePerTerm Term string ???
242 filtopt.clear();
243 filtopt.name = "Term";
244 filtopt.type = FilterOption_t::stringt;
245 filtopt.repeatable = FilterOption_t::onePerTerm;
246 filtopt.defaultValue = "";
247 filterOptions["Term"] = filtopt;
248
249 // -- onePerTerm Casefold boolean
250 filtopt.clear();
251 filtopt.name = "Casefold";
252 filtopt.type = FilterOption_t::booleant;
253 filtopt.repeatable = FilterOption_t::onePerTerm;
254 filtopt.defaultValue = "true";
255 filtopt.validValues.push_back("false");
256 filtopt.validValues.push_back("true");
257 filterOptions["Casefold"] = filtopt;
258
259 // -- onePerTerm Stem boolean
260 filtopt.clear();
261 filtopt.name = "Stem";
262 filtopt.type = FilterOption_t::booleant;
263 filtopt.repeatable = FilterOption_t::onePerTerm;
264 filtopt.defaultValue = "false";
265 filtopt.validValues.push_back("false");
266 filtopt.validValues.push_back("true");
267 filterOptions["Stem"] = filtopt;
268
269 // -- onePerTerm AccentFold boolean
270 filtopt.clear();
271 filtopt.name = "AccentFold";
272 filtopt.type = FilterOption_t::booleant;
273 filtopt.repeatable = FilterOption_t::onePerTerm;
274 filtopt.defaultValue = "false";
275 filtopt.validValues.push_back("false");
276 filtopt.validValues.push_back("true");
277 filterOptions["AccentFold"] = filtopt;
278
279 // -- onePerTerm Index enumerated
280 filtopt.clear();
281 filtopt.name = "Index";
282 filtopt.type = FilterOption_t::enumeratedt;
283 filtopt.repeatable = FilterOption_t::onePerTerm;
284 filtopt.defaultValue = "";
285 filterOptions["Index"] = filtopt;
286
287 // -- onePerTerm Subcollection enumerated
288 filtopt.clear();
289 filtopt.name = "Subcollection";
290 filtopt.type = FilterOption_t::enumeratedt;
291 filtopt.repeatable = FilterOption_t::onePerTerm;
292 filtopt.defaultValue = "";
293 filterOptions["Subcollection"] = filtopt;
294
295 // -- onePerTerm Language enumerated
296 filtopt.clear();
297 filtopt.name = "Language";
298 filtopt.type = FilterOption_t::enumeratedt;
299 filtopt.repeatable = FilterOption_t::onePerTerm;
300 filtopt.defaultValue = "";
301 filterOptions["Language"] = filtopt;
302
303 // -- onePerQuery Maxdocs integer
304 filtopt.clear();
305 filtopt.name = "Maxdocs";
306 filtopt.type = FilterOption_t::integert;
307 filtopt.repeatable = FilterOption_t::onePerQuery;
308 filtopt.defaultValue = "200";
309 filtopt.validValues.push_back("-1");
310 filtopt.validValues.push_back("1000");
311 filterOptions["Maxdocs"] = filtopt;
312
313 // -- onePerQuery PhraseMatch enumerated
314 filtopt.clear();
315 filtopt.name = "PhraseMatch";
316 filtopt.type = FilterOption_t::enumeratedt;
317 filtopt.repeatable = FilterOption_t::onePerQuery;
318 filtopt.defaultValue = "some_phrases";
319 filtopt.validValues.push_back ("all_phrases");
320 filtopt.validValues.push_back ("some_phrases");
321 filtopt.validValues.push_back ("all_docs");
322 filterOptions["PhraseMatch"] = filtopt;
323}
324
325queryfilterclass::~queryfilterclass () {
326 // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
327}
328
329void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
330 filterclass::configure (key, cfgline);
331
332 if (key == "indexmap") {
333 indexmap.importmap (cfgline);
334
335 // update the list of indexes in the filter information
336 text_tarray options;
337 indexmap.gettoarray (options);
338 filterOptions["Index"].validValues = options;
339
340 } else if (key == "defaultindex") {
341 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
342
343 } else if (key == "subcollectionmap") {
344 subcollectionmap.importmap (cfgline);
345
346 // update the list of subcollections in the filter information
347 text_tarray options;
348 subcollectionmap.gettoarray (options);
349 filterOptions["Subcollection"].validValues = options;
350
351 } else if (key == "defaultsubcollection") {
352 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
353
354 } else if (key == "languagemap") {
355 languagemap.importmap (cfgline);
356
357 // update the list of languages in the filter information
358 text_tarray options;
359 languagemap.gettoarray (options);
360 filterOptions["Language"].validValues = options;
361
362 } else if (key == "defaultlanguage") {
363 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
364 } else if (key == "indexstem") {
365 indexstem = cfgline[0];
366 } else if (key == "maxnumeric") {
367 maxnumeric = cfgline[0].getint();
368 }
369
370}
371
372bool queryfilterclass::init (ostream &logout) {
373 outconvertclass text_t2ascii;
374
375 if (!filterclass::init(logout)) return false;
376
377 if (filterOptions["Index"].defaultValue.empty()) {
378 // use first index in map as default if no default is set explicitly
379 text_tarray fromarray;
380 indexmap.getfromarray(fromarray);
381 if (fromarray.size()) {
382 filterOptions["Index"].defaultValue = fromarray[0];
383 }
384 }
385
386 if (filterOptions["Subcollection"].defaultValue.empty()) {
387 // use first subcollection in map as default if no default is set explicitly
388 text_tarray fromarray;
389 subcollectionmap.getfromarray(fromarray);
390 if (fromarray.size()) {
391 filterOptions["Subcollection"].defaultValue = fromarray[0];
392 }
393 }
394
395 if (filterOptions["Language"].defaultValue.empty()) {
396 // use first language in map as default if no default is set explicitly
397 text_tarray fromarray;
398 languagemap.getfromarray(fromarray);
399 if (fromarray.size()) {
400 filterOptions["Language"].defaultValue = fromarray[0];
401 }
402 }
403
404 if (db_ptr == NULL) {
405 // most likely a configuration problem
406 logout << text_t2ascii
407 << "configuration error: queryfilter contains a null dbclass\n\n";
408 return false;
409 }
410
411 // get the filename for the database and make sure it exists
412 if (indexstem.empty()) {
413 indexstem = collection;
414 }
415 db_filename = resolve_db_filename(indexstem,db_ptr->getfileextension());
416 if (!file_exists(db_filename)) {
417 logout << text_t2ascii
418 << "warning: database \"" << db_filename << "\" does not exist\n\n";
419 //return false;
420 }
421
422 return true;
423}
424
Note: See TracBrowser for help on using the repository browser.