source: main/trunk/greenstone2/runtime-src/src/colservr/queryfilter.cpp@ 28762

Last change on this file since 28762 was 27084, checked in by kjdon, 11 years ago

more tidying up of queryfilters. moved some stuff to the lucene class, and added 'virtual's so the right method actually gets used

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.5 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28
29
30
31/////////////////////////////////
32// functions for queryfilterclass
33/////////////////////////////////
34
35
36queryfilterclass::queryfilterclass () {
37 db_ptr = NULL;
38 textsearchptr = NULL;
39 maxnumeric = 4;
40
41 FilterOption_t filtopt;
42 filtopt.name = "CombineQuery";
43 filtopt.type = FilterOption_t::enumeratedt;
44 filtopt.repeatable = FilterOption_t::onePerQuery;
45 filtopt.defaultValue = "and";
46 filtopt.validValues.push_back("and");
47 filtopt.validValues.push_back("or");
48 filtopt.validValues.push_back("not");
49 filterOptions["CombineQuery"] = filtopt;
50
51 // -- onePerQuery StartResults integer
52 filtopt.clear();
53 filtopt.name = "StartResults";
54 filtopt.type = FilterOption_t::integert;
55 filtopt.repeatable = FilterOption_t::onePerQuery;
56 filtopt.defaultValue = "1";
57 filtopt.validValues.push_back("1");
58 filtopt.validValues.push_back("1000");
59 filterOptions["StartResults"] = filtopt;
60
61 // -- onePerQuery EndResults integer
62 filtopt.clear();
63 filtopt.name = "EndResults";
64 filtopt.type = FilterOption_t::integert;
65 filtopt.repeatable = FilterOption_t::onePerQuery;
66 filtopt.defaultValue = "10";
67 filtopt.validValues.push_back("-1");
68 filtopt.validValues.push_back("1000");
69 filterOptions["EndResults"] = filtopt;
70
71 // -- onePerQuery QueryType enumerated (boolean, ranked)
72 filtopt.clear();
73 filtopt.name = "QueryType";
74 filtopt.type = FilterOption_t::enumeratedt;
75 filtopt.repeatable = FilterOption_t::onePerQuery;
76 filtopt.defaultValue = "ranked";
77 filtopt.validValues.push_back("boolean");
78 filtopt.validValues.push_back("ranked");
79 filterOptions["QueryType"] = filtopt;
80
81 // -- onePerQuery MatchMode enumerated (some, all)
82 filtopt.clear();
83 filtopt.name = "MatchMode";
84 filtopt.type = FilterOption_t::enumeratedt;
85 filtopt.repeatable = FilterOption_t::onePerQuery;
86 filtopt.defaultValue = "some";
87 filtopt.validValues.push_back("some");
88 filtopt.validValues.push_back("all");
89 filterOptions["MatchMode"] = filtopt;
90
91 // -- onePerTerm Term string ???
92 filtopt.clear();
93 filtopt.name = "Term";
94 filtopt.type = FilterOption_t::stringt;
95 filtopt.repeatable = FilterOption_t::onePerTerm;
96 filtopt.defaultValue = "";
97 filterOptions["Term"] = filtopt;
98
99 // -- onePerTerm Casefold boolean
100 filtopt.clear();
101 filtopt.name = "Casefold";
102 filtopt.type = FilterOption_t::booleant;
103 filtopt.repeatable = FilterOption_t::onePerTerm;
104 filtopt.defaultValue = "true";
105 filtopt.validValues.push_back("false");
106 filtopt.validValues.push_back("true");
107 filterOptions["Casefold"] = filtopt;
108
109 // -- onePerTerm Stem boolean
110 filtopt.clear();
111 filtopt.name = "Stem";
112 filtopt.type = FilterOption_t::booleant;
113 filtopt.repeatable = FilterOption_t::onePerTerm;
114 filtopt.defaultValue = "false";
115 filtopt.validValues.push_back("false");
116 filtopt.validValues.push_back("true");
117 filterOptions["Stem"] = filtopt;
118
119 // -- onePerTerm AccentFold boolean
120 filtopt.clear();
121 filtopt.name = "AccentFold";
122 filtopt.type = FilterOption_t::booleant;
123 filtopt.repeatable = FilterOption_t::onePerTerm;
124 filtopt.defaultValue = "false";
125 filtopt.validValues.push_back("false");
126 filtopt.validValues.push_back("true");
127 filterOptions["AccentFold"] = filtopt;
128
129 // -- onePerTerm Index enumerated
130 filtopt.clear();
131 filtopt.name = "Index";
132 filtopt.type = FilterOption_t::enumeratedt;
133 filtopt.repeatable = FilterOption_t::onePerTerm;
134 filtopt.defaultValue = "";
135 filterOptions["Index"] = filtopt;
136
137 // -- onePerTerm Subcollection enumerated
138 filtopt.clear();
139 filtopt.name = "Subcollection";
140 filtopt.type = FilterOption_t::enumeratedt;
141 filtopt.repeatable = FilterOption_t::onePerTerm;
142 filtopt.defaultValue = "";
143 filterOptions["Subcollection"] = filtopt;
144
145 // -- onePerTerm Language enumerated
146 filtopt.clear();
147 filtopt.name = "Language";
148 filtopt.type = FilterOption_t::enumeratedt;
149 filtopt.repeatable = FilterOption_t::onePerTerm;
150 filtopt.defaultValue = "";
151 filterOptions["Language"] = filtopt;
152
153 // -- onePerQuery Maxdocs integer
154 filtopt.clear();
155 filtopt.name = "Maxdocs";
156 filtopt.type = FilterOption_t::integert;
157 filtopt.repeatable = FilterOption_t::onePerQuery;
158 filtopt.defaultValue = "200";
159 filtopt.validValues.push_back("-1");
160 filtopt.validValues.push_back("1000");
161 filterOptions["Maxdocs"] = filtopt;
162
163}
164
165queryfilterclass::~queryfilterclass () {
166 // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
167}
168
169void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
170 filterclass::configure (key, cfgline);
171
172 if (key == "indexmap") {
173 indexmap.importmap (cfgline);
174
175 // update the list of indexes in the filter information
176 text_tarray options;
177 indexmap.gettoarray (options);
178 filterOptions["Index"].validValues = options;
179
180 } else if (key == "defaultindex") {
181 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
182
183 } else if (key == "subcollectionmap") {
184 subcollectionmap.importmap (cfgline);
185
186 // update the list of subcollections in the filter information
187 text_tarray options;
188 subcollectionmap.gettoarray (options);
189 filterOptions["Subcollection"].validValues = options;
190
191 } else if (key == "defaultsubcollection") {
192 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
193
194 } else if (key == "languagemap") {
195 languagemap.importmap (cfgline);
196
197 // update the list of languages in the filter information
198 text_tarray options;
199 languagemap.gettoarray (options);
200 filterOptions["Language"].validValues = options;
201
202 } else if (key == "defaultlanguage") {
203 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
204 } else if (key == "indexstem") {
205 indexstem = cfgline[0];
206 } else if (key == "maxnumeric") {
207 maxnumeric = cfgline[0].getint();
208 }
209
210}
211
212bool queryfilterclass::init (ostream &logout) {
213 outconvertclass text_t2ascii;
214
215 if (!filterclass::init(logout)) return false;
216
217 if (filterOptions["Index"].defaultValue.empty()) {
218 // use first index in map as default if no default is set explicitly
219 text_tarray fromarray;
220 indexmap.getfromarray(fromarray);
221 if (fromarray.size()) {
222 filterOptions["Index"].defaultValue = fromarray[0];
223 }
224 }
225
226 if (filterOptions["Subcollection"].defaultValue.empty()) {
227 // use first subcollection in map as default if no default is set explicitly
228 text_tarray fromarray;
229 subcollectionmap.getfromarray(fromarray);
230 if (fromarray.size()) {
231 filterOptions["Subcollection"].defaultValue = fromarray[0];
232 }
233 }
234
235 if (filterOptions["Language"].defaultValue.empty()) {
236 // use first language in map as default if no default is set explicitly
237 text_tarray fromarray;
238 languagemap.getfromarray(fromarray);
239 if (fromarray.size()) {
240 filterOptions["Language"].defaultValue = fromarray[0];
241 }
242 }
243
244 if (db_ptr == NULL) {
245 // most likely a configuration problem
246 logout << text_t2ascii
247 << "configuration error: queryfilter contains a null dbclass\n\n";
248 return false;
249 }
250
251 // get the filename for the database and make sure it exists
252 if (indexstem.empty()) {
253 indexstem = collection;
254 }
255 db_filename = resolve_db_filename(indexstem,db_ptr->getfileextension());
256 if (!file_exists(db_filename)) {
257 logout << text_t2ascii
258 << "warning: database \"" << db_filename << "\" does not exist\n\n";
259 //return false;
260 }
261
262 return true;
263}
264
265void queryfilterclass::set_queryparam_defaults(queryparamclass &query ) {
266
267 query.collection = collection;
268 query.index = filterOptions["Index"].defaultValue;
269 query.subcollection = filterOptions["Subcollection"].defaultValue;
270 query.language = filterOptions["Language"].defaultValue;
271 query.querystring.clear();
272 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
273 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
274 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
275 query.stemming = (filterOptions["Stem"].defaultValue == "true");
276 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
277 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
278 query.level = filterOptions["Level"].defaultValue;
279 query.maxnumeric = maxnumeric;
280
281}
282
283bool queryfilterclass::set_queryparam_field(const OptionValue_t &option, queryparamclass &query) {
284
285 if (option.name == "QueryType") {
286 query.search_type = (option.value == "ranked");
287 return true;
288 }
289 if (option.name == "MatchMode") {
290 query.match_mode = (option.value == "all");
291 if (query.match_mode == 1) query.maxdocs = -1;
292 return true;
293 }
294 if (option.name == "Term") {
295 query.querystring = option.value;
296 return true;
297 }
298 if (option.name == "Casefold") {
299 query.casefolding = (option.value == "true");
300 return true;
301 }
302 if (option.name == "Stem") {
303 query.stemming = (option.value == "true");
304 return true;
305 }
306 if (option.name == "AccentFold") {
307 query.accentfolding = (option.value == "true");
308 return true;
309 }
310 if (option.name == "Index"&& option.value !="") {
311 query.index = option.value;
312 return true;
313 }
314 if (option.name == "Subcollection") {
315 query.subcollection = option.value;
316 return true;
317 }
318 if (option.name == "Language") {
319 query.language = option.value;
320 return true;
321 }
322 if (option.name == "Maxdocs") {
323 query.maxdocs = option.value.getint();
324 return true;
325 }
326 if (option.name == "Level") {
327 query.level = option.value;
328 return true;
329 }
330
331 return false;
332}
333// get the query parameters
334void queryfilterclass::parse_query_params (const FilterRequest_t &request,
335 vector<queryparamclass> &query_params,
336 int &startresults, int &endresults,
337 ostream &logout) {
338 outconvertclass text_t2ascii;
339 // set defaults for the return parameters
340 query_params.erase(query_params.begin(), query_params.end());
341 startresults = filterOptions["StartResults"].defaultValue.getint();
342 endresults = filterOptions["EndResults"].defaultValue.getint();
343
344 // set defaults for query parameters
345 queryparamclass query;
346 query.combinequery = "or"; // first one must be "or"
347 set_queryparam_defaults(query);
348 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
349 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
350 while (options_here != options_end) {
351 if ((*options_here).name == "CombineQuery") {
352 // add this query
353
354 // "all", needed when combining queries where the document results are needed
355 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
356 query_params.push_back (query);
357
358 // start on next query
359 query.clear();
360 query.combinequery = (*options_here).value;
361
362 // set defaults for query parameters
363 set_queryparam_defaults(query);
364
365 // "all", needed when combining queries where the document results are needed
366 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
367
368 } else if ((*options_here).name == "StartResults") {
369 startresults = (*options_here).value.getint();
370 } else if ((*options_here).name == "EndResults") {
371 endresults = (*options_here).value.getint();
372 } else if (!set_queryparam_field(*options_here, query)) {
373 logout << text_t2ascii
374 << "warning: unknown queryfilter option \""
375 << (*options_here).name
376 << "\" ignored.\n\n";
377 }
378
379 ++options_here;
380 }
381
382 // Store the start and end results in the query too, as lucene now needs to
383 // pass them through to the Java
384 query.startresults = startresults;
385 query.endresults = endresults;
386
387 // add the last query
388 query_params.push_back (query);
389}
390
391
392
393// translate will return true if successful
394bool queryfilterclass::translate (dbclass *db_ptr, text_t& docnum, text_t &trans_OID) {
395 infodbclass info;
396
397 trans_OID.clear();
398
399 // get the info
400 if (db_ptr == NULL) return false;
401 if (!db_ptr->getinfo(docnum, info)) return false;
402
403 // translate
404 if (info["section"].empty()) return false;
405
406 trans_OID = info["section"];
407 return true;
408}
409
410
411// whether document results are needed
412bool queryfilterclass::need_matching_docs (int filterResultOptions) {
413 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
414 (filterResultOptions & FRmetadata));
415}
416
417// whether term information is needed
418bool queryfilterclass::need_term_info (int filterResultOptions) {
419 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
420}
Note: See TracBrowser for help on using the repository browser.