source: gsdl/trunk/src/colservr/queryfilter.cpp@ 16310

Last change on this file since 16310 was 16310, checked in by davidb, 16 years ago

Introduction of 'collecthome' which parallels 'gsdlhome' to allow the toplevel collect folder to be outside of the gsdlhome area

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28
29
30// translate will return true if successful
31bool queryfilterclass::translate (dbclass *db_ptr, int docnum, text_t &trans_OID) {
32 infodbclass info;
33
34 trans_OID.clear();
35
36 // get the info
37 if (db_ptr == NULL) return false;
38 if (!db_ptr->getinfo(docnum, info)) return false;
39
40 // translate
41 if (info["section"].empty()) return false;
42
43 trans_OID = info["section"];
44 return true;
45}
46
47
48// whether document results are needed
49bool queryfilterclass::need_matching_docs (int filterResultOptions) {
50 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
51 (filterResultOptions & FRmetadata));
52}
53
54// whether term information is needed
55bool queryfilterclass::need_term_info (int filterResultOptions) {
56 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
57}
58
59/////////////////////////////////
60// functions for queryfilterclass
61/////////////////////////////////
62
63// get the query parameters
64void queryfilterclass::parse_query_params (const FilterRequest_t &request,
65 vector<queryparamclass> &query_params,
66 int &startresults, int &endresults,
67 text_t &phrasematch, ostream &logout) {
68 outconvertclass text_t2ascii;
69
70 // set defaults for the return parameters
71 query_params.erase(query_params.begin(), query_params.end());
72 startresults = filterOptions["StartResults"].defaultValue.getint();
73 endresults = filterOptions["EndResults"].defaultValue.getint();
74 phrasematch = filterOptions["PhraseMatch"].defaultValue;
75
76 // set defaults for query parameters
77 queryparamclass query;
78 query.combinequery = "or"; // first one must be "or"
79 query.collection = collection;
80 query.index = filterOptions["Index"].defaultValue;
81 query.subcollection = filterOptions["Subcollection"].defaultValue;
82 query.language = filterOptions["Language"].defaultValue;
83 query.querystring.clear();
84 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
85 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
86 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
87 query.stemming = (filterOptions["Stem"].defaultValue == "true");
88 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
89 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
90 query.level = filterOptions["Level"].defaultValue;
91 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
92 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
93 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
94 query.maxnumeric = maxnumeric;
95 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
96 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
97 while (options_here != options_end) {
98 if ((*options_here).name == "CombineQuery") {
99 // add this query
100
101 // "all", needed when combining queries where the document results are needed
102 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
103 query_params.push_back (query);
104
105 // start on next query
106 query.clear();
107 query.combinequery = (*options_here).value;
108
109 // set defaults for query parameters
110 query.collection = collection;
111 query.index = filterOptions["Index"].defaultValue;
112 query.subcollection = filterOptions["Subcollection"].defaultValue;
113 query.language = filterOptions["Language"].defaultValue;
114 query.querystring.clear();
115 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
116 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
117 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
118 query.stemming = (filterOptions["Stem"].defaultValue == "true");
119 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
120 query.level = filterOptions["Level"].defaultValue;
121 query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
122 query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
123 query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
124 query.maxnumeric = maxnumeric;
125 // "all", needed when combining queries where the document results are needed
126 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
127 else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
128
129 } else if ((*options_here).name == "StartResults") {
130 startresults = (*options_here).value.getint();
131 } else if ((*options_here).name == "EndResults") {
132 endresults = (*options_here).value.getint();
133 } else if ((*options_here).name == "QueryType") {
134 query.search_type = ((*options_here).value == "ranked");
135 } else if ((*options_here).name == "MatchMode") {
136 query.match_mode = ((*options_here).value == "all");
137 if (query.match_mode == 1) query.maxdocs = -1;
138 } else if ((*options_here).name == "Term") {
139 query.querystring = (*options_here).value;
140 } else if ((*options_here).name == "Casefold") {
141 query.casefolding = ((*options_here).value == "true");
142 } else if ((*options_here).name == "Stem") {
143 query.stemming = ((*options_here).value == "true");
144 } else if ((*options_here).name == "AccentFold") {
145 query.accentfolding = ((*options_here).value == "true");
146 } else if ((*options_here).name == "Index"&& (*options_here).value !="") {
147 query.index = (*options_here).value;
148 } else if ((*options_here).name == "Subcollection") {
149 query.subcollection = (*options_here).value;
150 } else if ((*options_here).name == "Language") {
151 query.language = (*options_here).value;
152 } else if ((*options_here).name == "Maxdocs") {
153 query.maxdocs = (*options_here).value.getint();
154 } else if ((*options_here).name == "PhraseMatch") {
155 phrasematch = (*options_here).value;
156 } else if ((*options_here).name == "Level") {
157 query.level = (*options_here).value;
158 } else if ((*options_here).name == "FilterString") {
159 query.filterstring = (*options_here).value;
160 } else if ((*options_here).name == "SortField") {
161 query.sortfield = (*options_here).value;
162 } else if ((*options_here).name == "Fuzziness") {
163 query.fuzziness = (*options_here).value;
164 } else {
165 logout << text_t2ascii
166 << "warning: unknown queryfilter option \""
167 << (*options_here).name
168 << "\" ignored.\n\n";
169 }
170
171 ++options_here;
172 }
173
174 // Store the start and end results in the query too, as lucene now needs to
175 // pass them through to the Java
176 query.startresults = startresults;
177 query.endresults = endresults;
178
179 // add the last query
180 query_params.push_back (query);
181}
182
183
184
185
186queryfilterclass::queryfilterclass () {
187 db_ptr = NULL;
188 textsearchptr = NULL;
189 maxnumeric = 4;
190
191 FilterOption_t filtopt;
192 filtopt.name = "CombineQuery";
193 filtopt.type = FilterOption_t::enumeratedt;
194 filtopt.repeatable = FilterOption_t::onePerQuery;
195 filtopt.defaultValue = "and";
196 filtopt.validValues.push_back("and");
197 filtopt.validValues.push_back("or");
198 filtopt.validValues.push_back("not");
199 filterOptions["CombineQuery"] = filtopt;
200
201 // -- onePerQuery StartResults integer
202 filtopt.clear();
203 filtopt.name = "StartResults";
204 filtopt.type = FilterOption_t::integert;
205 filtopt.repeatable = FilterOption_t::onePerQuery;
206 filtopt.defaultValue = "1";
207 filtopt.validValues.push_back("1");
208 filtopt.validValues.push_back("1000");
209 filterOptions["StartResults"] = filtopt;
210
211 // -- onePerQuery EndResults integer
212 filtopt.clear();
213 filtopt.name = "EndResults";
214 filtopt.type = FilterOption_t::integert;
215 filtopt.repeatable = FilterOption_t::onePerQuery;
216 filtopt.defaultValue = "10";
217 filtopt.validValues.push_back("-1");
218 filtopt.validValues.push_back("1000");
219 filterOptions["EndResults"] = filtopt;
220
221 // -- onePerQuery QueryType enumerated (boolean, ranked)
222 filtopt.clear();
223 filtopt.name = "QueryType";
224 filtopt.type = FilterOption_t::enumeratedt;
225 filtopt.repeatable = FilterOption_t::onePerQuery;
226 filtopt.defaultValue = "ranked";
227 filtopt.validValues.push_back("boolean");
228 filtopt.validValues.push_back("ranked");
229 filterOptions["QueryType"] = filtopt;
230
231 // -- onePerQuery MatchMode enumerated (some, all)
232 filtopt.clear();
233 filtopt.name = "MatchMode";
234 filtopt.type = FilterOption_t::enumeratedt;
235 filtopt.repeatable = FilterOption_t::onePerQuery;
236 filtopt.defaultValue = "some";
237 filtopt.validValues.push_back("some");
238 filtopt.validValues.push_back("all");
239 filterOptions["MatchMode"] = filtopt;
240
241 // -- onePerTerm Term string ???
242 filtopt.clear();
243 filtopt.name = "Term";
244 filtopt.type = FilterOption_t::stringt;
245 filtopt.repeatable = FilterOption_t::onePerTerm;
246 filtopt.defaultValue = "";
247 filterOptions["Term"] = filtopt;
248
249 // -- onePerTerm Casefold boolean
250 filtopt.clear();
251 filtopt.name = "Casefold";
252 filtopt.type = FilterOption_t::booleant;
253 filtopt.repeatable = FilterOption_t::onePerTerm;
254 filtopt.defaultValue = "true";
255 filtopt.validValues.push_back("false");
256 filtopt.validValues.push_back("true");
257 filterOptions["Casefold"] = filtopt;
258
259 // -- onePerTerm Stem boolean
260 filtopt.clear();
261 filtopt.name = "Stem";
262 filtopt.type = FilterOption_t::booleant;
263 filtopt.repeatable = FilterOption_t::onePerTerm;
264 filtopt.defaultValue = "false";
265 filtopt.validValues.push_back("false");
266 filtopt.validValues.push_back("true");
267 filterOptions["Stem"] = filtopt;
268
269 // -- onePerTerm AccentFold boolean
270 filtopt.clear();
271 filtopt.name = "AccentFold";
272 filtopt.type = FilterOption_t::booleant;
273 filtopt.repeatable = FilterOption_t::onePerTerm;
274 filtopt.defaultValue = "false";
275 filtopt.validValues.push_back("false");
276 filtopt.validValues.push_back("true");
277 filterOptions["AccentFold"] = filtopt;
278
279 // -- onePerTerm Index enumerated
280 filtopt.clear();
281 filtopt.name = "Index";
282 filtopt.type = FilterOption_t::enumeratedt;
283 filtopt.repeatable = FilterOption_t::onePerTerm;
284 filtopt.defaultValue = "";
285 filterOptions["Index"] = filtopt;
286
287 // -- onePerTerm Subcollection enumerated
288 filtopt.clear();
289 filtopt.name = "Subcollection";
290 filtopt.type = FilterOption_t::enumeratedt;
291 filtopt.repeatable = FilterOption_t::onePerTerm;
292 filtopt.defaultValue = "";
293 filterOptions["Subcollection"] = filtopt;
294
295 // -- onePerTerm Language enumerated
296 filtopt.clear();
297 filtopt.name = "Language";
298 filtopt.type = FilterOption_t::enumeratedt;
299 filtopt.repeatable = FilterOption_t::onePerTerm;
300 filtopt.defaultValue = "";
301 filterOptions["Language"] = filtopt;
302
303 // -- onePerQuery Maxdocs integer
304 filtopt.clear();
305 filtopt.name = "Maxdocs";
306 filtopt.type = FilterOption_t::integert;
307 filtopt.repeatable = FilterOption_t::onePerQuery;
308 filtopt.defaultValue = "200";
309 filtopt.validValues.push_back("-1");
310 filtopt.validValues.push_back("1000");
311 filterOptions["Maxdocs"] = filtopt;
312
313 // -- onePerQuery PhraseMatch enumerated
314 filtopt.clear();
315 filtopt.name = "PhraseMatch";
316 filtopt.type = FilterOption_t::enumeratedt;
317 filtopt.repeatable = FilterOption_t::onePerQuery;
318 filtopt.defaultValue = "some_phrases";
319 filtopt.validValues.push_back ("all_phrases");
320 filtopt.validValues.push_back ("some_phrases");
321 filtopt.validValues.push_back ("all_docs");
322 filterOptions["PhraseMatch"] = filtopt;
323}
324
325queryfilterclass::~queryfilterclass () {
326 // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
327}
328
329void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
330 filterclass::configure (key, cfgline);
331
332 if (key == "indexmap") {
333 indexmap.importmap (cfgline);
334
335 // update the list of indexes in the filter information
336 text_tarray options;
337 indexmap.gettoarray (options);
338 filterOptions["Index"].validValues = options;
339
340 } else if (key == "defaultindex") {
341 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
342
343 } else if (key == "subcollectionmap") {
344 subcollectionmap.importmap (cfgline);
345
346 // update the list of subcollections in the filter information
347 text_tarray options;
348 subcollectionmap.gettoarray (options);
349 filterOptions["Subcollection"].validValues = options;
350
351 } else if (key == "defaultsubcollection") {
352 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
353
354 } else if (key == "languagemap") {
355 languagemap.importmap (cfgline);
356
357 // update the list of languages in the filter information
358 text_tarray options;
359 languagemap.gettoarray (options);
360 filterOptions["Language"].validValues = options;
361
362 } else if (key == "defaultlanguage") {
363 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
364 } else if (key == "indexstem") {
365 indexstem = cfgline[0];
366 } else if (key == "maxnumeric") {
367 maxnumeric = cfgline[0].getint();
368 }
369
370}
371
372bool queryfilterclass::init (ostream &logout) {
373 outconvertclass text_t2ascii;
374
375 if (!filterclass::init(logout)) return false;
376
377 if (filterOptions["Index"].defaultValue.empty()) {
378 // use first index in map as default if no default is set explicitly
379 text_tarray fromarray;
380 indexmap.getfromarray(fromarray);
381 if (fromarray.size()) {
382 filterOptions["Index"].defaultValue = fromarray[0];
383 }
384 }
385
386 if (filterOptions["Subcollection"].defaultValue.empty()) {
387 // use first subcollection in map as default if no default is set explicitly
388 text_tarray fromarray;
389 subcollectionmap.getfromarray(fromarray);
390 if (fromarray.size()) {
391 filterOptions["Subcollection"].defaultValue = fromarray[0];
392 }
393 }
394
395 if (filterOptions["Language"].defaultValue.empty()) {
396 // use first language in map as default if no default is set explicitly
397 text_tarray fromarray;
398 languagemap.getfromarray(fromarray);
399 if (fromarray.size()) {
400 filterOptions["Language"].defaultValue = fromarray[0];
401 }
402 }
403
404 if (db_ptr == NULL) {
405 // most likely a configuration problem
406 logout << text_t2ascii
407 << "configuration error: queryfilter contains a null dbclass\n\n";
408 return false;
409 }
410
411 // get the filename for the database and make sure it exists
412 if (indexstem.empty()) {
413 indexstem = collection;
414 }
415 db_filename = resolve_db_filename(indexstem,db_ptr->getfileextension());
416 if (!file_exists(db_filename)) {
417 logout << text_t2ascii
418 << "warning: database \"" << db_filename << "\" does not exist\n\n";
419 //return false;
420 }
421
422 return true;
423}
424
Note: See TracBrowser for help on using the repository browser.