source: main/trunk/greenstone2/runtime-src/src/colservr/queryfilter.cpp@ 31388

Last change on this file since 31388 was 31388, checked in by ak19, 7 years ago

Second commit to do with implementing OAI deletion policy for GS2. This commit is only loosely related, as it shifts functions duplicated in source.h and filter.h (and cpp files) into the new colserver.h and cpp files for sharing.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.5 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "colservertools.h"
28#include "fileutil.h"
29
30
31
32/////////////////////////////////
33// functions for queryfilterclass
34/////////////////////////////////
35
36
37queryfilterclass::queryfilterclass () {
38 db_ptr = NULL;
39 textsearchptr = NULL;
40 maxnumeric = 4;
41
42 FilterOption_t filtopt;
43 filtopt.name = "CombineQuery";
44 filtopt.type = FilterOption_t::enumeratedt;
45 filtopt.repeatable = FilterOption_t::onePerQuery;
46 filtopt.defaultValue = "and";
47 filtopt.validValues.push_back("and");
48 filtopt.validValues.push_back("or");
49 filtopt.validValues.push_back("not");
50 filterOptions["CombineQuery"] = filtopt;
51
52 // -- onePerQuery StartResults integer
53 filtopt.clear();
54 filtopt.name = "StartResults";
55 filtopt.type = FilterOption_t::integert;
56 filtopt.repeatable = FilterOption_t::onePerQuery;
57 filtopt.defaultValue = "1";
58 filtopt.validValues.push_back("1");
59 filtopt.validValues.push_back("1000");
60 filterOptions["StartResults"] = filtopt;
61
62 // -- onePerQuery EndResults integer
63 filtopt.clear();
64 filtopt.name = "EndResults";
65 filtopt.type = FilterOption_t::integert;
66 filtopt.repeatable = FilterOption_t::onePerQuery;
67 filtopt.defaultValue = "10";
68 filtopt.validValues.push_back("-1");
69 filtopt.validValues.push_back("1000");
70 filterOptions["EndResults"] = filtopt;
71
72 // -- onePerQuery QueryType enumerated (boolean, ranked)
73 filtopt.clear();
74 filtopt.name = "QueryType";
75 filtopt.type = FilterOption_t::enumeratedt;
76 filtopt.repeatable = FilterOption_t::onePerQuery;
77 filtopt.defaultValue = "ranked";
78 filtopt.validValues.push_back("boolean");
79 filtopt.validValues.push_back("ranked");
80 filterOptions["QueryType"] = filtopt;
81
82 // -- onePerQuery MatchMode enumerated (some, all)
83 filtopt.clear();
84 filtopt.name = "MatchMode";
85 filtopt.type = FilterOption_t::enumeratedt;
86 filtopt.repeatable = FilterOption_t::onePerQuery;
87 filtopt.defaultValue = "some";
88 filtopt.validValues.push_back("some");
89 filtopt.validValues.push_back("all");
90 filterOptions["MatchMode"] = filtopt;
91
92 // -- onePerTerm Term string ???
93 filtopt.clear();
94 filtopt.name = "Term";
95 filtopt.type = FilterOption_t::stringt;
96 filtopt.repeatable = FilterOption_t::onePerTerm;
97 filtopt.defaultValue = "";
98 filterOptions["Term"] = filtopt;
99
100 // -- onePerTerm Casefold boolean
101 filtopt.clear();
102 filtopt.name = "Casefold";
103 filtopt.type = FilterOption_t::booleant;
104 filtopt.repeatable = FilterOption_t::onePerTerm;
105 filtopt.defaultValue = "true";
106 filtopt.validValues.push_back("false");
107 filtopt.validValues.push_back("true");
108 filterOptions["Casefold"] = filtopt;
109
110 // -- onePerTerm Stem boolean
111 filtopt.clear();
112 filtopt.name = "Stem";
113 filtopt.type = FilterOption_t::booleant;
114 filtopt.repeatable = FilterOption_t::onePerTerm;
115 filtopt.defaultValue = "false";
116 filtopt.validValues.push_back("false");
117 filtopt.validValues.push_back("true");
118 filterOptions["Stem"] = filtopt;
119
120 // -- onePerTerm AccentFold boolean
121 filtopt.clear();
122 filtopt.name = "AccentFold";
123 filtopt.type = FilterOption_t::booleant;
124 filtopt.repeatable = FilterOption_t::onePerTerm;
125 filtopt.defaultValue = "false";
126 filtopt.validValues.push_back("false");
127 filtopt.validValues.push_back("true");
128 filterOptions["AccentFold"] = filtopt;
129
130 // -- onePerTerm Index enumerated
131 filtopt.clear();
132 filtopt.name = "Index";
133 filtopt.type = FilterOption_t::enumeratedt;
134 filtopt.repeatable = FilterOption_t::onePerTerm;
135 filtopt.defaultValue = "";
136 filterOptions["Index"] = filtopt;
137
138 // -- onePerTerm Subcollection enumerated
139 filtopt.clear();
140 filtopt.name = "Subcollection";
141 filtopt.type = FilterOption_t::enumeratedt;
142 filtopt.repeatable = FilterOption_t::onePerTerm;
143 filtopt.defaultValue = "";
144 filterOptions["Subcollection"] = filtopt;
145
146 // -- onePerTerm Language enumerated
147 filtopt.clear();
148 filtopt.name = "Language";
149 filtopt.type = FilterOption_t::enumeratedt;
150 filtopt.repeatable = FilterOption_t::onePerTerm;
151 filtopt.defaultValue = "";
152 filterOptions["Language"] = filtopt;
153
154 // -- onePerQuery Maxdocs integer
155 filtopt.clear();
156 filtopt.name = "Maxdocs";
157 filtopt.type = FilterOption_t::integert;
158 filtopt.repeatable = FilterOption_t::onePerQuery;
159 filtopt.defaultValue = "200";
160 filtopt.validValues.push_back("-1");
161 filtopt.validValues.push_back("1000");
162 filterOptions["Maxdocs"] = filtopt;
163
164}
165
166queryfilterclass::~queryfilterclass () {
167 // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
168}
169
170void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
171 filterclass::configure (key, cfgline);
172
173 if (key == "indexmap") {
174 indexmap.importmap (cfgline);
175
176 // update the list of indexes in the filter information
177 text_tarray options;
178 indexmap.gettoarray (options);
179 filterOptions["Index"].validValues = options;
180
181 } else if (key == "defaultindex") {
182 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
183
184 } else if (key == "subcollectionmap") {
185 subcollectionmap.importmap (cfgline);
186
187 // update the list of subcollections in the filter information
188 text_tarray options;
189 subcollectionmap.gettoarray (options);
190 filterOptions["Subcollection"].validValues = options;
191
192 } else if (key == "defaultsubcollection") {
193 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
194
195 } else if (key == "languagemap") {
196 languagemap.importmap (cfgline);
197
198 // update the list of languages in the filter information
199 text_tarray options;
200 languagemap.gettoarray (options);
201 filterOptions["Language"].validValues = options;
202
203 } else if (key == "defaultlanguage") {
204 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
205 } else if (key == "indexstem") {
206 indexstem = cfgline[0];
207 } else if (key == "maxnumeric") {
208 maxnumeric = cfgline[0].getint();
209 }
210
211}
212
213bool queryfilterclass::init (ostream &logout) {
214 outconvertclass text_t2ascii;
215
216 if (!filterclass::init(logout)) return false;
217
218 if (filterOptions["Index"].defaultValue.empty()) {
219 // use first index in map as default if no default is set explicitly
220 text_tarray fromarray;
221 indexmap.getfromarray(fromarray);
222 if (fromarray.size()) {
223 filterOptions["Index"].defaultValue = fromarray[0];
224 }
225 }
226
227 if (filterOptions["Subcollection"].defaultValue.empty()) {
228 // use first subcollection in map as default if no default is set explicitly
229 text_tarray fromarray;
230 subcollectionmap.getfromarray(fromarray);
231 if (fromarray.size()) {
232 filterOptions["Subcollection"].defaultValue = fromarray[0];
233 }
234 }
235
236 if (filterOptions["Language"].defaultValue.empty()) {
237 // use first language in map as default if no default is set explicitly
238 text_tarray fromarray;
239 languagemap.getfromarray(fromarray);
240 if (fromarray.size()) {
241 filterOptions["Language"].defaultValue = fromarray[0];
242 }
243 }
244
245 if (db_ptr == NULL) {
246 // most likely a configuration problem
247 logout << text_t2ascii
248 << "configuration error: queryfilter contains a null dbclass\n\n";
249 return false;
250 }
251
252 // get the filename for the database and make sure it exists
253 if (indexstem.empty()) {
254 indexstem = collection;
255 }
256 db_filename = resolve_db_filename(gsdlhome, dbhome, collecthome, collection,
257 indexstem,db_ptr->getfileextension());
258 if (!file_exists(db_filename)) {
259 logout << text_t2ascii
260 << "warning: database \"" << db_filename << "\" does not exist\n\n";
261 //return false;
262 }
263
264 return true;
265}
266
267void queryfilterclass::set_queryparam_defaults(queryparamclass &query ) {
268
269 query.collection = collection;
270 query.index = filterOptions["Index"].defaultValue;
271 query.subcollection = filterOptions["Subcollection"].defaultValue;
272 query.language = filterOptions["Language"].defaultValue;
273 query.querystring.clear();
274 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
275 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
276 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
277 query.stemming = (filterOptions["Stem"].defaultValue == "true");
278 query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
279 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
280 query.level = filterOptions["Level"].defaultValue;
281 query.maxnumeric = maxnumeric;
282
283}
284
285bool queryfilterclass::set_queryparam_field(const OptionValue_t &option, queryparamclass &query) {
286
287 if (option.name == "QueryType") {
288 query.search_type = (option.value == "ranked");
289 return true;
290 }
291 if (option.name == "MatchMode") {
292 query.match_mode = (option.value == "all");
293 if (query.match_mode == 1) query.maxdocs = -1;
294 return true;
295 }
296 if (option.name == "Term") {
297 query.querystring = option.value;
298 return true;
299 }
300 if (option.name == "Casefold") {
301 query.casefolding = (option.value == "true");
302 return true;
303 }
304 if (option.name == "Stem") {
305 query.stemming = (option.value == "true");
306 return true;
307 }
308 if (option.name == "AccentFold") {
309 query.accentfolding = (option.value == "true");
310 return true;
311 }
312 if (option.name == "Index"&& option.value !="") {
313 query.index = option.value;
314 return true;
315 }
316 if (option.name == "Subcollection") {
317 query.subcollection = option.value;
318 return true;
319 }
320 if (option.name == "Language") {
321 query.language = option.value;
322 return true;
323 }
324 if (option.name == "Maxdocs") {
325 query.maxdocs = option.value.getint();
326 return true;
327 }
328 if (option.name == "Level") {
329 query.level = option.value;
330 return true;
331 }
332
333 return false;
334}
335// get the query parameters
336void queryfilterclass::parse_query_params (const FilterRequest_t &request,
337 vector<queryparamclass> &query_params,
338 int &startresults, int &endresults,
339 ostream &logout) {
340 outconvertclass text_t2ascii;
341 // set defaults for the return parameters
342 query_params.erase(query_params.begin(), query_params.end());
343 startresults = filterOptions["StartResults"].defaultValue.getint();
344 endresults = filterOptions["EndResults"].defaultValue.getint();
345
346 // set defaults for query parameters
347 queryparamclass query;
348 query.combinequery = "or"; // first one must be "or"
349 set_queryparam_defaults(query);
350 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
351 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
352 while (options_here != options_end) {
353 if ((*options_here).name == "CombineQuery") {
354 // add this query
355
356 // "all", needed when combining queries where the document results are needed
357 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
358 query_params.push_back (query);
359
360 // start on next query
361 query.clear();
362 query.combinequery = (*options_here).value;
363
364 // set defaults for query parameters
365 set_queryparam_defaults(query);
366
367 // "all", needed when combining queries where the document results are needed
368 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
369
370 } else if ((*options_here).name == "StartResults") {
371 startresults = (*options_here).value.getint();
372 } else if ((*options_here).name == "EndResults") {
373 endresults = (*options_here).value.getint();
374 } else if (!set_queryparam_field(*options_here, query)) {
375 logout << text_t2ascii
376 << "warning: unknown queryfilter option \""
377 << (*options_here).name
378 << "\" ignored.\n\n";
379 }
380
381 ++options_here;
382 }
383
384 // Store the start and end results in the query too, as lucene now needs to
385 // pass them through to the Java
386 query.startresults = startresults;
387 query.endresults = endresults;
388
389 // add the last query
390 query_params.push_back (query);
391}
392
393
394
395// translate will return true if successful
396bool queryfilterclass::translate (dbclass *db_ptr, text_t& docnum, text_t &trans_OID) {
397 infodbclass info;
398
399 trans_OID.clear();
400
401 // get the info
402 if (db_ptr == NULL) return false;
403 if (!db_ptr->getinfo(docnum, info)) return false;
404
405 // translate
406 if (info["section"].empty()) return false;
407
408 trans_OID = info["section"];
409 return true;
410}
411
412
413// whether document results are needed
414bool queryfilterclass::need_matching_docs (int filterResultOptions) {
415 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
416 (filterResultOptions & FRmetadata));
417}
418
419// whether term information is needed
420bool queryfilterclass::need_term_info (int filterResultOptions) {
421 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
422}
Note: See TracBrowser for help on using the repository browser.