source: main/trunk/greenstone2/runtime-src/src/colservr/lucenequeryfilter.cpp@ 27361

Last change on this file since 27361 was 27361, checked in by kjdon, 11 years ago

new handling of sortfield query param

  • Property svn:keywords set to Author Date Id Revision
File size: 13.3 KB
Line 
1/**********************************************************************
2 *
3 * lucenequeryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "lucenequeryfilter.h"
27#include "fileutil.h"
28#include "lucenesearch.h"
29
30lucenequeryfilterclass::lucenequeryfilterclass ()
31 : fieldedqueryfilterclass() {
32
33
34 FilterOption_t filtopt;
35
36 // -- onePerQuery SortField, enumerated, used to list available sorting fields
37 filtopt.clear();
38 filtopt.name = "SortField";
39 filtopt.type = FilterOption_t::enumeratedt;
40 filtopt.repeatable = FilterOption_t::onePerQuery;
41 filtopt.defaultValue = "";
42 filterOptions["SortField"] = filtopt;
43
44 // -- onePerQuery SortOder enumerated (0=ascending, 1=descending)
45 filtopt.clear();
46 filtopt.name = "SortOrder";
47 filtopt.type = FilterOption_t::enumeratedt;
48 filtopt.repeatable = FilterOption_t::onePerQuery;
49 filtopt.defaultValue = "ascending";
50 filtopt.validValues.push_back("ascending");
51 filtopt.validValues.push_back("descending");
52 filterOptions["SortOrder"] = filtopt;
53
54 // -- onePerQuery Fuzziness string 0.0-1.0
55 filtopt.clear();
56 filtopt.name = "Fuzziness";
57 filtopt.type = FilterOption_t::stringt;
58 filtopt.repeatable = FilterOption_t::onePerQuery;
59 filtopt.defaultValue = "";
60 filterOptions["Fuzziness"] = filtopt;
61
62 // -- onePerQuery FilterString string
63 filtopt.clear();
64 filtopt.name = "FilterString";
65 filtopt.type = FilterOption_t::stringt;
66 filtopt.repeatable = FilterOption_t::onePerQuery;
67 filtopt.defaultValue = "";
68 filterOptions["FilterString"] = filtopt;
69}
70
71lucenequeryfilterclass::~lucenequeryfilterclass () {
72}
73
74
75
76void lucenequeryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
77 fieldedqueryfilterclass::configure(key, cfgline);
78
79 if (key == "textlevel") {
80 ((lucenesearchclass *)textsearchptr)->set_text_level(cfgline[0]);
81 }
82
83 else if (key == "indexsortfields") {
84 filterOptions["SortField"].validValues.erase(filterOptions["SortField"].validValues.begin(), filterOptions["SortField"].validValues.end());
85 text_tarray::const_iterator here = cfgline.begin();
86 text_tarray::const_iterator end = cfgline.end();
87 while (here != end) {
88 if (!(*here).empty()) {
89 filterOptions["SortField"].validValues.push_back(*here);
90 }
91 ++here;
92 }
93 }
94}
95
96bool lucenequeryfilterclass::init (ostream &logout) {
97
98 if (!fieldedqueryfilterclass::init(logout)) {
99 return false;
100 }
101
102 if (filterOptions["SortField"].defaultValue.empty() && filterOptions["SortField"].validValues.begin() != filterOptions["SortField"].validValues.end() && !filterOptions["SortField"].validValues[0].empty()) {
103 filterOptions["SortField"].defaultValue = filterOptions["SortField"].validValues[0];
104 }
105
106 return true;
107}
108
109void lucenequeryfilterclass::set_queryparam_defaults(queryparamclass &query ) {
110
111 fieldedqueryfilterclass::set_queryparam_defaults(query);
112 query.filterstring = filterOptions["FilterString"].defaultValue;
113 query.sortfield = filterOptions["SortField"].defaultValue;
114 query.sortorder = (filterOptions["SortOrder"].defaultValue == "descending");
115 query.fuzziness = filterOptions["Fuzziness"].defaultValue;
116
117}
118
119bool lucenequeryfilterclass::set_queryparam_field(const OptionValue_t &option, queryparamclass &query) {
120
121 if (option.name == "FilterString") {
122 query.filterstring = option.value;
123 return true;
124 }
125 if (option.name == "SortField") {
126 query.sortfield = option.value;
127 return true;
128 }
129 if (option.name == "SortOrder") {
130 query.sortorder = (option.value == "descending");
131 return true;
132 }
133 if (option.name == "Fuzziness") {
134 query.fuzziness = option.value;
135 return true;
136 }
137 return fieldedqueryfilterclass::set_queryparam_field(option, query);
138}
139
140void lucenequeryfilterclass::filter(const FilterRequest_t &request,
141 FilterResponse_t &response,
142 comerror_t &err, ostream &logout) {
143
144 outconvertclass text_t2ascii;
145
146 response.clear ();
147 err = noError;
148 if (db_ptr == NULL) {
149 // most likely a configuration problem
150 logout << text_t2ascii
151 << "configuration error: queryfilter contains a null dbclass\n\n";
152 err = configurationError;
153 return;
154 }
155 if (textsearchptr == NULL) {
156 // most likely a configuration problem
157 logout << text_t2ascii
158 << "configuration error: queryfilter contains a null textsearchclass (lucene)\n\n";
159 err = configurationError;
160 return;
161 }
162 if (full_text_browse(request.filterResultOptions)) {
163 browsefilter(request, response, err, logout);
164 return;
165 }
166 // open the database
167 db_ptr->setlogout(&logout);
168 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
169 // most likely a system problem (we have already checked that the database exists)
170 logout << text_t2ascii
171 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
172 err = systemProblem;
173 return;
174 }
175
176
177 // get the query parameters
178 int startresults, endresults;
179 vector<queryparamclass> queryfilterparams;
180 parse_query_params (request, queryfilterparams, startresults,
181 endresults, logout);
182
183
184 // do query
185 queryresultsclass queryresults;
186 do_multi_query (request, queryfilterparams, queryresults, err, logout);
187 response.error_message = queryresults.error_message;
188 if (err != noError) return;
189
190 // assemble document results
191 if (need_matching_docs (request.filterResultOptions))
192 {
193 // Loop through the query results (ordered by ranking)
194 int resultnum = 1;
195 vector<text_t>::iterator docorder_iterator = queryresults.docs.docorder.begin();
196 while (docorder_iterator != queryresults.docs.docorder.end())
197 {
198 text_t doc_OID = (*docorder_iterator);
199 // logout << "Matching doc OID: " << doc_OID << endl;
200
201 // Make sure this result is in the docset, and either in the request set or the request set is empty
202 docresultmap::iterator doc_result = queryresults.docs.docset.find (doc_OID);
203 if (doc_result != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, doc_OID)))
204 {
205 // Add the matching document
206 ResultDocInfo_t resultdoc;
207 resultdoc.OID = doc_OID;
208 resultdoc.result_num = resultnum;
209 resultdoc.ranking = (int)((*doc_result).second.docweight * 10000.0 + 0.5);
210 resultdoc.num_terms_matched = (*doc_result).second.num_query_terms_matched;
211 response.docInfo.push_back (resultdoc);
212
213 resultnum++;
214 }
215
216 docorder_iterator++;
217 }
218 }
219
220 // assemble the term results
221 if (need_term_info(request.filterResultOptions)) {
222 // note: the terms have already been sorted and uniqued - ?? have they??
223
224 TermInfo_t terminfo;
225 bool terms_first = true;
226
227 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
228 termfreqclassarray::iterator terms_end = queryresults.terms.end();
229
230 while (terms_here != terms_end) {
231 terminfo.clear();
232 terminfo.term = (*terms_here).termstr;
233 terminfo.freq = (*terms_here).termfreq;
234 // lucene doesn't return any termvariants at this stage,
235 // so make sure the original term is set
236 terminfo.matchTerms.push_back(terminfo.term);
237
238 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
239 if (terms_first) {
240 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
241 text_tset::iterator termvariants_end = queryresults.termvariants.end();
242 while (termvariants_here != termvariants_end) {
243 terminfo.matchTerms.push_back (*termvariants_here);
244 ++termvariants_here;
245 }
246 }
247 terms_first = false;
248
249 response.termInfo.push_back (terminfo);
250
251 ++terms_here;
252 }
253
254 // add the stop words
255 text_tset::iterator stopwords_here = queryresults.stopwords.begin();
256 text_tset::iterator stopwords_end = queryresults.stopwords.end();
257 while (stopwords_here != stopwords_end) {
258 response.stopwords.insert(*stopwords_here);
259 ++stopwords_here;
260 }
261 }
262
263 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
264 response.numDocs = queryresults.docs_matched;
265 response.isApprox = queryresults.is_approx;
266}
267
268void lucenequeryfilterclass::browsefilter(const FilterRequest_t &request,
269 FilterResponse_t &response,
270 comerror_t &err, ostream &logout) {
271
272 outconvertclass text_t2ascii;
273
274 // get the query parameters
275 int startresults, endresults;
276
277 vector<queryparamclass> queryfilterparams;
278 parse_query_params (request, queryfilterparams, startresults,
279 endresults, logout);
280
281 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
282
283 // do query
284 queryresultsclass queryresults;
285 queryresults.clear();
286
287 int numDocs = endresults-startresults;
288 textsearchptr->setcollectdir (collectdir);
289
290 if (!((lucenesearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
291 // most likely a system problem
292 logout << text_t2ascii
293 << "system problem: could not do full text browse with lucene for index \""
294 << (*query_here).index << (*query_here).subcollection
295 << (*query_here).language << "\".\n\n";
296 err = systemProblem;
297 return;
298 }
299
300 // assemble the term results
301 TermInfo_t terminfo;
302
303 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
304 termfreqclassarray::iterator terms_end = queryresults.terms.end();
305
306 while (terms_here != terms_end) {
307 terminfo.clear();
308 terminfo.term = (*terms_here).termstr;
309 terminfo.freq = (*terms_here).termfreq;
310
311 response.termInfo.push_back (terminfo);
312
313 ++terms_here;
314 }
315
316
317}
318
319// lucenesearchptr and db_ptr are assumed to be valid
320void lucenequeryfilterclass::do_multi_query (const FilterRequest_t &request,
321 const vector<queryparamclass> &query_params,
322 queryresultsclass &multiresults,
323 comerror_t &err, ostream &logout) {
324 outconvertclass text_t2ascii;
325
326 err = noError;
327 textsearchptr->setcollectdir (collectdir);
328 multiresults.clear();
329
330 vector<queryparamclass>::const_iterator query_here = query_params.begin();
331 vector<queryparamclass>::const_iterator query_end = query_params.end();
332 while (query_here != query_end) {
333 queryresultsclass thisqueryresults;
334 if (!textsearchptr->search((*query_here), thisqueryresults)) {
335 // most likely a system problem
336 logout << text_t2ascii
337 << "system problem: could not do search with lucene for index \""
338 << (*query_here).index << (*query_here).level
339 << (*query_here).subcollection
340 << (*query_here).language << "\".\n\n";
341 err = systemProblem;
342 return;
343 }
344
345 // check for syntax error
346 if (thisqueryresults.syntax_error==true) {
347 logout << text_t2ascii
348 << "syntax problem: invalid query string \""
349 << (*query_here).querystring<<"\".\n";
350 err = syntaxError;
351 return;
352 }
353 // combine the results
354 if (need_matching_docs (request.filterResultOptions)) {
355
356 if (query_params.size() == 1) {
357 multiresults.error_message = thisqueryresults.error_message;
358 multiresults.docs = thisqueryresults.docs; // just one set of results
359 multiresults.docs_matched = thisqueryresults.docs_matched;
360 multiresults.is_approx = thisqueryresults.is_approx;
361
362 } else {
363 if ((*query_here).combinequery == "and") {
364 multiresults.docs.combine_and (thisqueryresults.docs);
365 } else if ((*query_here).combinequery == "or") {
366 multiresults.docs.combine_or (thisqueryresults.docs);
367 } else if ((*query_here).combinequery == "not") {
368 multiresults.docs.combine_not (thisqueryresults.docs);
369 }
370 multiresults.docs_matched = multiresults.docs.docset.size();
371 multiresults.is_approx = Exact;
372 }
373 }
374
375 // combine the term information
376 if (need_term_info (request.filterResultOptions)) {
377 // append the terms
378 multiresults.orgterms.insert(multiresults.orgterms.end(),
379 thisqueryresults.orgterms.begin(),
380 thisqueryresults.orgterms.end());
381
382
383 // add the term variants -
384 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
385 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
386 while (termvar_here != termvar_end) {
387 multiresults.termvariants.insert(*termvar_here);
388 ++termvar_here;
389 }
390
391 // add the stop words
392 text_tset::iterator stopwords_here = thisqueryresults.stopwords.begin();
393 text_tset::iterator stopwords_end = thisqueryresults.stopwords.end();
394 while (stopwords_here != stopwords_end) {
395 multiresults.stopwords.insert(*stopwords_here);
396 ++stopwords_here;
397 }
398 }
399
400 ++query_here;
401 }
402
403 // sort and unique the query terms
404 multiresults.sortuniqqueryterms ();
405}
406
407
Note: See TracBrowser for help on using the repository browser.