source: main/trunk/greenstone2/runtime-src/src/colservr/lucenequeryfilter.cpp@ 28955

Last change on this file since 28955 was 28955, checked in by kjdon, 10 years ago

now we read in indexsortfieldmap and defaultsortfield, so the user can change the default sort field

  • Property svn:keywords set to Author Date Id Revision
File size: 13.5 KB
Line 
1/**********************************************************************
2 *
3 * lucenequeryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "lucenequeryfilter.h"
27#include "fileutil.h"
28#include "lucenesearch.h"
29
30lucenequeryfilterclass::lucenequeryfilterclass ()
31 : fieldedqueryfilterclass() {
32
33
34 FilterOption_t filtopt;
35
36 // -- onePerQuery SortField, enumerated, used to list available sorting fields
37 filtopt.clear();
38 filtopt.name = "SortField";
39 filtopt.type = FilterOption_t::enumeratedt;
40 filtopt.repeatable = FilterOption_t::onePerQuery;
41 filtopt.defaultValue = "";
42 filterOptions["SortField"] = filtopt;
43
44 // -- onePerQuery SortOder enumerated (0=ascending, 1=descending)
45 filtopt.clear();
46 filtopt.name = "SortOrder";
47 filtopt.type = FilterOption_t::enumeratedt;
48 filtopt.repeatable = FilterOption_t::onePerQuery;
49 filtopt.defaultValue = "ascending";
50 filtopt.validValues.push_back("ascending");
51 filtopt.validValues.push_back("descending");
52 filterOptions["SortOrder"] = filtopt;
53
54 // -- onePerQuery Fuzziness string 0.0-1.0
55 filtopt.clear();
56 filtopt.name = "Fuzziness";
57 filtopt.type = FilterOption_t::stringt;
58 filtopt.repeatable = FilterOption_t::onePerQuery;
59 filtopt.defaultValue = "";
60 filterOptions["Fuzziness"] = filtopt;
61
62 // -- onePerQuery FilterString string
63 filtopt.clear();
64 filtopt.name = "FilterString";
65 filtopt.type = FilterOption_t::stringt;
66 filtopt.repeatable = FilterOption_t::onePerQuery;
67 filtopt.defaultValue = "";
68 filterOptions["FilterString"] = filtopt;
69}
70
71lucenequeryfilterclass::~lucenequeryfilterclass () {
72}
73
74
75
76void lucenequeryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
77 fieldedqueryfilterclass::configure(key, cfgline);
78
79 if (key == "textlevel") {
80 ((lucenesearchclass *)textsearchptr)->set_text_level(cfgline[0]);
81 }
82 else if (key == "indexsortfieldmap") {
83 sortfieldmap.importmap (cfgline);
84 }
85 else if (key == "indexsortfields") {
86 filterOptions["SortField"].validValues.erase(filterOptions["SortField"].validValues.begin(), filterOptions["SortField"].validValues.end());
87 text_tarray::const_iterator here = cfgline.begin();
88 text_tarray::const_iterator end = cfgline.end();
89 while (here != end) {
90 if (!(*here).empty()) {
91 filterOptions["SortField"].validValues.push_back(*here);
92 }
93 ++here;
94 }
95 }
96 else if (key == "defaultsortfield") {
97 sortfieldmap.from2to (cfgline[0], filterOptions["SortField"].defaultValue);
98 }
99}
100
101bool lucenequeryfilterclass::init (ostream &logout) {
102
103 if (!fieldedqueryfilterclass::init(logout)) {
104 return false;
105 }
106
107 if (filterOptions["SortField"].defaultValue.empty() && filterOptions["SortField"].validValues.begin() != filterOptions["SortField"].validValues.end() && !filterOptions["SortField"].validValues[0].empty()) {
108 filterOptions["SortField"].defaultValue = filterOptions["SortField"].validValues[0];
109 }
110
111 return true;
112}
113
114void lucenequeryfilterclass::set_queryparam_defaults(queryparamclass &query ) {
115
116 fieldedqueryfilterclass::set_queryparam_defaults(query);
117 query.filterstring = filterOptions["FilterString"].defaultValue;
118 query.sortfield = filterOptions["SortField"].defaultValue;
119 query.sortorder = (filterOptions["SortOrder"].defaultValue == "descending");
120 query.fuzziness = filterOptions["Fuzziness"].defaultValue;
121
122}
123
124bool lucenequeryfilterclass::set_queryparam_field(const OptionValue_t &option, queryparamclass &query) {
125
126 if (option.name == "FilterString") {
127 query.filterstring = option.value;
128 return true;
129 }
130 if (option.name == "SortField") {
131 query.sortfield = option.value;
132 return true;
133 }
134 if (option.name == "SortOrder") {
135 query.sortorder = (option.value == "descending");
136 return true;
137 }
138 if (option.name == "Fuzziness") {
139 query.fuzziness = option.value;
140 return true;
141 }
142 return fieldedqueryfilterclass::set_queryparam_field(option, query);
143}
144
145void lucenequeryfilterclass::filter(const FilterRequest_t &request,
146 FilterResponse_t &response,
147 comerror_t &err, ostream &logout) {
148
149 outconvertclass text_t2ascii;
150
151 response.clear ();
152 err = noError;
153 if (db_ptr == NULL) {
154 // most likely a configuration problem
155 logout << text_t2ascii
156 << "configuration error: queryfilter contains a null dbclass\n\n";
157 err = configurationError;
158 return;
159 }
160 if (textsearchptr == NULL) {
161 // most likely a configuration problem
162 logout << text_t2ascii
163 << "configuration error: queryfilter contains a null textsearchclass (lucene)\n\n";
164 err = configurationError;
165 return;
166 }
167 if (full_text_browse(request.filterResultOptions)) {
168 browsefilter(request, response, err, logout);
169 return;
170 }
171 // open the database
172 db_ptr->setlogout(&logout);
173 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
174 // most likely a system problem (we have already checked that the database exists)
175 logout << text_t2ascii
176 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
177 err = systemProblem;
178 return;
179 }
180
181
182 // get the query parameters
183 int startresults, endresults;
184 vector<queryparamclass> queryfilterparams;
185 parse_query_params (request, queryfilterparams, startresults,
186 endresults, logout);
187
188
189 // do query
190 queryresultsclass queryresults;
191 do_multi_query (request, queryfilterparams, queryresults, err, logout);
192 response.error_message = queryresults.error_message;
193 if (err != noError) return;
194
195 // assemble document results
196 if (need_matching_docs (request.filterResultOptions))
197 {
198 // Loop through the query results (ordered by ranking)
199 int resultnum = 1;
200 vector<text_t>::iterator docorder_iterator = queryresults.docs.docorder.begin();
201 while (docorder_iterator != queryresults.docs.docorder.end())
202 {
203 text_t doc_OID = (*docorder_iterator);
204 // logout << "Matching doc OID: " << doc_OID << endl;
205
206 // Make sure this result is in the docset, and either in the request set or the request set is empty
207 docresultmap::iterator doc_result = queryresults.docs.docset.find (doc_OID);
208 if (doc_result != queryresults.docs.docset.end() && (request.docSet.empty() || in_set(request.docSet, doc_OID)))
209 {
210 // Add the matching document
211 ResultDocInfo_t resultdoc;
212 resultdoc.OID = doc_OID;
213 resultdoc.result_num = resultnum;
214 resultdoc.ranking = (int)((*doc_result).second.docweight * 10000.0 + 0.5);
215 resultdoc.num_terms_matched = (*doc_result).second.num_query_terms_matched;
216 response.docInfo.push_back (resultdoc);
217
218 resultnum++;
219 }
220
221 docorder_iterator++;
222 }
223 }
224
225 // assemble the term results
226 if (need_term_info(request.filterResultOptions)) {
227 // note: the terms have already been sorted and uniqued - ?? have they??
228
229 TermInfo_t terminfo;
230 bool terms_first = true;
231
232 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
233 termfreqclassarray::iterator terms_end = queryresults.terms.end();
234
235 while (terms_here != terms_end) {
236 terminfo.clear();
237 terminfo.term = (*terms_here).termstr;
238 terminfo.freq = (*terms_here).termfreq;
239 // lucene doesn't return any termvariants at this stage,
240 // so make sure the original term is set
241 terminfo.matchTerms.push_back(terminfo.term);
242
243 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
244 if (terms_first) {
245 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
246 text_tset::iterator termvariants_end = queryresults.termvariants.end();
247 while (termvariants_here != termvariants_end) {
248 terminfo.matchTerms.push_back (*termvariants_here);
249 ++termvariants_here;
250 }
251 }
252 terms_first = false;
253
254 response.termInfo.push_back (terminfo);
255
256 ++terms_here;
257 }
258
259 // add the stop words
260 text_tset::iterator stopwords_here = queryresults.stopwords.begin();
261 text_tset::iterator stopwords_end = queryresults.stopwords.end();
262 while (stopwords_here != stopwords_end) {
263 response.stopwords.insert(*stopwords_here);
264 ++stopwords_here;
265 }
266 }
267
268 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
269 response.numDocs = queryresults.docs_matched;
270 response.isApprox = queryresults.is_approx;
271}
272
273void lucenequeryfilterclass::browsefilter(const FilterRequest_t &request,
274 FilterResponse_t &response,
275 comerror_t &err, ostream &logout) {
276
277 outconvertclass text_t2ascii;
278
279 // get the query parameters
280 int startresults, endresults;
281
282 vector<queryparamclass> queryfilterparams;
283 parse_query_params (request, queryfilterparams, startresults,
284 endresults, logout);
285
286 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
287
288 // do query
289 queryresultsclass queryresults;
290 queryresults.clear();
291
292 int numDocs = endresults-startresults;
293 textsearchptr->setcollectdir (collectdir);
294
295 if (!((lucenesearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
296 // most likely a system problem
297 logout << text_t2ascii
298 << "system problem: could not do full text browse with lucene for index \""
299 << (*query_here).index << (*query_here).subcollection
300 << (*query_here).language << "\".\n\n";
301 err = systemProblem;
302 return;
303 }
304
305 // assemble the term results
306 TermInfo_t terminfo;
307
308 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
309 termfreqclassarray::iterator terms_end = queryresults.terms.end();
310
311 while (terms_here != terms_end) {
312 terminfo.clear();
313 terminfo.term = (*terms_here).termstr;
314 terminfo.freq = (*terms_here).termfreq;
315
316 response.termInfo.push_back (terminfo);
317
318 ++terms_here;
319 }
320
321
322}
323
324// lucenesearchptr and db_ptr are assumed to be valid
325void lucenequeryfilterclass::do_multi_query (const FilterRequest_t &request,
326 const vector<queryparamclass> &query_params,
327 queryresultsclass &multiresults,
328 comerror_t &err, ostream &logout) {
329 outconvertclass text_t2ascii;
330
331 err = noError;
332 textsearchptr->setcollectdir (collectdir);
333 multiresults.clear();
334
335 vector<queryparamclass>::const_iterator query_here = query_params.begin();
336 vector<queryparamclass>::const_iterator query_end = query_params.end();
337 while (query_here != query_end) {
338 queryresultsclass thisqueryresults;
339 if (!textsearchptr->search((*query_here), thisqueryresults)) {
340 // most likely a system problem
341 logout << text_t2ascii
342 << "system problem: could not do search with lucene for index \""
343 << (*query_here).index << (*query_here).level
344 << (*query_here).subcollection
345 << (*query_here).language << "\".\n\n";
346 err = systemProblem;
347 return;
348 }
349
350 // check for syntax error
351 if (thisqueryresults.syntax_error==true) {
352 logout << text_t2ascii
353 << "syntax problem: invalid query string \""
354 << (*query_here).querystring<<"\".\n";
355 err = syntaxError;
356 return;
357 }
358 // combine the results
359 if (need_matching_docs (request.filterResultOptions)) {
360
361 if (query_params.size() == 1) {
362 multiresults.error_message = thisqueryresults.error_message;
363 multiresults.docs = thisqueryresults.docs; // just one set of results
364 multiresults.docs_matched = thisqueryresults.docs_matched;
365 multiresults.is_approx = thisqueryresults.is_approx;
366
367 } else {
368 if ((*query_here).combinequery == "and") {
369 multiresults.docs.combine_and (thisqueryresults.docs);
370 } else if ((*query_here).combinequery == "or") {
371 multiresults.docs.combine_or (thisqueryresults.docs);
372 } else if ((*query_here).combinequery == "not") {
373 multiresults.docs.combine_not (thisqueryresults.docs);
374 }
375 multiresults.docs_matched = multiresults.docs.docset.size();
376 multiresults.is_approx = Exact;
377 }
378 }
379
380 // combine the term information
381 if (need_term_info (request.filterResultOptions)) {
382 // append the terms
383 multiresults.orgterms.insert(multiresults.orgterms.end(),
384 thisqueryresults.orgterms.begin(),
385 thisqueryresults.orgterms.end());
386
387
388 // add the term variants -
389 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
390 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
391 while (termvar_here != termvar_end) {
392 multiresults.termvariants.insert(*termvar_here);
393 ++termvar_here;
394 }
395
396 // add the stop words
397 text_tset::iterator stopwords_here = thisqueryresults.stopwords.begin();
398 text_tset::iterator stopwords_end = thisqueryresults.stopwords.end();
399 while (stopwords_here != stopwords_end) {
400 multiresults.stopwords.insert(*stopwords_here);
401 ++stopwords_here;
402 }
403 }
404
405 ++query_here;
406 }
407
408 // sort and unique the query terms
409 multiresults.sortuniqqueryterms ();
410}
411
412
Note: See TracBrowser for help on using the repository browser.