source: main/trunk/greenstone2/runtime-src/src/colservr/mgppqueryfilter.cpp@ 21945

Last change on this file since 21945 was 20727, checked in by kjdon, 15 years ago

Added support for defaultlevel in collect.cfg, for mgpp and lucene. Also added defaultindex for lucene.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "mgppqueryfilter.h"
27#include "fileutil.h"
28#include "mgppsearch.h"
29
30/////////////////////////////////
31// functions for mgppqueryfilterclass
32/////////////////////////////////
33
34
35mgppqueryfilterclass::mgppqueryfilterclass ()
36 : queryfilterclass() {
37
38
39 FilterOption_t filtopt;
40
41 // -- onePerTerm Level enumerated
42 // likely to be Doc, Sec, Para, but we dont assume anything now
43 filtopt.clear();
44 filtopt.name = "Level";
45 filtopt.type = FilterOption_t::enumeratedt;
46 filtopt.repeatable = FilterOption_t::onePerTerm;
47 filtopt.defaultValue = "";
48 filterOptions["Level"] = filtopt;
49
50 // -- IndexField, enumerated, used to list available fields
51 filtopt.clear();
52 filtopt.name = "IndexField";
53 filtopt.type = FilterOption_t::enumeratedt;
54 filtopt.repeatable = FilterOption_t::onePerTerm;
55 filtopt.defaultValue = "";
56 filterOptions["IndexField"] = filtopt;
57
58}
59
60mgppqueryfilterclass::~mgppqueryfilterclass () {
61}
62
63
64//whether a query is a full text browse
65bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
66 return (filterRequestOptions & FRfullTextBrowse);
67}
68
69void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
70 queryfilterclass::configure(key, cfgline);
71
72 if (key == "indexfieldmap") {
73 indexfieldmap.importmap (cfgline);
74 text_tarray options;
75 indexfieldmap.gettoarray (options);
76 filterOptions["IndexField"].validValues = options;
77
78 } else if (key == "levelmap") {
79 levelmap.importmap (cfgline);
80 } else if (key == "indexlevels") {
81 filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
82 text_tarray::const_iterator here = cfgline.begin();
83 text_tarray::const_iterator end = cfgline.end();
84 while (here != end) {
85 if (!(*here).empty()) {
86 filterOptions["Level"].validValues.push_back(*here);
87 }
88 ++here;
89 }
90 } else if (key == "textlevel") {
91 ((mgppsearchclass *)textsearchptr)->set_text_level(cfgline[0]);
92 } else if (key == "indexstem") {
93 ((mgppsearchclass *)textsearchptr)->set_indexstem (cfgline[0]);
94 } else if (key == "defaultindex") { // used for fields in mgpp
95 indexfieldmap.from2to (cfgline[0], filterOptions["IndexField"].defaultValue);
96 } else if (key == "defaultlevel") {
97 levelmap.from2to (cfgline[0], filterOptions["Level"].defaultValue);
98 }
99
100}
101
102bool mgppqueryfilterclass::init (ostream &logout) {
103
104 if (!queryfilterclass::init(logout)) {
105 return false;
106 }
107
108 if (filterOptions["IndexField"].defaultValue.empty()) {
109 // use first index in map as default if no default is set explicitly
110 text_tarray fromarray;
111 indexfieldmap.getfromarray(fromarray);
112 if (fromarray.size()) {
113 filterOptions["IndexField"].defaultValue = fromarray[0];
114 }
115 }
116 if (filterOptions["Levels"].defaultValue.empty()) {
117 // use first level as default if no default is set explicitly
118 if (!filterOptions["Level"].validValues[0].empty())
119 filterOptions["Levels"].defaultValue = filterOptions["Level"].validValues[0];
120 }
121
122 return true;
123}
124
125void mgppqueryfilterclass::filter(const FilterRequest_t &request,
126 FilterResponse_t &response,
127 comerror_t &err, ostream &logout) {
128
129
130 outconvertclass text_t2ascii;
131
132 response.clear ();
133 err = noError;
134 if (db_ptr == NULL) {
135 // most likely a configuration problem
136 logout << text_t2ascii
137 << "configuration error: queryfilter contains a null dbclass\n\n";
138 err = configurationError;
139 return;
140 }
141 if (textsearchptr == NULL) {
142 // most likely a configuration problem
143 logout << text_t2ascii
144 << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
145 err = configurationError;
146 return;
147 }
148 if (full_text_browse(request.filterResultOptions)) {
149 browsefilter(request, response, err, logout);
150 return;
151 }
152 // open the database
153 db_ptr->setlogout(&logout);
154 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
155 // most likely a system problem (we have already checked that the database exists)
156 logout << text_t2ascii
157 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
158 err = systemProblem;
159 return;
160 }
161
162
163 // get the query parameters
164 int startresults, endresults;
165 text_t phrasematch; // not used here any more
166 vector<queryparamclass> queryfilterparams;
167 parse_query_params (request, queryfilterparams, startresults,
168 endresults, phrasematch, logout);
169
170
171 // do query
172 queryresultsclass queryresults;
173 do_multi_query (request, queryfilterparams, queryresults, err, logout);
174 if (err != noError) return;
175 // assemble document results
176 if (need_matching_docs (request.filterResultOptions)) {
177
178 int resultnum = 1;
179 ResultDocInfo_t resultdoc;
180 text_t trans_OID;
181 vector<text_t>::iterator docorder_here = queryresults.docs.docorder.begin();
182 vector<text_t>::iterator docorder_end = queryresults.docs.docorder.end();
183
184 if (endresults == -1) endresults = MAXNUMDOCS;
185 while (docorder_here != docorder_end) {
186 if (resultnum > endresults) break;
187
188 // translate the document number
189 if (!translate(db_ptr, *docorder_here, trans_OID)) {
190 logout << text_t2ascii
191 << "warning: could not translate mgpp document number \""
192 << *docorder_here << "\"to OID.\n\n";
193
194 } else {
195 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
196
197 // see if there is a result for this number,
198 // if it is in the request set (or the request set is empty)
199 if (docset_here != queryresults.docs.docset.end() &&
200 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
201 if (resultnum >= startresults) {
202 // add this document
203 resultdoc.OID = trans_OID;
204 resultdoc.result_num = resultnum;
205 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
206
207 response.docInfo.push_back (resultdoc);
208 }
209
210 ++resultnum;
211 }
212 } // else
213
214 ++docorder_here;
215 }
216 } // if need matching docs
217
218 // assemble the term results
219 if (need_term_info(request.filterResultOptions)) {
220 // note: the terms have already been sorted and uniqued - ?? have they??
221
222 TermInfo_t terminfo;
223 bool terms_first = true;
224
225 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
226 termfreqclassarray::iterator terms_end = queryresults.terms.end();
227
228 while (terms_here != terms_end) {
229 terminfo.clear();
230 terminfo.term = (*terms_here).termstr;
231 terminfo.freq = (*terms_here).termfreq;
232
233 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
234 if (terms_first) {
235 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
236 text_tset::iterator termvariants_end = queryresults.termvariants.end();
237 while (termvariants_here != termvariants_end) {
238 terminfo.matchTerms.push_back (*termvariants_here);
239 ++termvariants_here;
240 }
241 }
242 terms_first = false;
243
244 response.termInfo.push_back (terminfo);
245
246 ++terms_here;
247 }
248 }
249
250 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
251 response.numDocs = queryresults.docs_matched;
252 response.isApprox = queryresults.is_approx;
253}
254
255void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
256 FilterResponse_t &response,
257 comerror_t &err, ostream &logout) {
258
259 outconvertclass text_t2ascii;
260
261 // get the query parameters
262 int startresults, endresults;
263 text_t phrasematch; // not used here any more, just have it so can use
264 // parse_query_params function
265
266 vector<queryparamclass> queryfilterparams;
267 parse_query_params (request, queryfilterparams, startresults,
268 endresults, phrasematch, logout);
269
270 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
271
272 // do query
273 queryresultsclass queryresults;
274 queryresults.clear();
275
276 int numDocs = endresults-startresults;
277 textsearchptr->setcollectdir (collectdir);
278
279 if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
280 // most likely a system problem
281 logout << text_t2ascii
282 << "system problem: could not do full text browse with mgpp for index \""
283 << (*query_here).index << (*query_here).subcollection
284 << (*query_here).language << "\".\n\n";
285 err = systemProblem;
286 return;
287 }
288
289 // assemble the term results
290 TermInfo_t terminfo;
291
292 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
293 termfreqclassarray::iterator terms_end = queryresults.terms.end();
294
295 while (terms_here != terms_end) {
296 terminfo.clear();
297 terminfo.term = (*terms_here).termstr;
298 terminfo.freq = (*terms_here).termfreq;
299
300 response.termInfo.push_back (terminfo);
301
302 ++terms_here;
303 }
304
305
306}
307
308// textsearchptr and db_ptr are assumed to be valid
309void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
310 const vector<queryparamclass> &query_params,
311 queryresultsclass &multiresults,
312 comerror_t &err, ostream &logout) {
313 outconvertclass text_t2ascii;
314
315 err = noError;
316 textsearchptr->setcollectdir (collectdir);
317 multiresults.clear();
318
319 vector<queryparamclass>::const_iterator query_here = query_params.begin();
320 vector<queryparamclass>::const_iterator query_end = query_params.end();
321 while (query_here != query_end) {
322 queryresultsclass thisqueryresults;
323 text_t indx((*query_here).index);
324 if (!textsearchptr->search((*query_here), thisqueryresults)) {
325 // most likely a system problem
326 logout << text_t2ascii
327 << "system problem: could not do search with mgpp for index \""
328 << (*query_here).index << (*query_here).subcollection
329 << (*query_here).language << "\".\n\n";
330 err = systemProblem;
331 return;
332 }
333
334 // check for syntax error
335 if (thisqueryresults.syntax_error==true) {
336 logout << text_t2ascii
337 << "syntax problem: invalid query string \""
338 << (*query_here).querystring<<"\".\n";
339 err = syntaxError;
340 return;
341 }
342 // combine the results
343 if (need_matching_docs (request.filterResultOptions)) {
344
345 if (query_params.size() == 1) {
346 multiresults.docs = thisqueryresults.docs; // just one set of results
347 multiresults.docs_matched = thisqueryresults.docs_matched;
348 multiresults.is_approx = thisqueryresults.is_approx;
349
350 } else {
351 if ((*query_here).combinequery == "and") {
352 multiresults.docs.combine_and (thisqueryresults.docs);
353 } else if ((*query_here).combinequery == "or") {
354 multiresults.docs.combine_or (thisqueryresults.docs);
355 } else if ((*query_here).combinequery == "not") {
356 multiresults.docs.combine_not (thisqueryresults.docs);
357 }
358 multiresults.docs_matched = multiresults.docs.docset.size();
359 multiresults.is_approx = Exact;
360 }
361 }
362
363 // combine the term information
364 if (need_term_info (request.filterResultOptions)) {
365 // append the terms
366 multiresults.orgterms.insert(multiresults.orgterms.end(),
367 thisqueryresults.orgterms.begin(),
368 thisqueryresults.orgterms.end());
369
370
371 // add the term variants -
372 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
373 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
374 while (termvar_here != termvar_end) {
375 multiresults.termvariants.insert(*termvar_here);
376 ++termvar_here;
377 }
378 }
379
380 ++query_here;
381 }
382
383 // sort and unique the query terms
384 multiresults.sortuniqqueryterms ();
385}
386
387
388
Note: See TracBrowser for help on using the repository browser.