source: gsdl/trunk/src/colservr/mgppqueryfilter.cpp@ 15599

Last change on this file since 15599 was 15595, checked in by mdewsnip, 16 years ago

(Adding new DB support) Removed a bunch of "gdbm"s from comments etc.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.2 KB
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
33/////////////////////////////////
34// functions for mgppqueryfilterclass
35/////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // likely to be Doc, Sec, Para, but we dont assume anything now
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filterOptions["Level"] = filtopt;
51
52 // -- IndexField, enumerated, used to list available fields
53 filtopt.clear();
54 filtopt.name = "IndexField";
55 filtopt.type = FilterOption_t::enumeratedt;
56 filtopt.repeatable = FilterOption_t::onePerTerm;
57 filtopt.defaultValue = "";
58 filterOptions["IndexField"] = filtopt;
59
60}
61
62mgppqueryfilterclass::~mgppqueryfilterclass () {
63}
64
65
66//whether a query is a full text browse
67bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
68 return (filterRequestOptions & FRfullTextBrowse);
69}
70
71void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
72 queryfilterclass::configure(key, cfgline);
73
74 if (key == "indexfieldmap") {
75 indexfieldmap.importmap (cfgline);
76
77 // update the list of indexes in the filter information
78 text_tarray options;
79 indexfieldmap.gettoarray (options);
80 filterOptions["IndexField"].validValues = options;
81
82 } else if (key == "indexlevels") {
83 text_tarray::const_iterator here = cfgline.begin();
84 text_tarray::const_iterator end = cfgline.end();
85 bool first=true;
86 filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
87 while (here != end) {
88 if (!(*here).empty()) {
89 if (first) {
90 first = false;
91 // the default is the first value
92 filterOptions["Level"].defaultValue = *here;
93 }
94 filterOptions["Level"].validValues.push_back(*here);
95 }
96 ++here;
97 }
98 } else if (key == "textlevel") {
99 ((mgppsearchclass *)textsearchptr)->set_text_level(cfgline[0]);
100 } else if (key == "indexstem") {
101 ((mgppsearchclass *)textsearchptr)->set_indexstem (cfgline[0]);
102 } else if (key == "defaultindex") { // used for fields in mgpp
103 indexfieldmap.from2to (cfgline[0], filterOptions["IndexField"].defaultValue);
104 }
105
106}
107
108bool mgppqueryfilterclass::init (ostream &logout) {
109
110 if (!queryfilterclass::init(logout)) {
111 return false;
112 }
113
114 if (filterOptions["IndexField"].defaultValue.empty()) {
115 // use first index in map as default if no default is set explicitly
116 text_tarray fromarray;
117 indexfieldmap.getfromarray(fromarray);
118 if (fromarray.size()) {
119 filterOptions["IndexField"].defaultValue = fromarray[0];
120 }
121 }
122 return true;
123}
124
125void mgppqueryfilterclass::filter(const FilterRequest_t &request,
126 FilterResponse_t &response,
127 comerror_t &err, ostream &logout) {
128
129
130 outconvertclass text_t2ascii;
131
132 response.clear ();
133 err = noError;
134 if (db_ptr == NULL) {
135 // most likely a configuration problem
136 logout << text_t2ascii
137 << "configuration error: queryfilter contains a null dbclass\n\n";
138 err = configurationError;
139 return;
140 }
141 if (textsearchptr == NULL) {
142 // most likely a configuration problem
143 logout << text_t2ascii
144 << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
145 err = configurationError;
146 return;
147 }
148 if (full_text_browse(request.filterResultOptions)) {
149 browsefilter(request, response, err, logout);
150 return;
151 }
152 // open the database
153 db_ptr->setlogout(&logout);
154 if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
155 // most likely a system problem (we have already checked that the database exists)
156 logout << text_t2ascii
157 << "system problem: open on database \"" << db_filename << "\" failed\n\n";
158 err = systemProblem;
159 return;
160 }
161
162
163 // get the query parameters
164 int startresults, endresults;
165 text_t phrasematch; // not used here any more
166 vector<queryparamclass> queryfilterparams;
167 parse_query_params (request, queryfilterparams, startresults,
168 endresults, phrasematch, logout);
169
170
171 // do query
172 queryresultsclass queryresults;
173 do_multi_query (request, queryfilterparams, queryresults, err, logout);
174 if (err != noError) return;
175 // assemble document results
176 if (need_matching_docs (request.filterResultOptions)) {
177
178 int resultnum = 1;
179 ResultDocInfo_t resultdoc;
180 text_t trans_OID;
181 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
182 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
183
184 if (endresults == -1) endresults = MAXNUMDOCS;
185 while (docorder_here != docorder_end) {
186 if (resultnum > endresults) break;
187
188 // translate the document number
189 if (!translate(db_ptr, *docorder_here, trans_OID)) {
190 logout << text_t2ascii
191 << "warning: could not translate mgpp document number \""
192 << *docorder_here << "\"to OID.\n\n";
193
194 } else {
195 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
196
197 // see if there is a result for this number,
198 // if it is in the request set (or the request set is empty)
199 if (docset_here != queryresults.docs.docset.end() &&
200 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
201 if (resultnum >= startresults) {
202 // add this document
203 resultdoc.OID = trans_OID;
204 resultdoc.result_num = resultnum;
205 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
206
207 response.docInfo.push_back (resultdoc);
208 }
209
210 ++resultnum;
211 }
212 } // else
213
214 ++docorder_here;
215 }
216 } // if need matching docs
217
218 // assemble the term results
219 if (need_term_info(request.filterResultOptions)) {
220 // note: the terms have already been sorted and uniqued - ?? have they??
221
222 TermInfo_t terminfo;
223 bool terms_first = true;
224
225 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
226 termfreqclassarray::iterator terms_end = queryresults.terms.end();
227
228 while (terms_here != terms_end) {
229 terminfo.clear();
230 terminfo.term = (*terms_here).termstr;
231 terminfo.freq = (*terms_here).termfreq;
232
233 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
234 if (terms_first) {
235 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
236 text_tset::iterator termvariants_end = queryresults.termvariants.end();
237 while (termvariants_here != termvariants_end) {
238 terminfo.matchTerms.push_back (*termvariants_here);
239 ++termvariants_here;
240 }
241 }
242 terms_first = false;
243
244 response.termInfo.push_back (terminfo);
245
246 ++terms_here;
247 }
248 }
249
250 db_ptr->closedatabase(); // Important that local library doesn't leave any files open
251 response.numDocs = queryresults.docs_matched;
252 response.isApprox = queryresults.is_approx;
253}
254
255void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
256 FilterResponse_t &response,
257 comerror_t &err, ostream &logout) {
258
259 outconvertclass text_t2ascii;
260
261 // get the query parameters
262 int startresults, endresults;
263 text_t phrasematch; // not used here any more, just have it so can use
264 // parse_query_params function
265
266 vector<queryparamclass> queryfilterparams;
267 parse_query_params (request, queryfilterparams, startresults,
268 endresults, phrasematch, logout);
269
270 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
271
272 // do query
273 queryresultsclass queryresults;
274 queryresults.clear();
275
276 int numDocs = endresults-startresults;
277 textsearchptr->setcollectdir (collectdir);
278
279 if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
280 // most likely a system problem
281 logout << text_t2ascii
282 << "system problem: could not do full text browse with mgpp for index \""
283 << (*query_here).index << (*query_here).subcollection
284 << (*query_here).language << "\".\n\n";
285 err = systemProblem;
286 return;
287 }
288
289 // assemble the term results
290 TermInfo_t terminfo;
291
292 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
293 termfreqclassarray::iterator terms_end = queryresults.terms.end();
294
295 while (terms_here != terms_end) {
296 terminfo.clear();
297 terminfo.term = (*terms_here).termstr;
298 terminfo.freq = (*terms_here).termfreq;
299
300 response.termInfo.push_back (terminfo);
301
302 ++terms_here;
303 }
304
305
306}
307
308// textsearchptr and db_ptr are assumed to be valid
309void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
310 const vector<queryparamclass> &query_params,
311 queryresultsclass &multiresults,
312 comerror_t &err, ostream &logout) {
313 outconvertclass text_t2ascii;
314
315 err = noError;
316 textsearchptr->setcollectdir (collectdir);
317 multiresults.clear();
318
319 vector<queryparamclass>::const_iterator query_here = query_params.begin();
320 vector<queryparamclass>::const_iterator query_end = query_params.end();
321 while (query_here != query_end) {
322 queryresultsclass thisqueryresults;
323 text_t indx((*query_here).index);
324 if (!textsearchptr->search((*query_here), thisqueryresults)) {
325 // most likely a system problem
326 logout << text_t2ascii
327 << "system problem: could not do search with mgpp for index \""
328 << (*query_here).index << (*query_here).subcollection
329 << (*query_here).language << "\".\n\n";
330 err = systemProblem;
331 return;
332 }
333
334 // check for syntax error
335 if (thisqueryresults.syntax_error==true) {
336 logout << text_t2ascii
337 << "syntax problem: invalid query string \""
338 << (*query_here).querystring<<"\".\n";
339 err = syntaxError;
340 return;
341 }
342 // combine the results
343 if (need_matching_docs (request.filterResultOptions)) {
344
345 if (query_params.size() == 1) {
346 multiresults.docs = thisqueryresults.docs; // just one set of results
347 multiresults.docs_matched = thisqueryresults.docs_matched;
348 multiresults.is_approx = thisqueryresults.is_approx;
349
350 } else {
351 if ((*query_here).combinequery == "and") {
352 multiresults.docs.combine_and (thisqueryresults.docs);
353 } else if ((*query_here).combinequery == "or") {
354 multiresults.docs.combine_or (thisqueryresults.docs);
355 } else if ((*query_here).combinequery == "not") {
356 multiresults.docs.combine_not (thisqueryresults.docs);
357 }
358 multiresults.docs_matched = multiresults.docs.docset.size();
359 multiresults.is_approx = Exact;
360 }
361 }
362
363 // combine the term information
364 if (need_term_info (request.filterResultOptions)) {
365 // append the terms
366 multiresults.orgterms.insert(multiresults.orgterms.end(),
367 thisqueryresults.orgterms.begin(),
368 thisqueryresults.orgterms.end());
369
370
371 // add the term variants -
372 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
373 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
374 while (termvar_here != termvar_end) {
375 multiresults.termvariants.insert(*termvar_here);
376 ++termvar_here;
377 }
378 }
379
380 ++query_here;
381 }
382
383 // sort and unique the query terms
384 multiresults.sortuniqqueryterms ();
385}
386
387
388
Note: See TracBrowser for help on using the repository browser.