source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 5024

Last change on this file since 5024 was 4808, checked in by kjdon, 21 years ago

The Level filter option is now initialized from indexlevels and textlevel in build.cfg, rather than from the levels in collect.cfg. This means it will not work with old collections, but it no longer assumes anything about which levels are available.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.6 KB
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
33/////////////////////////////////
34// functions for mgppqueryfilterclass
35/////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // likely to be Doc, Sec, Para, but we dont assume anything now
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filterOptions["Level"] = filtopt;
51
52 // -- IndexField, enumerated, used to list available fields
53 filtopt.clear();
54 filtopt.name = "IndexField";
55 filtopt.type = FilterOption_t::enumeratedt;
56 filtopt.repeatable = FilterOption_t::onePerTerm;
57 filtopt.defaultValue = "";
58 filterOptions["IndexField"] = filtopt;
59
60}
61
62mgppqueryfilterclass::~mgppqueryfilterclass () {
63}
64
65
66//whether a query is a full text browse
67bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
68 return (filterRequestOptions & FRfullTextBrowse);
69}
70
71void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
72 queryfilterclass::configure(key, cfgline);
73
74 if (key == "indexfieldmap") {
75 indexfieldmap.importmap (cfgline);
76
77 // update the list of indexes in the filter information
78 text_tarray options;
79 indexfieldmap.gettoarray (options);
80
81 text_tarray::const_iterator here = options.begin();
82 text_tarray::const_iterator end = options.end();
83 bool start = true;
84 while (here !=end) {
85 if (!(*here).empty()) {
86 filterOptions["IndexField"].validValues.push_back(*here);
87 if (start) {
88 filterOptions["IndexField"].defaultValue = *here;
89 start = false;
90 }
91 }
92 here++;
93 }
94 } else if (key == "indexlevels") {
95 text_tarray::const_iterator here = cfgline.begin();
96 text_tarray::const_iterator end = cfgline.end();
97 bool first=true;
98 while (here != end) {
99 if (!(*here).empty()) {
100 if (first) {
101 first = false;
102 // the default is the first value
103 filterOptions["Level"].defaultValue = *here;
104 }
105 filterOptions["Level"].validValues.push_back(*here);
106 }
107 here ++;
108 }
109 } else if (key == "textlevel") {
110 ((mgppsearchclass *)mgsearchptr)->set_gdbm_level( cfgline[0]);
111 }
112
113}
114
115
116void mgppqueryfilterclass::filter(const FilterRequest_t &request,
117 FilterResponse_t &response,
118 comerror_t &err, ostream &logout) {
119
120
121 outconvertclass text_t2ascii;
122
123 response.clear ();
124 err = noError;
125 if (gdbmptr == NULL) {
126 // most likely a configuration problem
127 logout << text_t2ascii
128 << "configuration error: queryfilter contains a null gdbmclass\n\n";
129 err = configurationError;
130 return;
131 }
132 if (mgsearchptr == NULL) {
133 // most likely a configuration problem
134 logout << text_t2ascii
135 << "configuration error: queryfilter contains a null mgppsearchclass\n\n";
136 err = configurationError;
137 return;
138 }
139 if (full_text_browse(request.filterResultOptions)) {
140 browsefilter(request, response, err, logout);
141 return;
142 }
143 // open the database
144 gdbmptr->setlogout(&logout);
145 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
146 // most likely a system problem (we have already checked that the
147 // gdbm database exists)
148 logout << text_t2ascii
149 << "system problem: open on gdbm database \""
150 << gdbm_filename << "\" failed\n\n";
151 err = systemProblem;
152 return;
153 }
154
155
156 // get the query parameters
157 int startresults, endresults;
158 text_t phrasematch; // not used here any more
159 vector<queryparamclass> queryfilterparams;
160 parse_query_params (request, queryfilterparams, startresults,
161 endresults, phrasematch, logout);
162
163
164 // do query
165 queryresultsclass queryresults;
166 do_multi_query (request, queryfilterparams, queryresults, err, logout);
167 if (err != noError) return;
168 // assemble document results
169 if (need_matching_docs (request.filterResultOptions)) {
170
171 int resultnum = 1;
172 ResultDocInfo_t resultdoc;
173 text_t trans_OID;
174 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
175 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
176
177 if (endresults == -1) endresults = MAXNUMDOCS;
178 while (docorder_here != docorder_end) {
179 if (resultnum > endresults) break;
180
181 // translate the document number
182 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
183 logout << text_t2ascii
184 << "warning: could not translate mgpp document number \""
185 << *docorder_here << "\"to OID.\n\n";
186
187 } else {
188 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
189
190 // see if there is a result for this number,
191 // if it is in the request set (or the request set is empty)
192 if (docset_here != queryresults.docs.docset.end() &&
193 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
194 if (resultnum >= startresults) {
195 // add this document
196 resultdoc.OID = trans_OID;
197 resultdoc.result_num = resultnum;
198 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
199
200 response.docInfo.push_back (resultdoc);
201 }
202
203 resultnum++;
204 }
205 } // else
206
207 docorder_here++;
208 }
209 } // if need matching docs
210
211 // assemble the term results
212 if (need_term_info(request.filterResultOptions)) {
213 // note: the terms have already been sorted and uniqued - ?? have they??
214
215 TermInfo_t terminfo;
216 bool terms_first = true;
217
218 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
219 termfreqclassarray::iterator terms_end = queryresults.terms.end();
220
221 while (terms_here != terms_end) {
222 terminfo.clear();
223 terminfo.term = (*terms_here).termstr;
224 terminfo.freq = (*terms_here).termfreq;
225
226 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
227 if (terms_first) {
228 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
229 text_tset::iterator termvariants_end = queryresults.termvariants.end();
230 while (termvariants_here != termvariants_end) {
231 terminfo.matchTerms.push_back (*termvariants_here);
232 termvariants_here++;
233 }
234 }
235 terms_first = false;
236
237 response.termInfo.push_back (terminfo);
238
239 terms_here++;
240 }
241 }
242
243 response.numDocs = queryresults.docs_matched;
244 response.isApprox = queryresults.is_approx;
245}
246
247void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
248 FilterResponse_t &response,
249 comerror_t &err, ostream &logout) {
250
251 outconvertclass text_t2ascii;
252
253 // get the query parameters
254 int startresults, endresults;
255 text_t phrasematch; // not used here any more, just have it so can use
256 // parse_query_params function
257
258 vector<queryparamclass> queryfilterparams;
259 parse_query_params (request, queryfilterparams, startresults,
260 endresults, phrasematch, logout);
261
262 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
263
264 // do query
265 queryresultsclass queryresults;
266 queryresults.clear();
267
268 int numDocs = endresults-startresults;
269 mgsearchptr->setcollectdir (collectdir);
270
271 if (!((mgppsearchclass*)mgsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
272 // most likely a system problem
273 logout << text_t2ascii
274 << "system problem: could not do full text browse with mgpp for index \""
275 << (*query_here).index << (*query_here).subcollection
276 << (*query_here).language << "\".\n\n";
277 err = systemProblem;
278 return;
279 }
280
281 // assemble the term results
282 TermInfo_t terminfo;
283
284 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
285 termfreqclassarray::iterator terms_end = queryresults.terms.end();
286
287 while (terms_here != terms_end) {
288 terminfo.clear();
289 terminfo.term = (*terms_here).termstr;
290 terminfo.freq = (*terms_here).termfreq;
291
292 response.termInfo.push_back (terminfo);
293
294 terms_here++;
295 }
296
297
298}
299
300// mgppsearchptr and gdbmptr are assumed to be valid
301void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
302 const vector<queryparamclass> &query_params,
303 queryresultsclass &multiresults,
304 comerror_t &err, ostream &logout) {
305 outconvertclass text_t2ascii;
306
307 err = noError;
308 mgsearchptr->setcollectdir (collectdir);
309 multiresults.clear();
310
311 vector<queryparamclass>::const_iterator query_here = query_params.begin();
312 vector<queryparamclass>::const_iterator query_end = query_params.end();
313 while (query_here != query_end) {
314 queryresultsclass thisqueryresults;
315 text_t indx((*query_here).index);
316 if (!mgsearchptr->search((*query_here), thisqueryresults)) {
317 // most likely a system problem
318 logout << text_t2ascii
319 << "system problem: could not do search with mgpp for index \""
320 << (*query_here).index << (*query_here).subcollection
321 << (*query_here).language << "\".\n\n";
322 err = systemProblem;
323 return;
324 }
325
326 // check for syntax error
327 if (thisqueryresults.syntax_error==true) {
328 logout << text_t2ascii
329 << "syntax problem: invalid query string \""
330 << (*query_here).querystring<<"\".\n";
331 err = syntaxError;
332 return;
333 }
334 // combine the results
335 if (need_matching_docs (request.filterResultOptions)) {
336
337 if (query_params.size() == 1) {
338 multiresults.docs = thisqueryresults.docs; // just one set of results
339 multiresults.docs_matched = thisqueryresults.docs_matched;
340 multiresults.is_approx = thisqueryresults.is_approx;
341
342 } else {
343 if ((*query_here).combinequery == "and") {
344 multiresults.docs.combine_and (thisqueryresults.docs);
345 } else if ((*query_here).combinequery == "or") {
346 multiresults.docs.combine_or (thisqueryresults.docs);
347 } else if ((*query_here).combinequery == "not") {
348 multiresults.docs.combine_not (thisqueryresults.docs);
349 }
350 multiresults.docs_matched = multiresults.docs.docset.size();
351 multiresults.is_approx = Exact;
352 }
353 }
354
355 // combine the term information
356 if (need_term_info (request.filterResultOptions)) {
357 // append the terms
358 multiresults.orgterms.insert(multiresults.orgterms.end(),
359 thisqueryresults.orgterms.begin(),
360 thisqueryresults.orgterms.end());
361
362
363 // add the term variants -
364 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
365 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
366 while (termvar_here != termvar_end) {
367 multiresults.termvariants.insert(*termvar_here);
368 termvar_here++;
369 }
370 }
371
372 query_here++;
373 }
374
375 // sort and unique the query terms
376 multiresults.sortuniqqueryterms ();
377}
378
379
380
Note: See TracBrowser for help on using the repository browser.