source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 10954

Last change on this file since 10954 was 10954, checked in by kjdon, 18 years ago

Made the indexfieldmap use defaultindex for mgppqueryfilter so that the default field for plain search can be set.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.1 KB
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
33/////////////////////////////////
34// functions for mgppqueryfilterclass
35/////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // likely to be Doc, Sec, Para, but we dont assume anything now
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filterOptions["Level"] = filtopt;
51
52 // -- IndexField, enumerated, used to list available fields
53 filtopt.clear();
54 filtopt.name = "IndexField";
55 filtopt.type = FilterOption_t::enumeratedt;
56 filtopt.repeatable = FilterOption_t::onePerTerm;
57 filtopt.defaultValue = "";
58 filterOptions["IndexField"] = filtopt;
59
60}
61
62mgppqueryfilterclass::~mgppqueryfilterclass () {
63}
64
65
66//whether a query is a full text browse
67bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
68 return (filterRequestOptions & FRfullTextBrowse);
69}
70
71void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
72 queryfilterclass::configure(key, cfgline);
73
74 if (key == "indexfieldmap") {
75 indexfieldmap.importmap (cfgline);
76
77 // update the list of indexes in the filter information
78 text_tarray options;
79 indexfieldmap.gettoarray (options);
80 filterOptions["IndexField"].validValues = options;
81
82 } else if (key == "indexlevels") {
83 text_tarray::const_iterator here = cfgline.begin();
84 text_tarray::const_iterator end = cfgline.end();
85 bool first=true;
86 filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
87 while (here != end) {
88 if (!(*here).empty()) {
89 if (first) {
90 first = false;
91 // the default is the first value
92 filterOptions["Level"].defaultValue = *here;
93 }
94 filterOptions["Level"].validValues.push_back(*here);
95 }
96 ++here;
97 }
98 } else if (key == "textlevel") {
99 ((mgppsearchclass *)textsearchptr)->set_gdbm_level( cfgline[0]);
100 } else if (key == "indexstem") {
101 ((mgppsearchclass *)textsearchptr)->set_indexstem (cfgline[0]);
102 } else if (key == "defaultindex") { // used for fields in mgpp
103 indexfieldmap.from2to (cfgline[0], filterOptions["IndexField"].defaultValue);
104 }
105
106}
107
108bool mgppqueryfilterclass::init (ostream &logout) {
109
110 if (!queryfilterclass::init(logout)) {
111 return false;
112 }
113
114 if (filterOptions["IndexField"].defaultValue.empty()) {
115 // use first index in map as default if no default is set explicitly
116 text_tarray fromarray;
117 indexfieldmap.getfromarray(fromarray);
118 if (fromarray.size()) {
119 filterOptions["IndexField"].defaultValue = fromarray[0];
120 }
121 }
122 return true;
123}
124
125void mgppqueryfilterclass::filter(const FilterRequest_t &request,
126 FilterResponse_t &response,
127 comerror_t &err, ostream &logout) {
128
129
130 outconvertclass text_t2ascii;
131
132 response.clear ();
133 err = noError;
134 if (gdbmptr == NULL) {
135 // most likely a configuration problem
136 logout << text_t2ascii
137 << "configuration error: queryfilter contains a null gdbmclass\n\n";
138 err = configurationError;
139 return;
140 }
141 if (textsearchptr == NULL) {
142 // most likely a configuration problem
143 logout << text_t2ascii
144 << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
145 err = configurationError;
146 return;
147 }
148 if (full_text_browse(request.filterResultOptions)) {
149 browsefilter(request, response, err, logout);
150 return;
151 }
152 // open the database
153 gdbmptr->setlogout(&logout);
154 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
155 // most likely a system problem (we have already checked that the
156 // gdbm database exists)
157 logout << text_t2ascii
158 << "system problem: open on gdbm database \""
159 << gdbm_filename << "\" failed\n\n";
160 err = systemProblem;
161 return;
162 }
163
164
165 // get the query parameters
166 int startresults, endresults;
167 text_t phrasematch; // not used here any more
168 vector<queryparamclass> queryfilterparams;
169 parse_query_params (request, queryfilterparams, startresults,
170 endresults, phrasematch, logout);
171
172
173 // do query
174 queryresultsclass queryresults;
175 do_multi_query (request, queryfilterparams, queryresults, err, logout);
176 if (err != noError) return;
177 // assemble document results
178 if (need_matching_docs (request.filterResultOptions)) {
179
180 int resultnum = 1;
181 ResultDocInfo_t resultdoc;
182 text_t trans_OID;
183 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
184 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
185
186 if (endresults == -1) endresults = MAXNUMDOCS;
187 while (docorder_here != docorder_end) {
188 if (resultnum > endresults) break;
189
190 // translate the document number
191 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
192 logout << text_t2ascii
193 << "warning: could not translate mgpp document number \""
194 << *docorder_here << "\"to OID.\n\n";
195
196 } else {
197 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
198
199 // see if there is a result for this number,
200 // if it is in the request set (or the request set is empty)
201 if (docset_here != queryresults.docs.docset.end() &&
202 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
203 if (resultnum >= startresults) {
204 // add this document
205 resultdoc.OID = trans_OID;
206 resultdoc.result_num = resultnum;
207 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
208
209 response.docInfo.push_back (resultdoc);
210 }
211
212 ++resultnum;
213 }
214 } // else
215
216 ++docorder_here;
217 }
218 } // if need matching docs
219
220 // assemble the term results
221 if (need_term_info(request.filterResultOptions)) {
222 // note: the terms have already been sorted and uniqued - ?? have they??
223
224 TermInfo_t terminfo;
225 bool terms_first = true;
226
227 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
228 termfreqclassarray::iterator terms_end = queryresults.terms.end();
229
230 while (terms_here != terms_end) {
231 terminfo.clear();
232 terminfo.term = (*terms_here).termstr;
233 terminfo.freq = (*terms_here).termfreq;
234
235 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
236 if (terms_first) {
237 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
238 text_tset::iterator termvariants_end = queryresults.termvariants.end();
239 while (termvariants_here != termvariants_end) {
240 terminfo.matchTerms.push_back (*termvariants_here);
241 ++termvariants_here;
242 }
243 }
244 terms_first = false;
245
246 response.termInfo.push_back (terminfo);
247
248 ++terms_here;
249 }
250 }
251
252 response.numDocs = queryresults.docs_matched;
253 response.isApprox = queryresults.is_approx;
254}
255
256void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
257 FilterResponse_t &response,
258 comerror_t &err, ostream &logout) {
259
260 outconvertclass text_t2ascii;
261
262 // get the query parameters
263 int startresults, endresults;
264 text_t phrasematch; // not used here any more, just have it so can use
265 // parse_query_params function
266
267 vector<queryparamclass> queryfilterparams;
268 parse_query_params (request, queryfilterparams, startresults,
269 endresults, phrasematch, logout);
270
271 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
272
273 // do query
274 queryresultsclass queryresults;
275 queryresults.clear();
276
277 int numDocs = endresults-startresults;
278 textsearchptr->setcollectdir (collectdir);
279
280 if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
281 // most likely a system problem
282 logout << text_t2ascii
283 << "system problem: could not do full text browse with mgpp for index \""
284 << (*query_here).index << (*query_here).subcollection
285 << (*query_here).language << "\".\n\n";
286 err = systemProblem;
287 return;
288 }
289
290 // assemble the term results
291 TermInfo_t terminfo;
292
293 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
294 termfreqclassarray::iterator terms_end = queryresults.terms.end();
295
296 while (terms_here != terms_end) {
297 terminfo.clear();
298 terminfo.term = (*terms_here).termstr;
299 terminfo.freq = (*terms_here).termfreq;
300
301 response.termInfo.push_back (terminfo);
302
303 ++terms_here;
304 }
305
306
307}
308
309// mgppsearchptr and gdbmptr are assumed to be valid
310void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
311 const vector<queryparamclass> &query_params,
312 queryresultsclass &multiresults,
313 comerror_t &err, ostream &logout) {
314 outconvertclass text_t2ascii;
315
316 err = noError;
317 textsearchptr->setcollectdir (collectdir);
318 multiresults.clear();
319
320 vector<queryparamclass>::const_iterator query_here = query_params.begin();
321 vector<queryparamclass>::const_iterator query_end = query_params.end();
322 while (query_here != query_end) {
323 queryresultsclass thisqueryresults;
324 text_t indx((*query_here).index);
325 if (!textsearchptr->search((*query_here), thisqueryresults)) {
326 // most likely a system problem
327 logout << text_t2ascii
328 << "system problem: could not do search with mgpp for index \""
329 << (*query_here).index << (*query_here).subcollection
330 << (*query_here).language << "\".\n\n";
331 err = systemProblem;
332 return;
333 }
334
335 // check for syntax error
336 if (thisqueryresults.syntax_error==true) {
337 logout << text_t2ascii
338 << "syntax problem: invalid query string \""
339 << (*query_here).querystring<<"\".\n";
340 err = syntaxError;
341 return;
342 }
343 // combine the results
344 if (need_matching_docs (request.filterResultOptions)) {
345
346 if (query_params.size() == 1) {
347 multiresults.docs = thisqueryresults.docs; // just one set of results
348 multiresults.docs_matched = thisqueryresults.docs_matched;
349 multiresults.is_approx = thisqueryresults.is_approx;
350
351 } else {
352 if ((*query_here).combinequery == "and") {
353 multiresults.docs.combine_and (thisqueryresults.docs);
354 } else if ((*query_here).combinequery == "or") {
355 multiresults.docs.combine_or (thisqueryresults.docs);
356 } else if ((*query_here).combinequery == "not") {
357 multiresults.docs.combine_not (thisqueryresults.docs);
358 }
359 multiresults.docs_matched = multiresults.docs.docset.size();
360 multiresults.is_approx = Exact;
361 }
362 }
363
364 // combine the term information
365 if (need_term_info (request.filterResultOptions)) {
366 // append the terms
367 multiresults.orgterms.insert(multiresults.orgterms.end(),
368 thisqueryresults.orgterms.begin(),
369 thisqueryresults.orgterms.end());
370
371
372 // add the term variants -
373 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
374 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
375 while (termvar_here != termvar_end) {
376 multiresults.termvariants.insert(*termvar_here);
377 ++termvar_here;
378 }
379 }
380
381 ++query_here;
382 }
383
384 // sort and unique the query terms
385 multiresults.sortuniqqueryterms ();
386}
387
388
389
Note: See TracBrowser for help on using the repository browser.