source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 1908

Last change on this file since 1908 was 1835, checked in by kjm18, 23 years ago

BrowseFields filteroption renamed IndexField - uses indexfieldmap in build.cfg
Level info also set in collect.cfg.
termvariants now present.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
/**********************************************************************
 *
 * mgppqueryfilter.cpp --
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
/////////////////////////////////////
// functions for mgppqueryfilterclass
/////////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // Document, Section, Paragraph
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filtopt.validValues.push_back("Document");
51 filtopt.defaultValue = "Document";
52 filterOptions["Level"] = filtopt;
53
54 // -- IndexField, enumerated, used to list available fields
55 // ZZ used to represent "AllFields"
56 filtopt.clear();
57 filtopt.name = "IndexField";
58 filtopt.type = FilterOption_t::enumeratedt;
59 filtopt.repeatable = FilterOption_t::onePerTerm;
60 filtopt.validValues.push_back("ZZ");
61 filtopt.defaultValue = "ZZ";
62 filterOptions["IndexField"] = filtopt;
63
64}
65
66mgppqueryfilterclass::~mgppqueryfilterclass () {
67}
68
69
70//whether a query is a full text browse
71bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
72 return (filterRequestOptions & FRfullTextBrowse);
73}
74
75void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
76 queryfilterclass::configure(key, cfgline);
77
78 if (key == "indexfieldmap") {
79 indexfieldmap.importmap (cfgline);
80
81 // update the list of indexes in the filter information
82 text_tarray options;
83 indexfieldmap.gettoarray (options);
84
85 text_tarray::const_iterator here = options.begin();
86 text_tarray::const_iterator end = options.end();
87
88 while (here !=end) {
89 if (!(*here).empty())
90 filterOptions["IndexField"].validValues.push_back(*here);
91 here++;
92 }
93 }
94
95 if (key == "levels") {
96 text_tarray::const_iterator here = cfgline.begin();
97 text_tarray::const_iterator end = cfgline.end();
98
99 while (here != end) {
100 if (!(*here).empty())
101 filterOptions["Level"].validValues.push_back(*here);
102 here++;
103 }
104 }
105}
106
107
108void mgppqueryfilterclass::filter(const FilterRequest_t &request,
109 FilterResponse_t &response,
110 comerror_t &err, ostream &logout) {
111
112
113 outconvertclass text_t2ascii;
114
115 response.clear ();
116 err = noError;
117 if (gdbmptr == NULL) {
118 // most likely a configuration problem
119 logout << text_t2ascii
120 << "configuration error: queryfilter contains a null gdbmclass\n\n";
121 err = configurationError;
122 return;
123 }
124 if (mgsearchptr == NULL) {
125 // most likely a configuration problem
126 logout << text_t2ascii
127 << "configuration error: queryfilter contains a null mgppsearchclass\n\n";
128 err = configurationError;
129 return;
130 }
131 if (full_text_browse(request.filterResultOptions)) {
132 browsefilter(request, response, err, logout);
133 return;
134 }
135 // open the database
136 gdbmptr->setlogout(&logout);
137 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
138 // most likely a system problem (we have already checked that the
139 // gdbm database exists)
140 logout << text_t2ascii
141 << "system problem: open on gdbm database \""
142 << gdbm_filename << "\" failed\n\n";
143 err = systemProblem;
144 return;
145 }
146
147
148 // get the query parameters
149 int startresults, endresults;
150 text_t phrasematch; // not used here any more
151 vector<queryparamclass> queryfilterparams;
152 parse_query_params (request, queryfilterparams, startresults,
153 endresults, phrasematch, logout);
154
155
156 // do query
157 queryresultsclass queryresults;
158 do_multi_query (request, queryfilterparams, queryresults, err, logout);
159 if (err != noError) return;
160
161 // assemble document results
162 if (need_matching_docs (request.filterResultOptions)) {
163
164 int resultnum = 1;
165 ResultDocInfo_t resultdoc;
166 text_t trans_OID;
167 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
168 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
169
170 if (endresults == -1) endresults = MAXNUMDOCS;
171 while (docorder_here != docorder_end) {
172 if (resultnum > endresults) break;
173
174 // translate the document number
175 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
176 logout << text_t2ascii
177 << "warning: could not translate mgpp document number \""
178 << *docorder_here << "\"to OID.\n\n";
179
180 } else {
181 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
182
183 // see if there is a result for this number,
184 // if it is in the request set (or the request set is empty)
185 if (docset_here != queryresults.docs.docset.end() &&
186 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
187 if (resultnum >= startresults) {
188 // add this document
189 resultdoc.OID = trans_OID;
190 resultdoc.result_num = resultnum;
191 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
192
193 response.docInfo.push_back (resultdoc);
194 }
195
196 resultnum++;
197 }
198 } // else
199
200 docorder_here++;
201 }
202 } // if need matching docs
203
204 // assemble the term results
205 if (need_term_info(request.filterResultOptions)) {
206 // note: the terms have already been sorted and uniqued - ?? have they??
207
208 TermInfo_t terminfo;
209 bool terms_first = true;
210
211 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
212 termfreqclassarray::iterator terms_end = queryresults.terms.end();
213
214 while (terms_here != terms_end) {
215 terminfo.clear();
216 terminfo.term = (*terms_here).termstr;
217 terminfo.freq = (*terms_here).termfreq;
218
219 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
220 if (terms_first) {
221 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
222 text_tset::iterator termvariants_end = queryresults.termvariants.end();
223 while (termvariants_here != termvariants_end) {
224 terminfo.matchTerms.push_back (*termvariants_here);
225 termvariants_here++;
226 }
227 }
228 terms_first = false;
229
230 response.termInfo.push_back (terminfo);
231
232 terms_here++;
233 }
234 }
235
236 response.numDocs = queryresults.docs_matched;
237 response.isApprox = queryresults.is_approx;
238}
239
240void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
241 FilterResponse_t &response,
242 comerror_t &err, ostream &logout) {
243
244 outconvertclass text_t2ascii;
245
246 // get the query parameters
247 int startresults, endresults;
248 text_t phrasematch; // not used here any more, just have it so can use
249 // parse_query_params function
250
251 vector<queryparamclass> queryfilterparams;
252 parse_query_params (request, queryfilterparams, startresults,
253 endresults, phrasematch, logout);
254
255 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
256
257 // do query
258 queryresultsclass queryresults;
259 queryresults.clear();
260
261 int numDocs = endresults-startresults;
262 mgsearchptr->setcollectdir (collectdir);
263
264 if (!((mgppsearchclass*)mgsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
265 // most likely a system problem
266 logout << text_t2ascii
267 << "system problem: could not do full text browse with mgpp for index \""
268 << (*query_here).index << (*query_here).subcollection
269 << (*query_here).language << "\".\n\n";
270 err = systemProblem;
271 return;
272 }
273
274 // assemble the term results
275 TermInfo_t terminfo;
276
277 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
278 termfreqclassarray::iterator terms_end = queryresults.terms.end();
279
280 while (terms_here != terms_end) {
281 terminfo.clear();
282 terminfo.term = (*terms_here).termstr;
283 terminfo.freq = (*terms_here).termfreq;
284
285 response.termInfo.push_back (terminfo);
286
287 terms_here++;
288 }
289
290
291}
292
293// mgppsearchptr and gdbmptr are assumed to be valid
294void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
295 const vector<queryparamclass> &query_params,
296 queryresultsclass &multiresults,
297 comerror_t &err, ostream &logout) {
298 outconvertclass text_t2ascii;
299
300 err = noError;
301 mgsearchptr->setcollectdir (collectdir);
302 multiresults.clear();
303
304 vector<queryparamclass>::const_iterator query_here = query_params.begin();
305 vector<queryparamclass>::const_iterator query_end = query_params.end();
306 while (query_here != query_end) {
307 queryresultsclass thisqueryresults;
308 text_t indx((*query_here).index);
309 if (!mgsearchptr->search((*query_here), thisqueryresults)) {
310 // most likely a system problem
311 logout << text_t2ascii
312 << "system problem: could not do search with mgpp for index \""
313 << (*query_here).index << (*query_here).subcollection
314 << (*query_here).language << "\".\n\n";
315 err = systemProblem;
316 return;
317 }
318
319 // combine the results
320 if (need_matching_docs (request.filterResultOptions)) {
321
322 if (query_params.size() == 1) {
323 multiresults.docs = thisqueryresults.docs; // just one set of results
324 multiresults.docs_matched = thisqueryresults.docs_matched;
325 multiresults.is_approx = thisqueryresults.is_approx;
326
327 } else {
328 if ((*query_here).combinequery == "and") {
329 multiresults.docs.combine_and (thisqueryresults.docs);
330 } else if ((*query_here).combinequery == "or") {
331 multiresults.docs.combine_or (thisqueryresults.docs);
332 } else if ((*query_here).combinequery == "not") {
333 multiresults.docs.combine_not (thisqueryresults.docs);
334 }
335 multiresults.docs_matched = multiresults.docs.docset.size();
336 multiresults.is_approx = Exact;
337 }
338 }
339
340 // combine the term information
341 if (need_term_info (request.filterResultOptions)) {
342 // append the terms
343 multiresults.orgterms.insert(multiresults.orgterms.end(),
344 thisqueryresults.orgterms.begin(),
345 thisqueryresults.orgterms.end());
346
347
348 // add the term variants -
349 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
350 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
351 while (termvar_here != termvar_end) {
352 multiresults.termvariants.insert(*termvar_here);
353 termvar_here++;
354 }
355 }
356
357 query_here++;
358 }
359
360 // sort and unique the query terms
361 multiresults.sortuniqqueryterms ();
362}
363
364
365
Note: See TracBrowser for help on using the repository browser.