source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 13780

Last change on this file since 13780 was 13780, checked in by mdewsnip, 17 years ago

GLI/LOCAL LIBRARY: To prevent the problems with the GLI being unable to install newly built collections because the local library is holding files open, much more care needs to be taken to close files (typically the GDBM database and the MG/MGPP index files) after use. Fixed a lot of places where files were being left open.

  • Property svn:keywords set to Author Date Id Revision
File size: 12.2 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
/////////////////////////////////////
// functions for mgppqueryfilterclass
/////////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // likely to be Doc, Sec, Para, but we dont assume anything now
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filterOptions["Level"] = filtopt;
51
52 // -- IndexField, enumerated, used to list available fields
53 filtopt.clear();
54 filtopt.name = "IndexField";
55 filtopt.type = FilterOption_t::enumeratedt;
56 filtopt.repeatable = FilterOption_t::onePerTerm;
57 filtopt.defaultValue = "";
58 filterOptions["IndexField"] = filtopt;
59
60}
61
62mgppqueryfilterclass::~mgppqueryfilterclass () {
63}
64
65
66//whether a query is a full text browse
67bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
68 return (filterRequestOptions & FRfullTextBrowse);
69}
70
71void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
72 queryfilterclass::configure(key, cfgline);
73
74 if (key == "indexfieldmap") {
75 indexfieldmap.importmap (cfgline);
76
77 // update the list of indexes in the filter information
78 text_tarray options;
79 indexfieldmap.gettoarray (options);
80 filterOptions["IndexField"].validValues = options;
81
82 } else if (key == "indexlevels") {
83 text_tarray::const_iterator here = cfgline.begin();
84 text_tarray::const_iterator end = cfgline.end();
85 bool first=true;
86 filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
87 while (here != end) {
88 if (!(*here).empty()) {
89 if (first) {
90 first = false;
91 // the default is the first value
92 filterOptions["Level"].defaultValue = *here;
93 }
94 filterOptions["Level"].validValues.push_back(*here);
95 }
96 ++here;
97 }
98 } else if (key == "textlevel") {
99 ((mgppsearchclass *)textsearchptr)->set_gdbm_level( cfgline[0]);
100 } else if (key == "indexstem") {
101 ((mgppsearchclass *)textsearchptr)->set_indexstem (cfgline[0]);
102 } else if (key == "defaultindex") { // used for fields in mgpp
103 indexfieldmap.from2to (cfgline[0], filterOptions["IndexField"].defaultValue);
104 }
105
106}
107
108bool mgppqueryfilterclass::init (ostream &logout) {
109
110 if (!queryfilterclass::init(logout)) {
111 return false;
112 }
113
114 if (filterOptions["IndexField"].defaultValue.empty()) {
115 // use first index in map as default if no default is set explicitly
116 text_tarray fromarray;
117 indexfieldmap.getfromarray(fromarray);
118 if (fromarray.size()) {
119 filterOptions["IndexField"].defaultValue = fromarray[0];
120 }
121 }
122 return true;
123}
124
125void mgppqueryfilterclass::filter(const FilterRequest_t &request,
126 FilterResponse_t &response,
127 comerror_t &err, ostream &logout) {
128
129
130 outconvertclass text_t2ascii;
131
132 response.clear ();
133 err = noError;
134 if (gdbmptr == NULL) {
135 // most likely a configuration problem
136 logout << text_t2ascii
137 << "configuration error: queryfilter contains a null gdbmclass\n\n";
138 err = configurationError;
139 return;
140 }
141 if (textsearchptr == NULL) {
142 // most likely a configuration problem
143 logout << text_t2ascii
144 << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
145 err = configurationError;
146 return;
147 }
148 if (full_text_browse(request.filterResultOptions)) {
149 browsefilter(request, response, err, logout);
150 return;
151 }
152 // open the database
153 gdbmptr->setlogout(&logout);
154 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
155 // most likely a system problem (we have already checked that the
156 // gdbm database exists)
157 logout << text_t2ascii
158 << "system problem: open on gdbm database \""
159 << gdbm_filename << "\" failed\n\n";
160 err = systemProblem;
161 return;
162 }
163
164
165 // get the query parameters
166 int startresults, endresults;
167 text_t phrasematch; // not used here any more
168 vector<queryparamclass> queryfilterparams;
169 parse_query_params (request, queryfilterparams, startresults,
170 endresults, phrasematch, logout);
171
172
173 // do query
174 queryresultsclass queryresults;
175 do_multi_query (request, queryfilterparams, queryresults, err, logout);
176 if (err != noError) return;
177 // assemble document results
178 if (need_matching_docs (request.filterResultOptions)) {
179
180 int resultnum = 1;
181 ResultDocInfo_t resultdoc;
182 text_t trans_OID;
183 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
184 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
185
186 if (endresults == -1) endresults = MAXNUMDOCS;
187 while (docorder_here != docorder_end) {
188 if (resultnum > endresults) break;
189
190 // translate the document number
191 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
192 logout << text_t2ascii
193 << "warning: could not translate mgpp document number \""
194 << *docorder_here << "\"to OID.\n\n";
195
196 } else {
197 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
198
199 // see if there is a result for this number,
200 // if it is in the request set (or the request set is empty)
201 if (docset_here != queryresults.docs.docset.end() &&
202 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
203 if (resultnum >= startresults) {
204 // add this document
205 resultdoc.OID = trans_OID;
206 resultdoc.result_num = resultnum;
207 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
208
209 response.docInfo.push_back (resultdoc);
210 }
211
212 ++resultnum;
213 }
214 } // else
215
216 ++docorder_here;
217 }
218 } // if need matching docs
219
220 // assemble the term results
221 if (need_term_info(request.filterResultOptions)) {
222 // note: the terms have already been sorted and uniqued - ?? have they??
223
224 TermInfo_t terminfo;
225 bool terms_first = true;
226
227 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
228 termfreqclassarray::iterator terms_end = queryresults.terms.end();
229
230 while (terms_here != terms_end) {
231 terminfo.clear();
232 terminfo.term = (*terms_here).termstr;
233 terminfo.freq = (*terms_here).termfreq;
234
235 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
236 if (terms_first) {
237 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
238 text_tset::iterator termvariants_end = queryresults.termvariants.end();
239 while (termvariants_here != termvariants_end) {
240 terminfo.matchTerms.push_back (*termvariants_here);
241 ++termvariants_here;
242 }
243 }
244 terms_first = false;
245
246 response.termInfo.push_back (terminfo);
247
248 ++terms_here;
249 }
250 }
251
252 gdbmptr->closedatabase(); // Important that local library doesn't leave any files open
253 response.numDocs = queryresults.docs_matched;
254 response.isApprox = queryresults.is_approx;
255}
256
257void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
258 FilterResponse_t &response,
259 comerror_t &err, ostream &logout) {
260
261 outconvertclass text_t2ascii;
262
263 // get the query parameters
264 int startresults, endresults;
265 text_t phrasematch; // not used here any more, just have it so can use
266 // parse_query_params function
267
268 vector<queryparamclass> queryfilterparams;
269 parse_query_params (request, queryfilterparams, startresults,
270 endresults, phrasematch, logout);
271
272 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
273
274 // do query
275 queryresultsclass queryresults;
276 queryresults.clear();
277
278 int numDocs = endresults-startresults;
279 textsearchptr->setcollectdir (collectdir);
280
281 if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
282 // most likely a system problem
283 logout << text_t2ascii
284 << "system problem: could not do full text browse with mgpp for index \""
285 << (*query_here).index << (*query_here).subcollection
286 << (*query_here).language << "\".\n\n";
287 err = systemProblem;
288 return;
289 }
290
291 // assemble the term results
292 TermInfo_t terminfo;
293
294 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
295 termfreqclassarray::iterator terms_end = queryresults.terms.end();
296
297 while (terms_here != terms_end) {
298 terminfo.clear();
299 terminfo.term = (*terms_here).termstr;
300 terminfo.freq = (*terms_here).termfreq;
301
302 response.termInfo.push_back (terminfo);
303
304 ++terms_here;
305 }
306
307
308}
309
310// mgppsearchptr and gdbmptr are assumed to be valid
311void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
312 const vector<queryparamclass> &query_params,
313 queryresultsclass &multiresults,
314 comerror_t &err, ostream &logout) {
315 outconvertclass text_t2ascii;
316
317 err = noError;
318 textsearchptr->setcollectdir (collectdir);
319 multiresults.clear();
320
321 vector<queryparamclass>::const_iterator query_here = query_params.begin();
322 vector<queryparamclass>::const_iterator query_end = query_params.end();
323 while (query_here != query_end) {
324 queryresultsclass thisqueryresults;
325 text_t indx((*query_here).index);
326 if (!textsearchptr->search((*query_here), thisqueryresults)) {
327 // most likely a system problem
328 logout << text_t2ascii
329 << "system problem: could not do search with mgpp for index \""
330 << (*query_here).index << (*query_here).subcollection
331 << (*query_here).language << "\".\n\n";
332 err = systemProblem;
333 return;
334 }
335
336 // check for syntax error
337 if (thisqueryresults.syntax_error==true) {
338 logout << text_t2ascii
339 << "syntax problem: invalid query string \""
340 << (*query_here).querystring<<"\".\n";
341 err = syntaxError;
342 return;
343 }
344 // combine the results
345 if (need_matching_docs (request.filterResultOptions)) {
346
347 if (query_params.size() == 1) {
348 multiresults.docs = thisqueryresults.docs; // just one set of results
349 multiresults.docs_matched = thisqueryresults.docs_matched;
350 multiresults.is_approx = thisqueryresults.is_approx;
351
352 } else {
353 if ((*query_here).combinequery == "and") {
354 multiresults.docs.combine_and (thisqueryresults.docs);
355 } else if ((*query_here).combinequery == "or") {
356 multiresults.docs.combine_or (thisqueryresults.docs);
357 } else if ((*query_here).combinequery == "not") {
358 multiresults.docs.combine_not (thisqueryresults.docs);
359 }
360 multiresults.docs_matched = multiresults.docs.docset.size();
361 multiresults.is_approx = Exact;
362 }
363 }
364
365 // combine the term information
366 if (need_term_info (request.filterResultOptions)) {
367 // append the terms
368 multiresults.orgterms.insert(multiresults.orgterms.end(),
369 thisqueryresults.orgterms.begin(),
370 thisqueryresults.orgterms.end());
371
372
373 // add the term variants -
374 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
375 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
376 while (termvar_here != termvar_end) {
377 multiresults.termvariants.insert(*termvar_here);
378 ++termvar_here;
379 }
380 }
381
382 ++query_here;
383 }
384
385 // sort and unique the query terms
386 multiresults.sortuniqqueryterms ();
387}
388
389
390
Note: See TracBrowser for help on using the repository browser.