source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 9189

Last change on this file since 9189 was 8483, checked in by kjdon, 20 years ago

In configure, when reading in indexes or levels, clear what is already there; otherwise the local library keeps adding the same levels over again whenever a reconfigure is done.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.9 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27
28#include "mgppqueryfilter.h"
29#include "fileutil.h"
30#include <assert.h>
31#include "mgppsearch.h"
32
33/////////////////////////////////
34// functions for queryfilterclass
35/////////////////////////////////
36
37
38mgppqueryfilterclass::mgppqueryfilterclass ()
39 : queryfilterclass() {
40
41
42 FilterOption_t filtopt;
43
44 // -- onePerTerm Level enumerated
45 // likely to be Doc, Sec, Para, but we dont assume anything now
46 filtopt.clear();
47 filtopt.name = "Level";
48 filtopt.type = FilterOption_t::enumeratedt;
49 filtopt.repeatable = FilterOption_t::onePerTerm;
50 filterOptions["Level"] = filtopt;
51
52 // -- IndexField, enumerated, used to list available fields
53 filtopt.clear();
54 filtopt.name = "IndexField";
55 filtopt.type = FilterOption_t::enumeratedt;
56 filtopt.repeatable = FilterOption_t::onePerTerm;
57 filtopt.defaultValue = "";
58 filterOptions["IndexField"] = filtopt;
59
60}
61
62mgppqueryfilterclass::~mgppqueryfilterclass () {
63}
64
65
66//whether a query is a full text browse
67bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
68 return (filterRequestOptions & FRfullTextBrowse);
69}
70
71void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
72 queryfilterclass::configure(key, cfgline);
73
74 if (key == "indexfieldmap") {
75 indexfieldmap.importmap (cfgline);
76
77 // update the list of indexes in the filter information
78 text_tarray options;
79 indexfieldmap.gettoarray (options);
80 filterOptions["IndexField"].validValues.erase(filterOptions["IndexField"].validValues.begin(), filterOptions["IndexField"].validValues.end());
81 text_tarray::const_iterator here = options.begin();
82 text_tarray::const_iterator end = options.end();
83 bool start = true;
84 while (here !=end) {
85 if (!(*here).empty()) {
86 filterOptions["IndexField"].validValues.push_back(*here);
87 if (start) {
88 filterOptions["IndexField"].defaultValue = *here;
89 start = false;
90 }
91 }
92 here++;
93 }
94
95 } else if (key == "indexlevels") {
96 text_tarray::const_iterator here = cfgline.begin();
97 text_tarray::const_iterator end = cfgline.end();
98 bool first=true;
99 filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
100 while (here != end) {
101 if (!(*here).empty()) {
102 if (first) {
103 first = false;
104 // the default is the first value
105 filterOptions["Level"].defaultValue = *here;
106 }
107 filterOptions["Level"].validValues.push_back(*here);
108 }
109 here ++;
110 }
111 } else if (key == "textlevel") {
112 ((mgppsearchclass *)textsearchptr)->set_gdbm_level( cfgline[0]);
113 }
114
115}
116
117
118void mgppqueryfilterclass::filter(const FilterRequest_t &request,
119 FilterResponse_t &response,
120 comerror_t &err, ostream &logout) {
121
122
123 outconvertclass text_t2ascii;
124
125 response.clear ();
126 err = noError;
127 if (gdbmptr == NULL) {
128 // most likely a configuration problem
129 logout << text_t2ascii
130 << "configuration error: queryfilter contains a null gdbmclass\n\n";
131 err = configurationError;
132 return;
133 }
134 if (textsearchptr == NULL) {
135 // most likely a configuration problem
136 logout << text_t2ascii
137 << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
138 err = configurationError;
139 return;
140 }
141 if (full_text_browse(request.filterResultOptions)) {
142 browsefilter(request, response, err, logout);
143 return;
144 }
145 // open the database
146 gdbmptr->setlogout(&logout);
147 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
148 // most likely a system problem (we have already checked that the
149 // gdbm database exists)
150 logout << text_t2ascii
151 << "system problem: open on gdbm database \""
152 << gdbm_filename << "\" failed\n\n";
153 err = systemProblem;
154 return;
155 }
156
157
158 // get the query parameters
159 int startresults, endresults;
160 text_t phrasematch; // not used here any more
161 vector<queryparamclass> queryfilterparams;
162 parse_query_params (request, queryfilterparams, startresults,
163 endresults, phrasematch, logout);
164
165
166 // do query
167 queryresultsclass queryresults;
168 do_multi_query (request, queryfilterparams, queryresults, err, logout);
169 if (err != noError) return;
170 // assemble document results
171 if (need_matching_docs (request.filterResultOptions)) {
172
173 int resultnum = 1;
174 ResultDocInfo_t resultdoc;
175 text_t trans_OID;
176 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
177 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
178
179 if (endresults == -1) endresults = MAXNUMDOCS;
180 while (docorder_here != docorder_end) {
181 if (resultnum > endresults) break;
182
183 // translate the document number
184 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
185 logout << text_t2ascii
186 << "warning: could not translate mgpp document number \""
187 << *docorder_here << "\"to OID.\n\n";
188
189 } else {
190 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
191
192 // see if there is a result for this number,
193 // if it is in the request set (or the request set is empty)
194 if (docset_here != queryresults.docs.docset.end() &&
195 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
196 if (resultnum >= startresults) {
197 // add this document
198 resultdoc.OID = trans_OID;
199 resultdoc.result_num = resultnum;
200 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
201
202 response.docInfo.push_back (resultdoc);
203 }
204
205 resultnum++;
206 }
207 } // else
208
209 docorder_here++;
210 }
211 } // if need matching docs
212
213 // assemble the term results
214 if (need_term_info(request.filterResultOptions)) {
215 // note: the terms have already been sorted and uniqued - ?? have they??
216
217 TermInfo_t terminfo;
218 bool terms_first = true;
219
220 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
221 termfreqclassarray::iterator terms_end = queryresults.terms.end();
222
223 while (terms_here != terms_end) {
224 terminfo.clear();
225 terminfo.term = (*terms_here).termstr;
226 terminfo.freq = (*terms_here).termfreq;
227
228 // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
229 if (terms_first) {
230 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
231 text_tset::iterator termvariants_end = queryresults.termvariants.end();
232 while (termvariants_here != termvariants_end) {
233 terminfo.matchTerms.push_back (*termvariants_here);
234 termvariants_here++;
235 }
236 }
237 terms_first = false;
238
239 response.termInfo.push_back (terminfo);
240
241 terms_here++;
242 }
243 }
244
245 response.numDocs = queryresults.docs_matched;
246 response.isApprox = queryresults.is_approx;
247}
248
249void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
250 FilterResponse_t &response,
251 comerror_t &err, ostream &logout) {
252
253 outconvertclass text_t2ascii;
254
255 // get the query parameters
256 int startresults, endresults;
257 text_t phrasematch; // not used here any more, just have it so can use
258 // parse_query_params function
259
260 vector<queryparamclass> queryfilterparams;
261 parse_query_params (request, queryfilterparams, startresults,
262 endresults, phrasematch, logout);
263
264 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
265
266 // do query
267 queryresultsclass queryresults;
268 queryresults.clear();
269
270 int numDocs = endresults-startresults;
271 textsearchptr->setcollectdir (collectdir);
272
273 if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
274 // most likely a system problem
275 logout << text_t2ascii
276 << "system problem: could not do full text browse with mgpp for index \""
277 << (*query_here).index << (*query_here).subcollection
278 << (*query_here).language << "\".\n\n";
279 err = systemProblem;
280 return;
281 }
282
283 // assemble the term results
284 TermInfo_t terminfo;
285
286 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
287 termfreqclassarray::iterator terms_end = queryresults.terms.end();
288
289 while (terms_here != terms_end) {
290 terminfo.clear();
291 terminfo.term = (*terms_here).termstr;
292 terminfo.freq = (*terms_here).termfreq;
293
294 response.termInfo.push_back (terminfo);
295
296 terms_here++;
297 }
298
299
300}
301
302// mgppsearchptr and gdbmptr are assumed to be valid
303void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
304 const vector<queryparamclass> &query_params,
305 queryresultsclass &multiresults,
306 comerror_t &err, ostream &logout) {
307 outconvertclass text_t2ascii;
308
309 err = noError;
310 textsearchptr->setcollectdir (collectdir);
311 multiresults.clear();
312
313 vector<queryparamclass>::const_iterator query_here = query_params.begin();
314 vector<queryparamclass>::const_iterator query_end = query_params.end();
315 while (query_here != query_end) {
316 queryresultsclass thisqueryresults;
317 text_t indx((*query_here).index);
318 if (!textsearchptr->search((*query_here), thisqueryresults)) {
319 // most likely a system problem
320 logout << text_t2ascii
321 << "system problem: could not do search with mgpp for index \""
322 << (*query_here).index << (*query_here).subcollection
323 << (*query_here).language << "\".\n\n";
324 err = systemProblem;
325 return;
326 }
327
328 // check for syntax error
329 if (thisqueryresults.syntax_error==true) {
330 logout << text_t2ascii
331 << "syntax problem: invalid query string \""
332 << (*query_here).querystring<<"\".\n";
333 err = syntaxError;
334 return;
335 }
336 // combine the results
337 if (need_matching_docs (request.filterResultOptions)) {
338
339 if (query_params.size() == 1) {
340 multiresults.docs = thisqueryresults.docs; // just one set of results
341 multiresults.docs_matched = thisqueryresults.docs_matched;
342 multiresults.is_approx = thisqueryresults.is_approx;
343
344 } else {
345 if ((*query_here).combinequery == "and") {
346 multiresults.docs.combine_and (thisqueryresults.docs);
347 } else if ((*query_here).combinequery == "or") {
348 multiresults.docs.combine_or (thisqueryresults.docs);
349 } else if ((*query_here).combinequery == "not") {
350 multiresults.docs.combine_not (thisqueryresults.docs);
351 }
352 multiresults.docs_matched = multiresults.docs.docset.size();
353 multiresults.is_approx = Exact;
354 }
355 }
356
357 // combine the term information
358 if (need_term_info (request.filterResultOptions)) {
359 // append the terms
360 multiresults.orgterms.insert(multiresults.orgterms.end(),
361 thisqueryresults.orgterms.begin(),
362 thisqueryresults.orgterms.end());
363
364
365 // add the term variants -
366 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
367 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
368 while (termvar_here != termvar_end) {
369 multiresults.termvariants.insert(*termvar_here);
370 termvar_here++;
371 }
372 }
373
374 query_here++;
375 }
376
377 // sort and unique the query terms
378 multiresults.sortuniqqueryterms ();
379}
380
381
382
Note: See TracBrowser for help on using the repository browser.