source: trunk/gsdl/src/colservr/mgppqueryfilter.cpp@ 1661

Last change on this file since 1661 was 1520, checked in by nzdl, 24 years ago

fixed compiler warning

  • Property svn:keywords set to Author Date Id Revision
File size: 10.6 KB
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: mgppqueryfilter.cpp 1520 2000-09-07 23:35:07Z nzdl $
25 *
26 *********************************************************************/
27
28
29
30#include "mgppqueryfilter.h"
31#include "fileutil.h"
32#include <assert.h>
33#include "mgppsearch.h"
34
35/////////////////////////////////
36// functions for queryfilterclass
37/////////////////////////////////
38
39
40mgppqueryfilterclass::mgppqueryfilterclass ()
41 : queryfilterclass() {
42
43 FilterOption_t filtopt;
44 // -- onePerTerm Level enumerated
45 filtopt.clear();
46 filtopt.name = "Level";
47 filtopt.type = FilterOption_t::enumeratedt;
48 filtopt.repeatable = FilterOption_t::onePerTerm;
49 filtopt.validValues.push_back("Document");
50 filtopt.validValues.push_back("Section");
51 filtopt.validValues.push_back("Paragraph");
52 filtopt.defaultValue = "Document";
53 filterOptions["Level"] = filtopt;
54
55 // -- BrowseFields, enumerated, used to list available fields
56 filtopt.clear();
57 filtopt.name = "BrowseFields";
58 filtopt.type = FilterOption_t::enumeratedt;
59 filtopt.repeatable = FilterOption_t::onePerTerm;
60 filtopt.validValues.push_back("AllFields");
61 filtopt.defaultValue = "AllFields";
62 filterOptions["BrowseFields"] = filtopt;
63
64}
65
66mgppqueryfilterclass::~mgppqueryfilterclass () {
67}
68
69
70//whether a query is a full text browse
71bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
72 return (filterRequestOptions & FRfullTextBrowse);
73}
74
75void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
76 queryfilterclass::configure(key, cfgline);
77
78 if (key == "indexfields") {
79 text_tarray::const_iterator here = cfgline.begin();
80 text_tarray::const_iterator end = cfgline.end();
81
82 while (here !=end) {
83
84 filterOptions["BrowseFields"].validValues.push_back(*here);
85 here++;
86 }
87 }
88}
89
90
91void mgppqueryfilterclass::filter(const FilterRequest_t &request,
92 FilterResponse_t &response,
93 comerror_t &err, ostream &logout) {
94
95
96 outconvertclass text_t2ascii;
97
98 response.clear ();
99 err = noError;
100 if (gdbmptr == NULL) {
101 // most likely a configuration problem
102 logout << text_t2ascii
103 << "configuration error: queryfilter contains a null gdbmclass\n\n";
104 err = configurationError;
105 return;
106 }
107 if (mgsearchptr == NULL) {
108 // most likely a configuration problem
109 logout << text_t2ascii
110 << "configuration error: queryfilter contains a null mgppsearchclass\n\n";
111 err = configurationError;
112 return;
113 }
114 if (full_text_browse(request.filterResultOptions)) {
115 browsefilter(request, response, err, logout);
116 return;
117 }
118 // open the database
119 gdbmptr->setlogout(&logout);
120 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
121 // most likely a system problem (we have already checked that the
122 // gdbm database exists)
123 logout << text_t2ascii
124 << "system problem: open on gdbm database \""
125 << gdbm_filename << "\" failed\n\n";
126 err = systemProblem;
127 return;
128 }
129
130
131 // get the query parameters
132 int startresults, endresults;
133 text_t phrasematch; // not used here any more
134 vector<queryparamclass> queryfilterparams;
135 parse_query_params (request, queryfilterparams, startresults,
136 endresults, phrasematch, logout);
137
138
139 // do query
140 queryresultsclass queryresults;
141 do_multi_query (request, queryfilterparams, queryresults, err, logout);
142 if (err != noError) return;
143
144 // assemble document results
145 if (need_matching_docs (request.filterResultOptions)) {
146
147 int resultnum = 1;
148 ResultDocInfo_t resultdoc;
149 text_t trans_OID;
150 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
151 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
152
153 if (endresults == -1) endresults = MAXNUMDOCS;
154 while (docorder_here != docorder_end) {
155 if (resultnum > endresults) break;
156
157 // translate the document number
158 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
159 logout << text_t2ascii
160 << "warning: could not translate mgpp document number \""
161 << *docorder_here << "\"to OID.\n\n";
162
163 } else {
164 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
165
166 // see if there is a result for this number,
167 // if it is in the request set (or the request set is empty)
168 if (docset_here != queryresults.docs.docset.end() &&
169 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
170 if (resultnum >= startresults) {
171 // add this document
172 resultdoc.OID = trans_OID;
173 resultdoc.result_num = resultnum;
174 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
175
176 response.docInfo.push_back (resultdoc);
177 }
178
179 resultnum++;
180 }
181 } // else
182
183 docorder_here++;
184 }
185 } // if need matching docs
186
187 // assemble the term results
188 if (need_term_info(request.filterResultOptions)) {
189 // note: the terms have already been sorted and uniqued
190
191 TermInfo_t terminfo;
192 bool terms_first = true;
193
194 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
195 termfreqclassarray::iterator terms_end = queryresults.terms.end();
196
197 while (terms_here != terms_end) {
198 terminfo.clear();
199 terminfo.term = (*terms_here).termstr;
200 terminfo.freq = (*terms_here).termfreq;
201 if (terms_first) {
202 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
203 text_tset::iterator termvariants_end = queryresults.termvariants.end();
204 while (termvariants_here != termvariants_end) {
205 terminfo.matchTerms.push_back (*termvariants_here);
206 termvariants_here++;
207 }
208 }
209 terms_first = false;
210
211 response.termInfo.push_back (terminfo);
212
213 terms_here++;
214 }
215 }
216
217 response.numDocs = queryresults.docs_matched;
218 response.isApprox = queryresults.is_approx;
219}
220
221void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
222 FilterResponse_t &response,
223 comerror_t &err, ostream &logout) {
224
225 outconvertclass text_t2ascii;
226
227 // get the query parameters
228 int startresults, endresults;
229 text_t phrasematch; // not used here any more, just have it so can use
230 // parse_query_params function
231
232 vector<queryparamclass> queryfilterparams;
233 parse_query_params (request, queryfilterparams, startresults,
234 endresults, phrasematch, logout);
235
236 vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
237
238 // do query
239 queryresultsclass queryresults;
240 queryresults.clear();
241
242 int numDocs = endresults-startresults;
243 mgsearchptr->setcollectdir (collectdir);
244
245 if (!((mgppsearchclass*)mgsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
246 // most likely a system problem
247 logout << text_t2ascii
248 << "system problem: could not do full text browse with mgpp for index \""
249 << (*query_here).index << (*query_here).subcollection
250 << (*query_here).language << "\".\n\n";
251 err = systemProblem;
252 return;
253 }
254
255 // assemble the term results
256 TermInfo_t terminfo;
257
258 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
259 termfreqclassarray::iterator terms_end = queryresults.terms.end();
260
261 while (terms_here != terms_end) {
262 terminfo.clear();
263 terminfo.term = (*terms_here).termstr;
264 terminfo.freq = (*terms_here).termfreq;
265
266 response.termInfo.push_back (terminfo);
267
268 terms_here++;
269 }
270
271
272}
273
274// mgppsearchptr and gdbmptr are assumed to be valid
275void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
276 const vector<queryparamclass> &query_params,
277 queryresultsclass &multiresults,
278 comerror_t &err, ostream &logout) {
279 outconvertclass text_t2ascii;
280
281 err = noError;
282 mgsearchptr->setcollectdir (collectdir);
283 multiresults.clear();
284
285 vector<queryparamclass>::const_iterator query_here = query_params.begin();
286 vector<queryparamclass>::const_iterator query_end = query_params.end();
287 while (query_here != query_end) {
288 queryresultsclass thisqueryresults;
289 text_t indx((*query_here).index);
290 if (!mgsearchptr->search((*query_here), thisqueryresults)) {
291 // most likely a system problem
292 logout << text_t2ascii
293 << "system problem: could not do search with mgpp for index \""
294 << (*query_here).index << (*query_here).subcollection
295 << (*query_here).language << "\".\n\n";
296 err = systemProblem;
297 return;
298 }
299
300 // combine the results
301 if (need_matching_docs (request.filterResultOptions)) {
302
303 if (query_params.size() == 1) {
304 multiresults.docs = thisqueryresults.docs; // just one set of results
305 multiresults.docs_matched = thisqueryresults.docs_matched;
306 multiresults.is_approx = thisqueryresults.is_approx;
307
308 } else {
309 if ((*query_here).combinequery == "and") {
310 multiresults.docs.combine_and (thisqueryresults.docs);
311 } else if ((*query_here).combinequery == "or") {
312 multiresults.docs.combine_or (thisqueryresults.docs);
313 } else if ((*query_here).combinequery == "not") {
314 multiresults.docs.combine_not (thisqueryresults.docs);
315 }
316 multiresults.docs_matched = multiresults.docs.docset.size();
317 multiresults.is_approx = Exact;
318 }
319 }
320
321 // combine the term information
322 if (need_term_info (request.filterResultOptions)) {
323 // append the terms
324 multiresults.orgterms.insert(multiresults.orgterms.end(),
325 thisqueryresults.orgterms.begin(),
326 thisqueryresults.orgterms.end());
327
328
329 // add the term variants -
330 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
331 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
332 while (termvar_here != termvar_end) {
333 multiresults.termvariants.insert(*termvar_here);
334 termvar_here++;
335 }
336 }
337
338 query_here++;
339 }
340
341 // sort and unique the query terms
342 multiresults.sortuniqqueryterms ();
343}
344
345
346
Note: See TracBrowser for help on using the repository browser.