source: trunk/gsdl/src/colservr/queryfilter.cpp@ 273

Last change on this file since 273 was 273, checked in by sjboddie, 25 years ago

fixed bug in isApprox and set MAXDOCS to always be 500

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.8 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryfilter.cpp 273 1999-06-16 02:03:25Z sjboddie $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.4 1999/06/16 02:03:25 sjboddie
15 fixed bug in isApprox and set MAXDOCS to always be 500
16
17 Revision 1.3 1999/04/19 23:56:09 rjmcnab
18 Finished the gdbm metadata stuff
19
20 Revision 1.2 1999/04/12 03:45:03 rjmcnab
21 Finished the query filter.
22
23 Revision 1.1 1999/04/06 22:22:09 rjmcnab
24 Initial revision.
25
26 */
27
28
29#include "queryfilter.h"
30#include "fileutil.h"
31#include "queryinfo.h"
32
33#define MAXDOCS 500 // note that maxdocs must be at least as large
34 // as the highest possible value of EndResults
35
36// some useful functions
37
38// translate will return true if successful
39static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
40 infodbclass info;
41
42 trans_OID.clear();
43
44 // get the info
45 if (gdbmptr == NULL) return false;
46 if (!gdbmptr->getinfo(docnum, info)) return false;
47
48 // translate
49 if (info["section"].empty()) return false;
50
51 trans_OID = info["section"];
52 return true;
53}
54
55
56
57queryfilterclass::queryfilterclass () {
58 gdbmptr = NULL;
59 mgsearchptr = NULL;
60
61 // -- onePerQuery StartResults integer
62 FilterOption_t filtopt;
63 filtopt.name = "StartResults";
64 filtopt.type = FilterOption_t::integert;
65 filtopt.repeatable = FilterOption_t::onePerQuery;
66 filtopt.defaultValue = "1";
67 filtopt.validValues.push_back("1");
68 filtopt.validValues.push_back("1000");
69 filterOptions["StartResults"] = filtopt;
70
71 // -- onePerQuery EndResults integer
72 filtopt.clear();
73 filtopt.name = "EndResults";
74 filtopt.type = FilterOption_t::integert;
75 filtopt.repeatable = FilterOption_t::onePerQuery;
76 filtopt.defaultValue = "10";
77 filtopt.validValues.push_back("1");
78 filtopt.validValues.push_back("1000");
79 filterOptions["EndResults"] = filtopt;
80
81 // -- onePerQuery QueryType enumerated (boolean, ranked)
82 filtopt.clear();
83 filtopt.name = "QueryType";
84 filtopt.type = FilterOption_t::enumeratedt;
85 filtopt.repeatable = FilterOption_t::onePerQuery;
86 filtopt.defaultValue = "ranked";
87 filtopt.validValues.push_back("boolean");
88 filtopt.validValues.push_back("ranked");
89 filterOptions["QueryType"] = filtopt;
90
91 // -- onePerTerm Term string ???
92 filtopt.clear();
93 filtopt.name = "Term";
94 filtopt.type = FilterOption_t::stringt;
95 filtopt.repeatable = FilterOption_t::onePerTerm;
96 filtopt.defaultValue = "";
97 filterOptions["Term"] = filtopt;
98
99 // -- onePerTerm Casefold boolean
100 filtopt.clear();
101 filtopt.name = "Casefold";
102 filtopt.type = FilterOption_t::booleant;
103 filtopt.repeatable = FilterOption_t::onePerTerm;
104 filtopt.defaultValue = "true";
105 filtopt.validValues.push_back("false");
106 filtopt.validValues.push_back("true");
107 filterOptions["Casefold"] = filtopt;
108
109 // -- onePerTerm Stem boolean
110 filtopt.clear();
111 filtopt.name = "Stem";
112 filtopt.type = FilterOption_t::booleant;
113 filtopt.repeatable = FilterOption_t::onePerTerm;
114 filtopt.defaultValue = "false";
115 filtopt.validValues.push_back("false");
116 filtopt.validValues.push_back("true");
117 filterOptions["Stem"] = filtopt;
118
119 // -- onePerTerm Index enumerated
120 filtopt.clear();
121 filtopt.name = "Index";
122 filtopt.type = FilterOption_t::enumeratedt;
123 filtopt.repeatable = FilterOption_t::onePerTerm;
124 filtopt.defaultValue = "";
125 filterOptions["Index"] = filtopt;
126
127 // -- onePerTerm Subcollection enumerated
128 filtopt.clear();
129 filtopt.name = "Subcollection";
130 filtopt.type = FilterOption_t::enumeratedt;
131 filtopt.repeatable = FilterOption_t::onePerTerm;
132 filtopt.defaultValue = "";
133 filterOptions["Subcollection"] = filtopt;
134
135 // -- onePerTerm Language enumerated
136 filtopt.clear();
137 filtopt.name = "Language";
138 filtopt.type = FilterOption_t::enumeratedt;
139 filtopt.repeatable = FilterOption_t::onePerTerm;
140 filtopt.defaultValue = "";
141 filterOptions["Language"] = filtopt;
142}
143
144queryfilterclass::~queryfilterclass () {
145}
146
147void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
148 filterclass::configure (key, cfgline);
149
150 if (key == "indexmap") {
151 indexmap.importmap (cfgline);
152
153 // update the list of indexes in the filter information
154 text_tarray options;
155 indexmap.gettoarray (options);
156 filterOptions["Index"].validValues = options;
157
158 } else if (key == "subcollectionmap") {
159 subcollectionmap.importmap (cfgline);
160
161 // update the list of subcollections in the filter information
162 text_tarray options;
163 subcollectionmap.gettoarray (options);
164 filterOptions["Subcollection"].validValues = options;
165
166 } else if (key == "languagemap") {
167 languagemap.importmap (cfgline);
168
169 // update the list of languages in the filter information
170 text_tarray options;
171 languagemap.gettoarray (options);
172 filterOptions["Language"].validValues = options;
173 }
174}
175
176bool queryfilterclass::init (ostream &logout) {
177 outconvertclass text_t2ascii;
178
179 if (!filterclass::init(logout)) return false;
180
181 // get the filename for the database and make sure it exists
182 gdbm_filename = filename_cat(collectdir,"index","text",collection);
183#ifdef _LITTLE_ENDIAN
184 gdbm_filename += ".ldb";
185#else
186 gdbm_filename += ".bdb";
187#endif
188 if (!file_exists(gdbm_filename)) {
189 logout << text_t2ascii
190 << "error: gdbm database \""
191 << gdbm_filename << "\" does not exist\n\n";
192 return false;
193 }
194
195 return true;
196}
197
198void queryfilterclass::filter (const FilterRequest_t &request,
199 FilterResponse_t &response,
200 comerror_t &err, ostream &logout) {
201 outconvertclass text_t2ascii;
202
203 response.clear ();
204 err = noError;
205 if (gdbmptr == NULL) {
206 // most likely a configuration problem
207 logout << text_t2ascii
208 << "configuration error: queryfilter contains a null gdbmclass\n\n";
209 err = configurationError;
210 return;
211 }
212 if (mgsearchptr == NULL) {
213 // most likely a configuration problem
214 logout << text_t2ascii
215 << "configuration error: queryfilter contains a null mgsearchclass\n\n";
216 err = configurationError;
217 return;
218 }
219
220 // open the database
221 gdbmptr->setlogout(&logout);
222 if (!gdbmptr->opendatabase (gdbm_filename)) {
223 // most likely a system problem (we have already checked that the
224 // gdbm database exists)
225 logout << text_t2ascii
226 << "system problem: open on gdbm database \""
227 << gdbm_filename << "\" failed\n\n";
228 err = systemProblem;
229 return;
230 }
231
232 // get the query parameters
233 int startresults = filterOptions["StartResults"].defaultValue.getint();
234 int endresults = filterOptions["EndResults"].defaultValue.getint();
235 text_t index = filterOptions["Index"].defaultValue;
236 text_t subcollection = filterOptions["Subcollection"].defaultValue;
237 text_t language = filterOptions["Language"].defaultValue;
238 queryparamclass queryparams;
239 queryparams.collection = collection;
240 queryparams.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
241 queryparams.casefolding = (filterOptions["Casefold"].defaultValue == "true");
242 queryparams.stemming = (filterOptions["Stem"].defaultValue == "true");
243
244 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
245 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
246 while (options_here != options_end) {
247 if ((*options_here).name == "StartResults") {
248 startresults = (*options_here).value.getint();
249 } else if ((*options_here).name == "EndResults") {
250 endresults = (*options_here).value.getint();
251 } else if ((*options_here).name == "QueryType") {
252 queryparams.search_type = ((*options_here).value == "ranked");
253 } else if ((*options_here).name == "Term") {
254 queryparams.querystring = (*options_here).value;
255 } else if ((*options_here).name == "Casefold") {
256 queryparams.casefolding = ((*options_here).value == "true");
257 } else if ((*options_here).name == "Stem") {
258 queryparams.stemming = ((*options_here).value == "true");
259 } else if ((*options_here).name == "Index") {
260 index = (*options_here).value;
261 } else if ((*options_here).name == "Subcollection") {
262 subcollection = (*options_here).value;
263 } else if ((*options_here).name == "Language") {
264 language = (*options_here).value;
265 } else {
266 logout << text_t2ascii
267 << "warning: unknown queryfilter option \""
268 << (*options_here).name
269 << "\" ignored.\n\n";
270 }
271
272 options_here++;
273 }
274
275 queryparams.search_index = index+subcollection+language;
276 queryparams.maxdocs = MAXDOCS;
277
278 // do query
279 queryresultsclass queryresults;
280 mgsearchptr->setcollectdir (collectdir);
281 if (!mgsearchptr->search(queryparams, queryresults)) {
282 // most likely a system problem
283 logout << text_t2ascii
284 << "system problem: could not do search with mg for index \""
285 << queryparams.search_index << "\".\n\n";
286 err = systemProblem;
287 return;
288 }
289
290 // assemble document results
291 if ((request.filterResultOptions & FROID) || (request.filterResultOptions & FRranking) ||
292 (request.filterResultOptions & FRmetadata)) {
293 int resultnum = 1;
294 ResultDocInfo_t resultdoc;
295 text_t trans_OID;
296 vector<docresultclass>::iterator docs_here = queryresults.docs.begin();
297 vector<docresultclass>::iterator docs_end = queryresults.docs.end();
298
299 while (docs_here != docs_end) {
300 if (resultnum > endresults) break;
301
302 // translate the document number
303 if (!translate(gdbmptr, (*docs_here).docnum, trans_OID)) {
304 logout << text_t2ascii
305 << "warning: could not translate mg document number \""
306 << (*docs_here).docnum << "\"to OID.\n\n";
307
308 } else {
309 // see if it is in the set (or the set is empty)
310 if (request.docSet.empty() || in_set(request.docSet, trans_OID)) {
311 if (resultnum >= startresults) {
312 // add this document
313 resultdoc.OID = trans_OID;
314 resultdoc.ranking = (int)((*docs_here).docweight * 10000.0 + 0.5);
315 response.docInfo.push_back (resultdoc);
316 }
317
318 resultnum++;
319 }
320 }
321
322 docs_here++;
323 }
324 }
325
326 // assemble the term results
327 if ((request.filterResultOptions & FRtermFreq) || (request.filterResultOptions & FRmatchTerms)) {
328 queryresults.sortqueryterms();
329 queryresults.uniqqueryterms();
330
331 TermInfo_t terminfo;
332 bool terms_first = true;
333 vector<termfreqclass>::iterator terms_here = queryresults.terms.begin();
334 vector<termfreqclass>::iterator terms_end = queryresults.terms.end();
335
336 while (terms_here != terms_end) {
337 terminfo.clear();
338 terminfo.term = (*terms_here).termstr;
339 terminfo.freq = (*terms_here).termfreq;
340 if (terms_first) terminfo.matchTerms = queryresults.termvariants;
341 terms_first = false;
342
343 response.termInfo.push_back (terminfo);
344
345 terms_here++;
346 }
347 }
348
349 response.numDocs = queryresults.getnumdocs();
350 response.isApprox = (queryresults.getnumdocs() == queryparams.maxdocs);
351}
Note: See TracBrowser for help on using the repository browser.