source: trunk/gsdl/src/colservr/queryfilter.cpp@ 311

Last change on this file since 311 was 311, checked in by rjmcnab, 25 years ago

Added a couple of fields to queryinfo to handle a special version
of mg.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.6 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryfilter.cpp 311 1999-06-29 22:06:23Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.6 1999/06/29 22:06:23 rjmcnab
15 Added a couple of fields to queryinfo to handle a special version
16 of mg.
17
18 Revision 1.5 1999/06/27 22:08:48 sjboddie
19 now check for defaultindex, defaultsubcollection, and defaultlanguage
20 entries in config files
21
22 Revision 1.4 1999/06/16 02:03:25 sjboddie
23 fixed bug in isApprox and set MAXDOCS to always be 500
24
25 Revision 1.3 1999/04/19 23:56:09 rjmcnab
26 Finished the gdbm metadata stuff
27
28 Revision 1.2 1999/04/12 03:45:03 rjmcnab
29 Finished the query filter.
30
31 Revision 1.1 1999/04/06 22:22:09 rjmcnab
32 Initial revision.
33
34 */
35
36
37#include "queryfilter.h"
38#include "fileutil.h"
39#include "queryinfo.h"
40
// Maximum number of documents requested from mg for a single query.
// Note that MAXDOCS must be at least as large as the highest possible
// value of EndResults.
static const int MAXDOCS = 500;
43
44// some useful functions
45
46// translate will return true if successful
47static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
48 infodbclass info;
49
50 trans_OID.clear();
51
52 // get the info
53 if (gdbmptr == NULL) return false;
54 if (!gdbmptr->getinfo(docnum, info)) return false;
55
56 // translate
57 if (info["section"].empty()) return false;
58
59 trans_OID = info["section"];
60 return true;
61}
62
63
64
65queryfilterclass::queryfilterclass () {
66 gdbmptr = NULL;
67 mgsearchptr = NULL;
68
69 // -- onePerQuery StartResults integer
70 FilterOption_t filtopt;
71 filtopt.name = "StartResults";
72 filtopt.type = FilterOption_t::integert;
73 filtopt.repeatable = FilterOption_t::onePerQuery;
74 filtopt.defaultValue = "1";
75 filtopt.validValues.push_back("1");
76 filtopt.validValues.push_back("1000");
77 filterOptions["StartResults"] = filtopt;
78
79 // -- onePerQuery EndResults integer
80 filtopt.clear();
81 filtopt.name = "EndResults";
82 filtopt.type = FilterOption_t::integert;
83 filtopt.repeatable = FilterOption_t::onePerQuery;
84 filtopt.defaultValue = "10";
85 filtopt.validValues.push_back("1");
86 filtopt.validValues.push_back("1000");
87 filterOptions["EndResults"] = filtopt;
88
89 // -- onePerQuery QueryType enumerated (boolean, ranked)
90 filtopt.clear();
91 filtopt.name = "QueryType";
92 filtopt.type = FilterOption_t::enumeratedt;
93 filtopt.repeatable = FilterOption_t::onePerQuery;
94 filtopt.defaultValue = "ranked";
95 filtopt.validValues.push_back("boolean");
96 filtopt.validValues.push_back("ranked");
97 filterOptions["QueryType"] = filtopt;
98
99 // -- onePerTerm Term string ???
100 filtopt.clear();
101 filtopt.name = "Term";
102 filtopt.type = FilterOption_t::stringt;
103 filtopt.repeatable = FilterOption_t::onePerTerm;
104 filtopt.defaultValue = "";
105 filterOptions["Term"] = filtopt;
106
107 // -- onePerTerm Casefold boolean
108 filtopt.clear();
109 filtopt.name = "Casefold";
110 filtopt.type = FilterOption_t::booleant;
111 filtopt.repeatable = FilterOption_t::onePerTerm;
112 filtopt.defaultValue = "true";
113 filtopt.validValues.push_back("false");
114 filtopt.validValues.push_back("true");
115 filterOptions["Casefold"] = filtopt;
116
117 // -- onePerTerm Stem boolean
118 filtopt.clear();
119 filtopt.name = "Stem";
120 filtopt.type = FilterOption_t::booleant;
121 filtopt.repeatable = FilterOption_t::onePerTerm;
122 filtopt.defaultValue = "false";
123 filtopt.validValues.push_back("false");
124 filtopt.validValues.push_back("true");
125 filterOptions["Stem"] = filtopt;
126
127 // -- onePerTerm Index enumerated
128 filtopt.clear();
129 filtopt.name = "Index";
130 filtopt.type = FilterOption_t::enumeratedt;
131 filtopt.repeatable = FilterOption_t::onePerTerm;
132 filtopt.defaultValue = "";
133 filterOptions["Index"] = filtopt;
134
135 // -- onePerTerm Subcollection enumerated
136 filtopt.clear();
137 filtopt.name = "Subcollection";
138 filtopt.type = FilterOption_t::enumeratedt;
139 filtopt.repeatable = FilterOption_t::onePerTerm;
140 filtopt.defaultValue = "";
141 filterOptions["Subcollection"] = filtopt;
142
143 // -- onePerTerm Language enumerated
144 filtopt.clear();
145 filtopt.name = "Language";
146 filtopt.type = FilterOption_t::enumeratedt;
147 filtopt.repeatable = FilterOption_t::onePerTerm;
148 filtopt.defaultValue = "";
149 filterOptions["Language"] = filtopt;
150}
151
152queryfilterclass::~queryfilterclass () {
153}
154
155void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
156 filterclass::configure (key, cfgline);
157
158 if (key == "indexmap") {
159 indexmap.importmap (cfgline);
160
161 // update the list of indexes in the filter information
162 text_tarray options;
163 indexmap.gettoarray (options);
164 filterOptions["Index"].validValues = options;
165
166 } else if (key == "defaultindex") {
167 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
168
169 } else if (key == "subcollectionmap") {
170 subcollectionmap.importmap (cfgline);
171
172 // update the list of subcollections in the filter information
173 text_tarray options;
174 subcollectionmap.gettoarray (options);
175 filterOptions["Subcollection"].validValues = options;
176
177 } else if (key == "defaultsubcollection") {
178 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
179
180 } else if (key == "languagemap") {
181 languagemap.importmap (cfgline);
182
183 // update the list of languages in the filter information
184 text_tarray options;
185 languagemap.gettoarray (options);
186 filterOptions["Language"].validValues = options;
187
188 } else if (key == "defaultlanguage")
189 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
190}
191
192bool queryfilterclass::init (ostream &logout) {
193 outconvertclass text_t2ascii;
194
195 if (!filterclass::init(logout)) return false;
196
197 // get the filename for the database and make sure it exists
198 gdbm_filename = filename_cat(collectdir,"index","text",collection);
199#ifdef _LITTLE_ENDIAN
200 gdbm_filename += ".ldb";
201#else
202 gdbm_filename += ".bdb";
203#endif
204 if (!file_exists(gdbm_filename)) {
205 logout << text_t2ascii
206 << "error: gdbm database \""
207 << gdbm_filename << "\" does not exist\n\n";
208 return false;
209 }
210
211 return true;
212}
213
214void queryfilterclass::filter (const FilterRequest_t &request,
215 FilterResponse_t &response,
216 comerror_t &err, ostream &logout) {
217 outconvertclass text_t2ascii;
218
219 response.clear ();
220 err = noError;
221 if (gdbmptr == NULL) {
222 // most likely a configuration problem
223 logout << text_t2ascii
224 << "configuration error: queryfilter contains a null gdbmclass\n\n";
225 err = configurationError;
226 return;
227 }
228 if (mgsearchptr == NULL) {
229 // most likely a configuration problem
230 logout << text_t2ascii
231 << "configuration error: queryfilter contains a null mgsearchclass\n\n";
232 err = configurationError;
233 return;
234 }
235
236 // open the database
237 gdbmptr->setlogout(&logout);
238 if (!gdbmptr->opendatabase (gdbm_filename)) {
239 // most likely a system problem (we have already checked that the
240 // gdbm database exists)
241 logout << text_t2ascii
242 << "system problem: open on gdbm database \""
243 << gdbm_filename << "\" failed\n\n";
244 err = systemProblem;
245 return;
246 }
247
248 // get the query parameters
249 int startresults = filterOptions["StartResults"].defaultValue.getint();
250 int endresults = filterOptions["EndResults"].defaultValue.getint();
251 text_t index = filterOptions["Index"].defaultValue;
252 text_t subcollection = filterOptions["Subcollection"].defaultValue;
253 text_t language = filterOptions["Language"].defaultValue;
254 queryparamclass queryparams;
255 queryparams.collection = collection;
256 queryparams.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
257 queryparams.casefolding = (filterOptions["Casefold"].defaultValue == "true");
258 queryparams.stemming = (filterOptions["Stem"].defaultValue == "true");
259
260 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
261 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
262 while (options_here != options_end) {
263 if ((*options_here).name == "StartResults") {
264 startresults = (*options_here).value.getint();
265 } else if ((*options_here).name == "EndResults") {
266 endresults = (*options_here).value.getint();
267 } else if ((*options_here).name == "QueryType") {
268 queryparams.search_type = ((*options_here).value == "ranked");
269 } else if ((*options_here).name == "Term") {
270 queryparams.querystring = (*options_here).value;
271 } else if ((*options_here).name == "Casefold") {
272 queryparams.casefolding = ((*options_here).value == "true");
273 } else if ((*options_here).name == "Stem") {
274 queryparams.stemming = ((*options_here).value == "true");
275 } else if ((*options_here).name == "Index") {
276 index = (*options_here).value;
277 } else if ((*options_here).name == "Subcollection") {
278 subcollection = (*options_here).value;
279 } else if ((*options_here).name == "Language") {
280 language = (*options_here).value;
281 } else {
282 logout << text_t2ascii
283 << "warning: unknown queryfilter option \""
284 << (*options_here).name
285 << "\" ignored.\n\n";
286 }
287
288 options_here++;
289 }
290
291 queryparams.search_index = index+subcollection+language;
292 queryparams.maxdocs = MAXDOCS;
293
294 // do query
295 queryresultsclass queryresults;
296 mgsearchptr->setcollectdir (collectdir);
297 if (!mgsearchptr->search(queryparams, queryresults)) {
298 // most likely a system problem
299 logout << text_t2ascii
300 << "system problem: could not do search with mg for index \""
301 << queryparams.search_index << "\".\n\n";
302 err = systemProblem;
303 return;
304 }
305
306 // assemble document results
307 if ((request.filterResultOptions & FROID) || (request.filterResultOptions & FRranking) ||
308 (request.filterResultOptions & FRmetadata)) {
309 int resultnum = 1;
310 ResultDocInfo_t resultdoc;
311 text_t trans_OID;
312 vector<docresultclass>::iterator docs_here = queryresults.docs.begin();
313 vector<docresultclass>::iterator docs_end = queryresults.docs.end();
314
315 while (docs_here != docs_end) {
316 if (resultnum > endresults) break;
317
318 // translate the document number
319 if (!translate(gdbmptr, (*docs_here).docnum, trans_OID)) {
320 logout << text_t2ascii
321 << "warning: could not translate mg document number \""
322 << (*docs_here).docnum << "\"to OID.\n\n";
323
324 } else {
325 // see if it is in the set (or the set is empty)
326 if (request.docSet.empty() || in_set(request.docSet, trans_OID)) {
327 if (resultnum >= startresults) {
328 // add this document
329 resultdoc.OID = trans_OID;
330 resultdoc.ranking = (int)((*docs_here).docweight * 10000.0 + 0.5);
331
332 // these next two are not available on all versions of mg
333 resultdoc.num_terms_matched = (*docs_here).num_query_terms_matched;
334 resultdoc.query_phrase_match = (*docs_here).query_phrase_match;
335
336 response.docInfo.push_back (resultdoc);
337 }
338
339 resultnum++;
340 }
341 }
342
343 docs_here++;
344 }
345 }
346
347 // assemble the term results
348 if ((request.filterResultOptions & FRtermFreq) || (request.filterResultOptions & FRmatchTerms)) {
349 queryresults.sortqueryterms();
350 queryresults.uniqqueryterms();
351
352 TermInfo_t terminfo;
353 bool terms_first = true;
354 vector<termfreqclass>::iterator terms_here = queryresults.terms.begin();
355 vector<termfreqclass>::iterator terms_end = queryresults.terms.end();
356
357 while (terms_here != terms_end) {
358 terminfo.clear();
359 terminfo.term = (*terms_here).termstr;
360 terminfo.freq = (*terms_here).termfreq;
361 if (terms_first) terminfo.matchTerms = queryresults.termvariants;
362 terms_first = false;
363
364 response.termInfo.push_back (terminfo);
365
366 terms_here++;
367 }
368 }
369
370 response.numDocs = queryresults.getnumdocs();
371 response.isApprox = (queryresults.getnumdocs() == queryparams.maxdocs);
372}
Note: See TracBrowser for help on using the repository browser.