source: trunk/gsdl/src/colservr/queryfilter.cpp@ 235

Last change on this file since 235 was 235, checked in by rjmcnab, 25 years ago

Finished the query filter.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.5 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryfilter.cpp 235 1999-04-12 03:45:05Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.2 1999/04/12 03:45:03 rjmcnab
15 Finished the query filter.
16
17 Revision 1.1 1999/04/06 22:22:09 rjmcnab
18 Initial revision.
19
20 */
21
22
23#include "queryfilter.h"
24#include "fileutil.h"
25#include "queryinfo.h"
26
27
28// some useful functions
29
30// translate will return true if successful
31static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
32 infodbclass info;
33
34 trans_OID.clear();
35
36 // get the info
37 if (gdbmptr == NULL) return false;
38 if (!gdbmptr->getinfo(docnum, info)) return false;
39
40 // translate
41 if (info["section"].empty()) return false;
42
43 trans_OID = info["section"];
44 return true;
45}
46
47
48
49queryfilterclass::queryfilterclass () {
50 gdbmptr = NULL;
51 mgsearchptr = NULL;
52
53 // -- onePerQuery StartResults integer
54 FilterOption_t filtopt;
55 filtopt.name = "StartResults";
56 filtopt.type = FilterOption_t::integert;
57 filtopt.repeatable = FilterOption_t::onePerQuery;
58 filtopt.defaultValue = "1";
59 filtopt.validValues.push_back("1");
60 filtopt.validValues.push_back("1000");
61 filterOptions["StartResults"] = filtopt;
62
63 // -- onePerQuery EndResults integer
64 filtopt.clear();
65 filtopt.name = "EndResults";
66 filtopt.type = FilterOption_t::integert;
67 filtopt.repeatable = FilterOption_t::onePerQuery;
68 filtopt.defaultValue = "10";
69 filtopt.validValues.push_back("1");
70 filtopt.validValues.push_back("1000");
71 filterOptions["EndResults"] = filtopt;
72
73 // -- onePerQuery QueryType enumerated (boolean, ranked)
74 filtopt.clear();
75 filtopt.name = "QueryType";
76 filtopt.type = FilterOption_t::enumeratedt;
77 filtopt.repeatable = FilterOption_t::onePerQuery;
78 filtopt.defaultValue = "ranked";
79 filtopt.validValues.push_back("boolean");
80 filtopt.validValues.push_back("ranked");
81 filterOptions["QueryType"] = filtopt;
82
83 // -- onePerTerm Term string ???
84 filtopt.clear();
85 filtopt.name = "Term";
86 filtopt.type = FilterOption_t::stringt;
87 filtopt.repeatable = FilterOption_t::onePerTerm;
88 filtopt.defaultValue = "";
89 filterOptions["Term"] = filtopt;
90
91 // -- onePerTerm Casefold boolean
92 filtopt.clear();
93 filtopt.name = "Casefold";
94 filtopt.type = FilterOption_t::booleant;
95 filtopt.repeatable = FilterOption_t::onePerTerm;
96 filtopt.defaultValue = "true";
97 filtopt.validValues.push_back("false");
98 filtopt.validValues.push_back("true");
99 filterOptions["Casefold"] = filtopt;
100
101 // -- onePerTerm Stem boolean
102 filtopt.clear();
103 filtopt.name = "Stem";
104 filtopt.type = FilterOption_t::booleant;
105 filtopt.repeatable = FilterOption_t::onePerTerm;
106 filtopt.defaultValue = "false";
107 filtopt.validValues.push_back("false");
108 filtopt.validValues.push_back("true");
109 filterOptions["Stem"] = filtopt;
110
111 // -- onePerTerm Index enumerated
112 filtopt.clear();
113 filtopt.name = "Index";
114 filtopt.type = FilterOption_t::enumeratedt;
115 filtopt.repeatable = FilterOption_t::onePerTerm;
116 filtopt.defaultValue = "";
117 filterOptions["Index"] = filtopt;
118
119 // -- onePerTerm Subcollection enumerated
120 filtopt.clear();
121 filtopt.name = "Subcollection";
122 filtopt.type = FilterOption_t::enumeratedt;
123 filtopt.repeatable = FilterOption_t::onePerTerm;
124 filtopt.defaultValue = "";
125 filterOptions["Subcollection"] = filtopt;
126
127 // -- onePerTerm Language enumerated
128 filtopt.clear();
129 filtopt.name = "Language";
130 filtopt.type = FilterOption_t::enumeratedt;
131 filtopt.repeatable = FilterOption_t::onePerTerm;
132 filtopt.defaultValue = "";
133 filterOptions["Language"] = filtopt;
134}
135
136queryfilterclass::~queryfilterclass () {
137}
138
139void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
140 filterclass::configure (key, cfgline);
141
142 if (key == "indexmap") {
143 indexmap.importmap (cfgline);
144
145 // update the list of indexes in the filter information
146 text_tarray options;
147 indexmap.gettoarray (options);
148 filterOptions["Index"].validValues = options;
149
150 } else if (key == "subcollectionmap") {
151 subcollectionmap.importmap (cfgline);
152
153 // update the list of subcollections in the filter information
154 text_tarray options;
155 subcollectionmap.gettoarray (options);
156 filterOptions["Subcollection"].validValues = options;
157
158 } else if (key == "languagemap") {
159 languagemap.importmap (cfgline);
160
161 // update the list of languages in the filter information
162 text_tarray options;
163 languagemap.gettoarray (options);
164 filterOptions["Language"].validValues = options;
165 }
166}
167
168bool queryfilterclass::init (ostream &logout) {
169 outconvertclass text_t2ascii;
170
171 if (!filterclass::init(logout)) return false;
172
173 // get the filename for the database and make sure it exists
174 gdbm_filename = filename_cat(collectdir,"index","text",collection);
175#ifdef _LITTLE_ENDIAN
176 gdbm_filename += ".ldb";
177#else
178 gdbm_filename += ".bdb";
179#endif
180 if (!file_exists(gdbm_filename)) {
181 logout << text_t2ascii
182 << "error: gdbm database \""
183 << gdbm_filename << "\" does not exist\n\n";
184 return false;
185 }
186
187 return true;
188}
189
190void queryfilterclass::filter (const FilterRequest_t &request,
191 FilterResponse_t &response,
192 comerror_t &err, ostream &logout) {
193 outconvertclass text_t2ascii;
194
195 response.clear ();
196 err = noError;
197 if (gdbmptr == NULL) {
198 // most likely a configuration problem
199 logout << text_t2ascii
200 << "configuration error: queryfilter contains a null gdbmclass\n\n";
201 err = configurationError;
202 return;
203 }
204 if (mgsearchptr == NULL) {
205 // most likely a configuration problem
206 logout << text_t2ascii
207 << "configuration error: queryfilter contains a null mgsearchclass\n\n";
208 err = configurationError;
209 return;
210 }
211
212 // open the database
213 gdbmptr->setlogout(&logout);
214 if (!gdbmptr->opendatabase (gdbm_filename)) {
215 // most likely a system problem (we have already checked that the
216 // gdbm database exists)
217 logout << text_t2ascii
218 << "system problem: open on gdbm database \""
219 << gdbm_filename << "\" failed\n\n";
220 err = systemProblem;
221 return;
222 }
223
224 // get the query parameters
225 int startresults = filterOptions["StartResults"].defaultValue.getint();
226 int endresults = filterOptions["EndResults"].defaultValue.getint();
227 text_t index = filterOptions["Index"].defaultValue;
228 text_t subcollection = filterOptions["Subcollection"].defaultValue;
229 text_t language = filterOptions["Language"].defaultValue;
230 queryparamclass queryparams;
231 queryparams.collection = collection;
232 queryparams.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
233 queryparams.casefolding = (filterOptions["Casefold"].defaultValue == "true");
234 queryparams.stemming = (filterOptions["Stem"].defaultValue == "true");
235
236 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
237 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
238 while (options_here != options_end) {
239 if ((*options_here).name == "StartResults") {
240 startresults = (*options_here).value.getint();
241 } else if ((*options_here).name == "EndResults") {
242 endresults = (*options_here).value.getint();
243 } else if ((*options_here).name == "QueryType") {
244 queryparams.search_type = ((*options_here).value == "ranked");
245 } else if ((*options_here).name == "Term") {
246 queryparams.querystring = (*options_here).value;
247 } else if ((*options_here).name == "Casefold") {
248 queryparams.casefolding = ((*options_here).value == "true");
249 } else if ((*options_here).name == "Stem") {
250 queryparams.stemming = ((*options_here).value == "true");
251 } else if ((*options_here).name == "Index") {
252 index = (*options_here).value;
253 } else if ((*options_here).name == "Subcollection") {
254 subcollection = (*options_here).value;
255 } else if ((*options_here).name == "Language") {
256 language = (*options_here).value;
257 } else {
258 logout << text_t2ascii
259 << "warning: unknown queryfilter option \""
260 << (*options_here).name
261 << "\" ignored.\n\n";
262 }
263
264 options_here++;
265 }
266
267 queryparams.search_index = index+subcollection+language;
268 queryparams.maxdocs = (endresults > 100) ? endresults : 100;
269
270 // do query
271 queryresultsclass queryresults;
272 mgsearchptr->setcollectdir (collectdir);
273 if (!mgsearchptr->search(queryparams, queryresults)) {
274 // most likely a system problem
275 logout << text_t2ascii
276 << "system problem: could not do search with mg for index \""
277 << queryparams.search_index << "\".\n\n";
278 err = systemProblem;
279 return;
280 }
281
282 // assemble document results
283 if ((request.filterResultOptions & FROID) || (request.filterResultOptions & FRranking) ||
284 (request.filterResultOptions & FRmetadata)) {
285 int resultnum = 1;
286 ResultDocInfo_t resultdoc;
287 text_t trans_OID;
288 vector<docresultclass>::iterator docs_here = queryresults.docs.begin();
289 vector<docresultclass>::iterator docs_end = queryresults.docs.end();
290
291 while (docs_here != docs_end) {
292 if (resultnum > endresults) break;
293
294 // translate the document number
295 if (!translate(gdbmptr, (*docs_here).docnum, trans_OID)) {
296 logout << text_t2ascii
297 << "warning: could not translate mg document number \""
298 << (*docs_here).docnum << "\"to OID.\n\n";
299
300 } else {
301 // see if it is in the set (or the set is empty)
302 if (request.docSet.empty() || in_set(request.docSet, trans_OID)) {
303 if (resultnum >= startresults) {
304 // add this document
305 resultdoc.OID = trans_OID;
306 resultdoc.ranking = (int)((*docs_here).docweight * 10000.0 + 0.5);
307 response.docInfo.push_back (resultdoc);
308 }
309
310 resultnum++;
311 }
312 }
313
314 docs_here++;
315 }
316 }
317
318 // assemble the term results
319 if ((request.filterResultOptions & FRtermFreq) || (request.filterResultOptions & FRmatchTerms)) {
320 queryresults.sortqueryterms();
321 queryresults.uniqqueryterms();
322
323 TermInfo_t terminfo;
324 bool terms_first = true;
325 vector<termfreqclass>::iterator terms_here = queryresults.terms.begin();
326 vector<termfreqclass>::iterator terms_end = queryresults.terms.end();
327
328 while (terms_here != terms_end) {
329 terminfo.clear();
330 terminfo.term = (*terms_here).termstr;
331 terminfo.freq = (*terms_here).termfreq;
332 if (terms_first) terminfo.matchTerms = queryresults.termvariants;
333 terms_first = false;
334
335 response.termInfo.push_back (terminfo);
336
337 terms_here++;
338 }
339 }
340
341 response.numDocs = response.docInfo.size();
342 response.isApprox = (queryresults.getnumdocs() != queryparams.maxdocs);
343}
Note: See TracBrowser for help on using the repository browser.