source: trunk/gsdl/src/colservr/queryfilter.cpp@ 238

Last change on this file since 238 was 238, checked in by rjmcnab, 25 years ago

Finished the gdbm metadata stuff

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.6 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryfilter.cpp 238 1999-04-19 23:56:10Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.3 1999/04/19 23:56:09 rjmcnab
15 Finished the gdbm metadata stuff
16
17 Revision 1.2 1999/04/12 03:45:03 rjmcnab
18 Finished the query filter.
19
20 Revision 1.1 1999/04/06 22:22:09 rjmcnab
21 Initial revision.
22
23 */
24
25
26#include "queryfilter.h"
27#include "fileutil.h"
28#include "queryinfo.h"
29
30
31// some useful functions
32
33// translate will return true if successful
34static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
35 infodbclass info;
36
37 trans_OID.clear();
38
39 // get the info
40 if (gdbmptr == NULL) return false;
41 if (!gdbmptr->getinfo(docnum, info)) return false;
42
43 // translate
44 if (info["section"].empty()) return false;
45
46 trans_OID = info["section"];
47 return true;
48}
49
50
51
52queryfilterclass::queryfilterclass () {
53 gdbmptr = NULL;
54 mgsearchptr = NULL;
55
56 // -- onePerQuery StartResults integer
57 FilterOption_t filtopt;
58 filtopt.name = "StartResults";
59 filtopt.type = FilterOption_t::integert;
60 filtopt.repeatable = FilterOption_t::onePerQuery;
61 filtopt.defaultValue = "1";
62 filtopt.validValues.push_back("1");
63 filtopt.validValues.push_back("1000");
64 filterOptions["StartResults"] = filtopt;
65
66 // -- onePerQuery EndResults integer
67 filtopt.clear();
68 filtopt.name = "EndResults";
69 filtopt.type = FilterOption_t::integert;
70 filtopt.repeatable = FilterOption_t::onePerQuery;
71 filtopt.defaultValue = "10";
72 filtopt.validValues.push_back("1");
73 filtopt.validValues.push_back("1000");
74 filterOptions["EndResults"] = filtopt;
75
76 // -- onePerQuery QueryType enumerated (boolean, ranked)
77 filtopt.clear();
78 filtopt.name = "QueryType";
79 filtopt.type = FilterOption_t::enumeratedt;
80 filtopt.repeatable = FilterOption_t::onePerQuery;
81 filtopt.defaultValue = "ranked";
82 filtopt.validValues.push_back("boolean");
83 filtopt.validValues.push_back("ranked");
84 filterOptions["QueryType"] = filtopt;
85
86 // -- onePerTerm Term string ???
87 filtopt.clear();
88 filtopt.name = "Term";
89 filtopt.type = FilterOption_t::stringt;
90 filtopt.repeatable = FilterOption_t::onePerTerm;
91 filtopt.defaultValue = "";
92 filterOptions["Term"] = filtopt;
93
94 // -- onePerTerm Casefold boolean
95 filtopt.clear();
96 filtopt.name = "Casefold";
97 filtopt.type = FilterOption_t::booleant;
98 filtopt.repeatable = FilterOption_t::onePerTerm;
99 filtopt.defaultValue = "true";
100 filtopt.validValues.push_back("false");
101 filtopt.validValues.push_back("true");
102 filterOptions["Casefold"] = filtopt;
103
104 // -- onePerTerm Stem boolean
105 filtopt.clear();
106 filtopt.name = "Stem";
107 filtopt.type = FilterOption_t::booleant;
108 filtopt.repeatable = FilterOption_t::onePerTerm;
109 filtopt.defaultValue = "false";
110 filtopt.validValues.push_back("false");
111 filtopt.validValues.push_back("true");
112 filterOptions["Stem"] = filtopt;
113
114 // -- onePerTerm Index enumerated
115 filtopt.clear();
116 filtopt.name = "Index";
117 filtopt.type = FilterOption_t::enumeratedt;
118 filtopt.repeatable = FilterOption_t::onePerTerm;
119 filtopt.defaultValue = "";
120 filterOptions["Index"] = filtopt;
121
122 // -- onePerTerm Subcollection enumerated
123 filtopt.clear();
124 filtopt.name = "Subcollection";
125 filtopt.type = FilterOption_t::enumeratedt;
126 filtopt.repeatable = FilterOption_t::onePerTerm;
127 filtopt.defaultValue = "";
128 filterOptions["Subcollection"] = filtopt;
129
130 // -- onePerTerm Language enumerated
131 filtopt.clear();
132 filtopt.name = "Language";
133 filtopt.type = FilterOption_t::enumeratedt;
134 filtopt.repeatable = FilterOption_t::onePerTerm;
135 filtopt.defaultValue = "";
136 filterOptions["Language"] = filtopt;
137}
138
139queryfilterclass::~queryfilterclass () {
140}
141
142void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
143 filterclass::configure (key, cfgline);
144
145 if (key == "indexmap") {
146 indexmap.importmap (cfgline);
147
148 // update the list of indexes in the filter information
149 text_tarray options;
150 indexmap.gettoarray (options);
151 filterOptions["Index"].validValues = options;
152
153 } else if (key == "subcollectionmap") {
154 subcollectionmap.importmap (cfgline);
155
156 // update the list of subcollections in the filter information
157 text_tarray options;
158 subcollectionmap.gettoarray (options);
159 filterOptions["Subcollection"].validValues = options;
160
161 } else if (key == "languagemap") {
162 languagemap.importmap (cfgline);
163
164 // update the list of languages in the filter information
165 text_tarray options;
166 languagemap.gettoarray (options);
167 filterOptions["Language"].validValues = options;
168 }
169}
170
171bool queryfilterclass::init (ostream &logout) {
172 outconvertclass text_t2ascii;
173
174 if (!filterclass::init(logout)) return false;
175
176 // get the filename for the database and make sure it exists
177 gdbm_filename = filename_cat(collectdir,"index","text",collection);
178#ifdef _LITTLE_ENDIAN
179 gdbm_filename += ".ldb";
180#else
181 gdbm_filename += ".bdb";
182#endif
183 if (!file_exists(gdbm_filename)) {
184 logout << text_t2ascii
185 << "error: gdbm database \""
186 << gdbm_filename << "\" does not exist\n\n";
187 return false;
188 }
189
190 return true;
191}
192
193void queryfilterclass::filter (const FilterRequest_t &request,
194 FilterResponse_t &response,
195 comerror_t &err, ostream &logout) {
196 outconvertclass text_t2ascii;
197
198 response.clear ();
199 err = noError;
200 if (gdbmptr == NULL) {
201 // most likely a configuration problem
202 logout << text_t2ascii
203 << "configuration error: queryfilter contains a null gdbmclass\n\n";
204 err = configurationError;
205 return;
206 }
207 if (mgsearchptr == NULL) {
208 // most likely a configuration problem
209 logout << text_t2ascii
210 << "configuration error: queryfilter contains a null mgsearchclass\n\n";
211 err = configurationError;
212 return;
213 }
214
215 // open the database
216 gdbmptr->setlogout(&logout);
217 if (!gdbmptr->opendatabase (gdbm_filename)) {
218 // most likely a system problem (we have already checked that the
219 // gdbm database exists)
220 logout << text_t2ascii
221 << "system problem: open on gdbm database \""
222 << gdbm_filename << "\" failed\n\n";
223 err = systemProblem;
224 return;
225 }
226
227 // get the query parameters
228 int startresults = filterOptions["StartResults"].defaultValue.getint();
229 int endresults = filterOptions["EndResults"].defaultValue.getint();
230 text_t index = filterOptions["Index"].defaultValue;
231 text_t subcollection = filterOptions["Subcollection"].defaultValue;
232 text_t language = filterOptions["Language"].defaultValue;
233 queryparamclass queryparams;
234 queryparams.collection = collection;
235 queryparams.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
236 queryparams.casefolding = (filterOptions["Casefold"].defaultValue == "true");
237 queryparams.stemming = (filterOptions["Stem"].defaultValue == "true");
238
239 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
240 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
241 while (options_here != options_end) {
242 if ((*options_here).name == "StartResults") {
243 startresults = (*options_here).value.getint();
244 } else if ((*options_here).name == "EndResults") {
245 endresults = (*options_here).value.getint();
246 } else if ((*options_here).name == "QueryType") {
247 queryparams.search_type = ((*options_here).value == "ranked");
248 } else if ((*options_here).name == "Term") {
249 queryparams.querystring = (*options_here).value;
250 } else if ((*options_here).name == "Casefold") {
251 queryparams.casefolding = ((*options_here).value == "true");
252 } else if ((*options_here).name == "Stem") {
253 queryparams.stemming = ((*options_here).value == "true");
254 } else if ((*options_here).name == "Index") {
255 index = (*options_here).value;
256 } else if ((*options_here).name == "Subcollection") {
257 subcollection = (*options_here).value;
258 } else if ((*options_here).name == "Language") {
259 language = (*options_here).value;
260 } else {
261 logout << text_t2ascii
262 << "warning: unknown queryfilter option \""
263 << (*options_here).name
264 << "\" ignored.\n\n";
265 }
266
267 options_here++;
268 }
269
270 queryparams.search_index = index+subcollection+language;
271 queryparams.maxdocs = (endresults > 100) ? endresults : 100;
272
273 // do query
274 queryresultsclass queryresults;
275 mgsearchptr->setcollectdir (collectdir);
276 if (!mgsearchptr->search(queryparams, queryresults)) {
277 // most likely a system problem
278 logout << text_t2ascii
279 << "system problem: could not do search with mg for index \""
280 << queryparams.search_index << "\".\n\n";
281 err = systemProblem;
282 return;
283 }
284
285 // assemble document results
286 if ((request.filterResultOptions & FROID) || (request.filterResultOptions & FRranking) ||
287 (request.filterResultOptions & FRmetadata)) {
288 int resultnum = 1;
289 ResultDocInfo_t resultdoc;
290 text_t trans_OID;
291 vector<docresultclass>::iterator docs_here = queryresults.docs.begin();
292 vector<docresultclass>::iterator docs_end = queryresults.docs.end();
293
294 while (docs_here != docs_end) {
295 if (resultnum > endresults) break;
296
297 // translate the document number
298 if (!translate(gdbmptr, (*docs_here).docnum, trans_OID)) {
299 logout << text_t2ascii
300 << "warning: could not translate mg document number \""
301 << (*docs_here).docnum << "\"to OID.\n\n";
302
303 } else {
304 // see if it is in the set (or the set is empty)
305 if (request.docSet.empty() || in_set(request.docSet, trans_OID)) {
306 if (resultnum >= startresults) {
307 // add this document
308 resultdoc.OID = trans_OID;
309 resultdoc.ranking = (int)((*docs_here).docweight * 10000.0 + 0.5);
310 response.docInfo.push_back (resultdoc);
311 }
312
313 resultnum++;
314 }
315 }
316
317 docs_here++;
318 }
319 }
320
321 // assemble the term results
322 if ((request.filterResultOptions & FRtermFreq) || (request.filterResultOptions & FRmatchTerms)) {
323 queryresults.sortqueryterms();
324 queryresults.uniqqueryterms();
325
326 TermInfo_t terminfo;
327 bool terms_first = true;
328 vector<termfreqclass>::iterator terms_here = queryresults.terms.begin();
329 vector<termfreqclass>::iterator terms_end = queryresults.terms.end();
330
331 while (terms_here != terms_end) {
332 terminfo.clear();
333 terminfo.term = (*terms_here).termstr;
334 terminfo.freq = (*terms_here).termfreq;
335 if (terms_first) terminfo.matchTerms = queryresults.termvariants;
336 terms_first = false;
337
338 response.termInfo.push_back (terminfo);
339
340 terms_here++;
341 }
342 }
343
344 response.numDocs = queryresults.getnumdocs();
345 response.isApprox = (queryresults.getnumdocs() != queryparams.maxdocs);
346}
Note: See TracBrowser for help on using the repository browser.