source: trunk/gsdl/src/colservr/queryfilter.cpp@ 9210

Last change on this file since 9210 was 8024, checked in by davidb, 20 years ago

Renaming of variables and member functions using constructs such as
'mgsearchptr' and the like to 'textsearchptr' to better reflect what
the item represents.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.6 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp -- base class for queryfilters
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryfilter.h"
27#include "fileutil.h"
28#include "gsdltools.h"
29#include <assert.h>
30
31
32// translate will return true if successful
33bool queryfilterclass::translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
34 infodbclass info;
35
36 trans_OID.clear();
37
38 // get the info
39 if (gdbmptr == NULL) return false;
40 if (!gdbmptr->getinfo(docnum, info)) return false;
41
42 // translate
43 if (info["section"].empty()) return false;
44
45 trans_OID = info["section"];
46 return true;
47}
48
49
50// whether document results are needed
51bool queryfilterclass::need_matching_docs (int filterResultOptions) {
52 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
53 (filterResultOptions & FRmetadata));
54}
55
56// whether term information is needed
57bool queryfilterclass::need_term_info (int filterResultOptions) {
58 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
59}
60
61/////////////////////////////////
62// functions for queryfilterclass
63/////////////////////////////////
64
65// get the query parameters
66void queryfilterclass::parse_query_params (const FilterRequest_t &request,
67 vector<queryparamclass> &query_params,
68 int &startresults, int &endresults,
69 text_t &phrasematch, ostream &logout) {
70 outconvertclass text_t2ascii;
71
72 // set defaults for the return parameters
73 query_params.erase(query_params.begin(), query_params.end());
74 startresults = filterOptions["StartResults"].defaultValue.getint();
75 endresults = filterOptions["EndResults"].defaultValue.getint();
76 phrasematch = filterOptions["PhraseMatch"].defaultValue;
77
78 // set defaults for query parameters
79 queryparamclass query;
80 query.combinequery = "or"; // first one must be "or"
81 query.collection = collection;
82 query.index = filterOptions["Index"].defaultValue;
83 query.subcollection = filterOptions["Subcollection"].defaultValue;
84 query.language = filterOptions["Language"].defaultValue;
85 query.querystring.clear();
86 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
87 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
88 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
89 query.stemming = (filterOptions["Stem"].defaultValue == "true");
90 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
91 query.level = filterOptions["Level"].defaultValue;
92 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
93 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
94 while (options_here != options_end) {
95 if ((*options_here).name == "CombineQuery") {
96 // add this query
97
98 // "all", needed when combining queries where the document results are needed
99 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
100 query_params.push_back (query);
101
102 // start on next query
103 query.clear();
104 query.combinequery = (*options_here).value;
105
106 // set defaults for query parameters
107 query.collection = collection;
108 query.index = filterOptions["Index"].defaultValue;
109 query.subcollection = filterOptions["Subcollection"].defaultValue;
110 query.language = filterOptions["Language"].defaultValue;
111 query.querystring.clear();
112 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
113 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
114 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
115 query.stemming = (filterOptions["Stem"].defaultValue == "true");
116 query.level = filterOptions["Level"].defaultValue;
117 // "all", needed when combining queries where the document results are needed
118 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
119 else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
120
121 } else if ((*options_here).name == "StartResults") {
122 startresults = (*options_here).value.getint();
123 } else if ((*options_here).name == "EndResults") {
124 endresults = (*options_here).value.getint();
125 } else if ((*options_here).name == "QueryType") {
126 query.search_type = ((*options_here).value == "ranked");
127 } else if ((*options_here).name == "MatchMode") {
128 query.match_mode = ((*options_here).value == "all");
129 if (query.match_mode == 1) query.maxdocs = -1;
130 } else if ((*options_here).name == "Term") {
131 query.querystring = (*options_here).value;
132 } else if ((*options_here).name == "Casefold") {
133 query.casefolding = ((*options_here).value == "true");
134 } else if ((*options_here).name == "Stem") {
135 query.stemming = ((*options_here).value == "true");
136 } else if ((*options_here).name == "Index"&& (*options_here).value !="") {
137 query.index = (*options_here).value;
138 } else if ((*options_here).name == "Subcollection") {
139 query.subcollection = (*options_here).value;
140 } else if ((*options_here).name == "Language") {
141 query.language = (*options_here).value;
142 } else if ((*options_here).name == "Maxdocs") {
143 query.maxdocs = (*options_here).value.getint();
144 } else if ((*options_here).name == "PhraseMatch") {
145 phrasematch = (*options_here).value;
146 } else if ((*options_here).name == "Level") {
147 query.level = (*options_here).value;
148 } else {
149 logout << text_t2ascii
150 << "warning: unknown queryfilter option \""
151 << (*options_here).name
152 << "\" ignored.\n\n";
153 }
154
155 options_here++;
156 }
157
158 // add the last query
159 query_params.push_back (query);
160}
161
162
163
164
165queryfilterclass::queryfilterclass () {
166 gdbmptr = NULL;
167 textsearchptr = NULL;
168
169 FilterOption_t filtopt;
170 filtopt.name = "CombineQuery";
171 filtopt.type = FilterOption_t::enumeratedt;
172 filtopt.repeatable = FilterOption_t::onePerQuery;
173 filtopt.defaultValue = "and";
174 filtopt.validValues.push_back("and");
175 filtopt.validValues.push_back("or");
176 filtopt.validValues.push_back("not");
177 filterOptions["CombineQuery"] = filtopt;
178
179 // -- onePerQuery StartResults integer
180 filtopt.clear();
181 filtopt.name = "StartResults";
182 filtopt.type = FilterOption_t::integert;
183 filtopt.repeatable = FilterOption_t::onePerQuery;
184 filtopt.defaultValue = "1";
185 filtopt.validValues.push_back("1");
186 filtopt.validValues.push_back("1000");
187 filterOptions["StartResults"] = filtopt;
188
189 // -- onePerQuery EndResults integer
190 filtopt.clear();
191 filtopt.name = "EndResults";
192 filtopt.type = FilterOption_t::integert;
193 filtopt.repeatable = FilterOption_t::onePerQuery;
194 filtopt.defaultValue = "10";
195 filtopt.validValues.push_back("-1");
196 filtopt.validValues.push_back("1000");
197 filterOptions["EndResults"] = filtopt;
198
199 // -- onePerQuery QueryType enumerated (boolean, ranked)
200 filtopt.clear();
201 filtopt.name = "QueryType";
202 filtopt.type = FilterOption_t::enumeratedt;
203 filtopt.repeatable = FilterOption_t::onePerQuery;
204 filtopt.defaultValue = "ranked";
205 filtopt.validValues.push_back("boolean");
206 filtopt.validValues.push_back("ranked");
207 filterOptions["QueryType"] = filtopt;
208
209 // -- onePerQuery MatchMode enumerated (some, all)
210 filtopt.clear();
211 filtopt.name = "MatchMode";
212 filtopt.type = FilterOption_t::enumeratedt;
213 filtopt.repeatable = FilterOption_t::onePerQuery;
214 filtopt.defaultValue = "some";
215 filtopt.validValues.push_back("some");
216 filtopt.validValues.push_back("all");
217 filterOptions["MatchMode"] = filtopt;
218
219 // -- onePerTerm Term string ???
220 filtopt.clear();
221 filtopt.name = "Term";
222 filtopt.type = FilterOption_t::stringt;
223 filtopt.repeatable = FilterOption_t::onePerTerm;
224 filtopt.defaultValue = "";
225 filterOptions["Term"] = filtopt;
226
227 // -- onePerTerm Casefold boolean
228 filtopt.clear();
229 filtopt.name = "Casefold";
230 filtopt.type = FilterOption_t::booleant;
231 filtopt.repeatable = FilterOption_t::onePerTerm;
232 filtopt.defaultValue = "true";
233 filtopt.validValues.push_back("false");
234 filtopt.validValues.push_back("true");
235 filterOptions["Casefold"] = filtopt;
236
237 // -- onePerTerm Stem boolean
238 filtopt.clear();
239 filtopt.name = "Stem";
240 filtopt.type = FilterOption_t::booleant;
241 filtopt.repeatable = FilterOption_t::onePerTerm;
242 filtopt.defaultValue = "false";
243 filtopt.validValues.push_back("false");
244 filtopt.validValues.push_back("true");
245 filterOptions["Stem"] = filtopt;
246
247 // -- onePerTerm Index enumerated
248 filtopt.clear();
249 filtopt.name = "Index";
250 filtopt.type = FilterOption_t::enumeratedt;
251 filtopt.repeatable = FilterOption_t::onePerTerm;
252 filtopt.defaultValue = "";
253 filterOptions["Index"] = filtopt;
254
255 // -- onePerTerm Subcollection enumerated
256 filtopt.clear();
257 filtopt.name = "Subcollection";
258 filtopt.type = FilterOption_t::enumeratedt;
259 filtopt.repeatable = FilterOption_t::onePerTerm;
260 filtopt.defaultValue = "";
261 filterOptions["Subcollection"] = filtopt;
262
263 // -- onePerTerm Language enumerated
264 filtopt.clear();
265 filtopt.name = "Language";
266 filtopt.type = FilterOption_t::enumeratedt;
267 filtopt.repeatable = FilterOption_t::onePerTerm;
268 filtopt.defaultValue = "";
269 filterOptions["Language"] = filtopt;
270
271 // -- onePerQuery Maxdocs integer
272 filtopt.clear();
273 filtopt.name = "Maxdocs";
274 filtopt.type = FilterOption_t::integert;
275 filtopt.repeatable = FilterOption_t::onePerQuery;
276 filtopt.defaultValue = "200";
277 filtopt.validValues.push_back("-1");
278 filtopt.validValues.push_back("1000");
279 filterOptions["Maxdocs"] = filtopt;
280
281 // -- onePerQuery PhraseMatch enumerated
282 filtopt.clear();
283 filtopt.name = "PhraseMatch";
284 filtopt.type = FilterOption_t::enumeratedt;
285 filtopt.repeatable = FilterOption_t::onePerQuery;
286 filtopt.defaultValue = "some_phrases";
287 filtopt.validValues.push_back ("all_phrases");
288 filtopt.validValues.push_back ("some_phrases");
289 filtopt.validValues.push_back ("all_docs");
290 filterOptions["PhraseMatch"] = filtopt;
291}
292
293queryfilterclass::~queryfilterclass () {
294 // don't delete gdbmptr or mgsearchptr here, they'll
295 // be cleaned up by mggdbmsource
296}
297
298void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
299 filterclass::configure (key, cfgline);
300
301 if (key == "indexmap") {
302 indexmap.importmap (cfgline);
303
304 // update the list of indexes in the filter information
305 text_tarray options;
306 indexmap.gettoarray (options);
307 filterOptions["Index"].validValues = options;
308
309 } else if (key == "defaultindex") {
310 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
311
312 } else if (key == "subcollectionmap") {
313 subcollectionmap.importmap (cfgline);
314
315 // update the list of subcollections in the filter information
316 text_tarray options;
317 subcollectionmap.gettoarray (options);
318 filterOptions["Subcollection"].validValues = options;
319
320 } else if (key == "defaultsubcollection") {
321 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
322
323 } else if (key == "languagemap") {
324 languagemap.importmap (cfgline);
325
326 // update the list of languages in the filter information
327 text_tarray options;
328 languagemap.gettoarray (options);
329 filterOptions["Language"].validValues = options;
330
331 } else if (key == "defaultlanguage") {
332 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
333 }
334}
335
336bool queryfilterclass::init (ostream &logout) {
337 outconvertclass text_t2ascii;
338
339 if (!filterclass::init(logout)) return false;
340
341 if (filterOptions["Index"].defaultValue.empty()) {
342 // use first index in map as default if no default is set explicitly
343 text_tarray fromarray;
344 indexmap.getfromarray(fromarray);
345 if (fromarray.size()) {
346 filterOptions["Index"].defaultValue = fromarray[0];
347 }
348 }
349
350 if (filterOptions["Subcollection"].defaultValue.empty()) {
351 // use first subcollection in map as default if no default is set explicitly
352 text_tarray fromarray;
353 subcollectionmap.getfromarray(fromarray);
354 if (fromarray.size()) {
355 filterOptions["Subcollection"].defaultValue = fromarray[0];
356 }
357 }
358
359 if (filterOptions["Language"].defaultValue.empty()) {
360 // use first language in map as default if no default is set explicitly
361 text_tarray fromarray;
362 languagemap.getfromarray(fromarray);
363 if (fromarray.size()) {
364 filterOptions["Language"].defaultValue = fromarray[0];
365 }
366 }
367
368 // get the filename for the database and make sure it exists
369 gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", collection);
370
371 if (littleEndian()) gdbm_filename += ".ldb";
372 else gdbm_filename += ".bdb";
373
374 if (!file_exists(gdbm_filename)) {
375 logout << text_t2ascii
376 << "warning: gdbm database \"" //****
377 << gdbm_filename << "\" does not exist\n\n";
378 //return false; //****
379 }
380
381 return true;
382}
383
Note: See TracBrowser for help on using the repository browser.