Context Navigation

source: trunk/gsdl/src/colservr/queryfilter.cpp@ 12770

Last change on this file since 12770 was 12770, checked in by mdewsnip, 18 years ago
Changed the Lucene "-fuzzy" argument to "-fuzziness <value>", for more accurate control.
Property svn:executable set to `*`
Property svn:keywords set to `Author Date Id Revision`
File size: 14.8 KB

Line
1	/**********************************************************************
2	*
3	* queryfilter.cpp -- base class for queryfilters
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "queryfilter.h"
27	#include "fileutil.h"
28	#include "gsdltools.h"
29	#include <assert.h>
30
31
32	// translate will return true if successful
33	bool queryfilterclass::translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
34	infodbclass info;
35
36	trans_OID.clear();
37
38	// get the info
39	if (gdbmptr == NULL) return false;
40	if (!gdbmptr->getinfo(docnum, info)) return false;
41
42	// translate
43	if (info["section"].empty()) return false;
44
45	trans_OID = info["section"];
46	return true;
47	}
48
49
50	// whether document results are needed
51	bool queryfilterclass::need_matching_docs (int filterResultOptions) {
52	return ((filterResultOptions & FROID) \|\| (filterResultOptions & FRranking) \|\|
53	(filterResultOptions & FRmetadata));
54	}
55
56	// whether term information is needed
57	bool queryfilterclass::need_term_info (int filterResultOptions) {
58	return ((filterResultOptions & FRtermFreq) \|\| (filterResultOptions & FRmatchTerms));
59	}
60
61	/////////////////////////////////
62	// functions for queryfilterclass
63	/////////////////////////////////
64
65	// get the query parameters
66	void queryfilterclass::parse_query_params (const FilterRequest_t &request,
67	vector<queryparamclass> &query_params,
68	int &startresults, int &endresults,
69	text_t &phrasematch, ostream &logout) {
70	outconvertclass text_t2ascii;
71
72	// set defaults for the return parameters
73	query_params.erase(query_params.begin(), query_params.end());
74	startresults = filterOptions["StartResults"].defaultValue.getint();
75	endresults = filterOptions["EndResults"].defaultValue.getint();
76	phrasematch = filterOptions["PhraseMatch"].defaultValue;
77
78	// set defaults for query parameters
79	queryparamclass query;
80	query.combinequery = "or"; // first one must be "or"
81	query.collection = collection;
82	query.index = filterOptions["Index"].defaultValue;
83	query.subcollection = filterOptions["Subcollection"].defaultValue;
84	query.language = filterOptions["Language"].defaultValue;
85	query.querystring.clear();
86	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
87	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
88	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
89	query.stemming = (filterOptions["Stem"].defaultValue == "true");
90	query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
91	query.level = filterOptions["Level"].defaultValue;
92	query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
93	query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
94	query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
95	query.maxnumeric = maxnumeric;
96	OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
97	OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
98	while (options_here != options_end) {
99	if ((*options_here).name == "CombineQuery") {
100	// add this query
101
102	// "all", needed when combining queries where the document results are needed
103	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
104	query_params.push_back (query);
105
106	// start on next query
107	query.clear();
108	query.combinequery = (*options_here).value;
109
110	// set defaults for query parameters
111	query.collection = collection;
112	query.index = filterOptions["Index"].defaultValue;
113	query.subcollection = filterOptions["Subcollection"].defaultValue;
114	query.language = filterOptions["Language"].defaultValue;
115	query.querystring.clear();
116	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
117	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
118	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
119	query.stemming = (filterOptions["Stem"].defaultValue == "true");
120	query.level = filterOptions["Level"].defaultValue;
121	query.filterstring = filterOptions["FilterString"].defaultValue; // Lucene specific
122	query.sortfield = filterOptions["SortField"].defaultValue; // Lucene specific
123	query.fuzziness = filterOptions["Fuzziness"].defaultValue; // Lucene specific
124	query.maxnumeric = maxnumeric;
125	// "all", needed when combining queries where the document results are needed
126	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
127	else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
128
129	} else if ((*options_here).name == "StartResults") {
130	startresults = (*options_here).value.getint();
131	} else if ((*options_here).name == "EndResults") {
132	endresults = (*options_here).value.getint();
133	} else if ((*options_here).name == "QueryType") {
134	query.search_type = ((*options_here).value == "ranked");
135	} else if ((*options_here).name == "MatchMode") {
136	query.match_mode = ((*options_here).value == "all");
137	if (query.match_mode == 1) query.maxdocs = -1;
138	} else if ((*options_here).name == "Term") {
139	query.querystring = (*options_here).value;
140	} else if ((*options_here).name == "Casefold") {
141	query.casefolding = ((*options_here).value == "true");
142	} else if ((*options_here).name == "Stem") {
143	query.stemming = ((*options_here).value == "true");
144	} else if ((options_here).name == "Index"&& (options_here).value !="") {
145	query.index = (*options_here).value;
146	} else if ((*options_here).name == "Subcollection") {
147	query.subcollection = (*options_here).value;
148	} else if ((*options_here).name == "Language") {
149	query.language = (*options_here).value;
150	} else if ((*options_here).name == "Maxdocs") {
151	query.maxdocs = (*options_here).value.getint();
152	} else if ((*options_here).name == "PhraseMatch") {
153	phrasematch = (*options_here).value;
154	} else if ((*options_here).name == "Level") {
155	query.level = (*options_here).value;
156	} else if ((*options_here).name == "FilterString") {
157	query.filterstring = (*options_here).value;
158	} else if ((*options_here).name == "SortField") {
159	query.sortfield = (*options_here).value;
160	} else if ((*options_here).name == "Fuzziness") {
161	query.fuzziness = (*options_here).value;
162	} else {
163	logout << text_t2ascii
164	<< "warning: unknown queryfilter option \""
165	<< (*options_here).name
166	<< "\" ignored.\n\n";
167	}
168
169	++options_here;
170	}
171
172	// Store the start and end results in the query too, as lucene now needs to
173	// pass them through to the Java
174	query.startresults = startresults;
175	query.endresults = endresults;
176
177	// add the last query
178	query_params.push_back (query);
179	}
180
181
182
183
184	queryfilterclass::queryfilterclass () {
185	gdbmptr = NULL;
186	textsearchptr = NULL;
187	maxnumeric = 4;
188
189	FilterOption_t filtopt;
190	filtopt.name = "CombineQuery";
191	filtopt.type = FilterOption_t::enumeratedt;
192	filtopt.repeatable = FilterOption_t::onePerQuery;
193	filtopt.defaultValue = "and";
194	filtopt.validValues.push_back("and");
195	filtopt.validValues.push_back("or");
196	filtopt.validValues.push_back("not");
197	filterOptions["CombineQuery"] = filtopt;
198
199	// -- onePerQuery StartResults integer
200	filtopt.clear();
201	filtopt.name = "StartResults";
202	filtopt.type = FilterOption_t::integert;
203	filtopt.repeatable = FilterOption_t::onePerQuery;
204	filtopt.defaultValue = "1";
205	filtopt.validValues.push_back("1");
206	filtopt.validValues.push_back("1000");
207	filterOptions["StartResults"] = filtopt;
208
209	// -- onePerQuery EndResults integer
210	filtopt.clear();
211	filtopt.name = "EndResults";
212	filtopt.type = FilterOption_t::integert;
213	filtopt.repeatable = FilterOption_t::onePerQuery;
214	filtopt.defaultValue = "10";
215	filtopt.validValues.push_back("-1");
216	filtopt.validValues.push_back("1000");
217	filterOptions["EndResults"] = filtopt;
218
219	// -- onePerQuery QueryType enumerated (boolean, ranked)
220	filtopt.clear();
221	filtopt.name = "QueryType";
222	filtopt.type = FilterOption_t::enumeratedt;
223	filtopt.repeatable = FilterOption_t::onePerQuery;
224	filtopt.defaultValue = "ranked";
225	filtopt.validValues.push_back("boolean");
226	filtopt.validValues.push_back("ranked");
227	filterOptions["QueryType"] = filtopt;
228
229	// -- onePerQuery MatchMode enumerated (some, all)
230	filtopt.clear();
231	filtopt.name = "MatchMode";
232	filtopt.type = FilterOption_t::enumeratedt;
233	filtopt.repeatable = FilterOption_t::onePerQuery;
234	filtopt.defaultValue = "some";
235	filtopt.validValues.push_back("some");
236	filtopt.validValues.push_back("all");
237	filterOptions["MatchMode"] = filtopt;
238
239	// -- onePerTerm Term string ???
240	filtopt.clear();
241	filtopt.name = "Term";
242	filtopt.type = FilterOption_t::stringt;
243	filtopt.repeatable = FilterOption_t::onePerTerm;
244	filtopt.defaultValue = "";
245	filterOptions["Term"] = filtopt;
246
247	// -- onePerTerm Casefold boolean
248	filtopt.clear();
249	filtopt.name = "Casefold";
250	filtopt.type = FilterOption_t::booleant;
251	filtopt.repeatable = FilterOption_t::onePerTerm;
252	filtopt.defaultValue = "true";
253	filtopt.validValues.push_back("false");
254	filtopt.validValues.push_back("true");
255	filterOptions["Casefold"] = filtopt;
256
257	// -- onePerTerm Stem boolean
258	filtopt.clear();
259	filtopt.name = "Stem";
260	filtopt.type = FilterOption_t::booleant;
261	filtopt.repeatable = FilterOption_t::onePerTerm;
262	filtopt.defaultValue = "false";
263	filtopt.validValues.push_back("false");
264	filtopt.validValues.push_back("true");
265	filterOptions["Stem"] = filtopt;
266
267	// -- onePerTerm Index enumerated
268	filtopt.clear();
269	filtopt.name = "Index";
270	filtopt.type = FilterOption_t::enumeratedt;
271	filtopt.repeatable = FilterOption_t::onePerTerm;
272	filtopt.defaultValue = "";
273	filterOptions["Index"] = filtopt;
274
275	// -- onePerTerm Subcollection enumerated
276	filtopt.clear();
277	filtopt.name = "Subcollection";
278	filtopt.type = FilterOption_t::enumeratedt;
279	filtopt.repeatable = FilterOption_t::onePerTerm;
280	filtopt.defaultValue = "";
281	filterOptions["Subcollection"] = filtopt;
282
283	// -- onePerTerm Language enumerated
284	filtopt.clear();
285	filtopt.name = "Language";
286	filtopt.type = FilterOption_t::enumeratedt;
287	filtopt.repeatable = FilterOption_t::onePerTerm;
288	filtopt.defaultValue = "";
289	filterOptions["Language"] = filtopt;
290
291	// -- onePerQuery Maxdocs integer
292	filtopt.clear();
293	filtopt.name = "Maxdocs";
294	filtopt.type = FilterOption_t::integert;
295	filtopt.repeatable = FilterOption_t::onePerQuery;
296	filtopt.defaultValue = "200";
297	filtopt.validValues.push_back("-1");
298	filtopt.validValues.push_back("1000");
299	filterOptions["Maxdocs"] = filtopt;
300
301	// -- onePerQuery PhraseMatch enumerated
302	filtopt.clear();
303	filtopt.name = "PhraseMatch";
304	filtopt.type = FilterOption_t::enumeratedt;
305	filtopt.repeatable = FilterOption_t::onePerQuery;
306	filtopt.defaultValue = "some_phrases";
307	filtopt.validValues.push_back ("all_phrases");
308	filtopt.validValues.push_back ("some_phrases");
309	filtopt.validValues.push_back ("all_docs");
310	filterOptions["PhraseMatch"] = filtopt;
311	}
312
313	queryfilterclass::~queryfilterclass () {
314	// don't delete gdbmptr or mgsearchptr here, they'll
315	// be cleaned up by mggdbmsource
316	}
317
318	void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
319	filterclass::configure (key, cfgline);
320
321	if (key == "indexmap") {
322	indexmap.importmap (cfgline);
323
324	// update the list of indexes in the filter information
325	text_tarray options;
326	indexmap.gettoarray (options);
327	filterOptions["Index"].validValues = options;
328
329	} else if (key == "defaultindex") {
330	indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
331
332	} else if (key == "subcollectionmap") {
333	subcollectionmap.importmap (cfgline);
334
335	// update the list of subcollections in the filter information
336	text_tarray options;
337	subcollectionmap.gettoarray (options);
338	filterOptions["Subcollection"].validValues = options;
339
340	} else if (key == "defaultsubcollection") {
341	subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
342
343	} else if (key == "languagemap") {
344	languagemap.importmap (cfgline);
345
346	// update the list of languages in the filter information
347	text_tarray options;
348	languagemap.gettoarray (options);
349	filterOptions["Language"].validValues = options;
350
351	} else if (key == "defaultlanguage") {
352	languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
353	} else if (key == "indexstem") {
354	indexstem = cfgline[0];
355	} else if (key == "maxnumeric") {
356	maxnumeric = cfgline[0].getint();
357	}
358
359	}
360
361	bool queryfilterclass::init (ostream &logout) {
362	outconvertclass text_t2ascii;
363
364	if (!filterclass::init(logout)) return false;
365
366	if (filterOptions["Index"].defaultValue.empty()) {
367	// use first index in map as default if no default is set explicitly
368	text_tarray fromarray;
369	indexmap.getfromarray(fromarray);
370	if (fromarray.size()) {
371	filterOptions["Index"].defaultValue = fromarray[0];
372	}
373	}
374
375	if (filterOptions["Subcollection"].defaultValue.empty()) {
376	// use first subcollection in map as default if no default is set explicitly
377	text_tarray fromarray;
378	subcollectionmap.getfromarray(fromarray);
379	if (fromarray.size()) {
380	filterOptions["Subcollection"].defaultValue = fromarray[0];
381	}
382	}
383
384	if (filterOptions["Language"].defaultValue.empty()) {
385	// use first language in map as default if no default is set explicitly
386	text_tarray fromarray;
387	languagemap.getfromarray(fromarray);
388	if (fromarray.size()) {
389	filterOptions["Language"].defaultValue = fromarray[0];
390	}
391	}
392
393	// get the filename for the database and make sure it exists
394	if (indexstem.empty()) {
395	indexstem = collection;
396	}
397	gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", indexstem);
398
399	if (littleEndian()) gdbm_filename += ".ldb";
400	else gdbm_filename += ".bdb";
401
402	if (!file_exists(gdbm_filename)) {
403	logout << text_t2ascii
404	<< "warning: gdbm database \"" //****
405	<< gdbm_filename << "\" does not exist\n\n";
406	//return false; //****
407	}
408
409	return true;
410	}
411

Note: See TracBrowser for help on using the repository browser.

Download in other formats: