source: gsdl/trunk/src/colservr/lucenequeryfilter.cpp@ 15580

Last change on this file since 15580 was 15558, checked in by mdewsnip, 16 years ago

(Adding new DB support) Changed lots of "gdbm"s to "db"s, in preparation for adding new DB types.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.0 KB
/**********************************************************************
 *
 * lucenequeryfilter.cpp --
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/


#include "lucenequeryfilter.h"
#include "fileutil.h"
#include <assert.h>
#include "lucenesearch.h"

/////////////////////////////////
// functions for queryfilterclass
/////////////////////////////////

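// The constructor registers the filter options supported by this query filter:
// "Level" (the index level to search at) and "IndexField" (the field to search
// in), both enumerated options that may be given once per query term.  Their
// valid values are filled in later by configure().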
lucenequeryfilterclass::lucenequeryfilterclass ()
  : queryfilterclass() {

  FilterOption_t filtopt;

  // -- onePerTerm Level enumerated
  //    likely to be Doc, Sec, Para, but we don't assume anything now
  filtopt.clear();
  filtopt.name = "Level";
  filtopt.type = FilterOption_t::enumeratedt;
  filtopt.repeatable = FilterOption_t::onePerTerm;
  filterOptions["Level"] = filtopt;

  // -- IndexField, enumerated, used to list available fields
  filtopt.clear();
  filtopt.name = "IndexField";
  filtopt.type = FilterOption_t::enumeratedt;
  filtopt.repeatable = FilterOption_t::onePerTerm;
  filtopt.defaultValue = "";
  filterOptions["IndexField"] = filtopt;
}

lucenequeryfilterclass::~lucenequeryfilterclass () {
}


// whether a query is a full-text browse
bool lucenequeryfilterclass::full_text_browse (int filterRequestOptions) {
  return (filterRequestOptions & FRfullTextBrowse);
}

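// configure() picks up collection configuration lines relevant to this filter:
// "indexfieldmap" populates the valid values for the IndexField option,
// "indexlevels" populates the valid values for the Level option (the first
// entry becomes the default), and "textlevel" passes the configured text level
// through to the underlying lucene search object.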
void lucenequeryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
  queryfilterclass::configure(key, cfgline);

  if (key == "indexfieldmap") {
    indexfieldmap.importmap (cfgline);

    // update the list of indexes in the filter information
    text_tarray options;
    indexfieldmap.gettoarray (options);

    text_tarray::const_iterator here = options.begin();
    text_tarray::const_iterator end = options.end();
    bool start = true;
    while (here != end) {
      if (!(*here).empty()) {
        filterOptions["IndexField"].validValues.push_back(*here);
        if (start) {
          filterOptions["IndexField"].defaultValue = *here;
          start = false;
        }
      }
      ++here;
    }
  } else if (key == "indexlevels") {
    text_tarray::const_iterator here = cfgline.begin();
    text_tarray::const_iterator end = cfgline.end();
    bool first = true;
    while (here != end) {
      if (!(*here).empty()) {
        if (first) {
          first = false;
          // the default is the first value
          filterOptions["Level"].defaultValue = *here;
        }
        filterOptions["Level"].validValues.push_back(*here);
      }
      ++here;
    }
  } else if (key == "textlevel") {
    ((lucenesearchclass *)textsearchptr)->set_gdbm_level(cfgline[0]);
  }
}


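// Main entry point for a filter request.  Full-text browse requests are handed
// off to browsefilter(); otherwise the collection database is opened, the query
// parameters are parsed, do_multi_query() is run, and the matching documents
// (translated from lucene document numbers to OIDs) and term information are
// copied into the response.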
void lucenequeryfilterclass::filter(const FilterRequest_t &request,
                                    FilterResponse_t &response,
                                    comerror_t &err, ostream &logout) {

  outconvertclass text_t2ascii;

  response.clear ();
  err = noError;
  if (db_ptr == NULL) {
    // most likely a configuration problem
    logout << text_t2ascii
           << "configuration error: queryfilter contains a null dbclass\n\n";
    err = configurationError;
    return;
  }
  if (textsearchptr == NULL) {
    // most likely a configuration problem
    logout << text_t2ascii
           << "configuration error: queryfilter contains a null textsearchclass (lucene)\n\n";
    err = configurationError;
    return;
  }
  if (full_text_browse(request.filterResultOptions)) {
    browsefilter(request, response, err, logout);
    return;
  }
  // open the database
  db_ptr->setlogout(&logout);
  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
    // most likely a system problem (we have already checked that the database exists)
    logout << text_t2ascii
           << "system problem: open on database \"" << db_filename << "\" failed\n\n";
    err = systemProblem;
    return;
  }

  // get the query parameters
  int startresults, endresults;
  text_t phrasematch; // not used here any more
  vector<queryparamclass> queryfilterparams;
  parse_query_params (request, queryfilterparams, startresults,
                      endresults, phrasematch, logout);

  // do query
  queryresultsclass queryresults;
  do_multi_query (request, queryfilterparams, queryresults, err, logout);
  response.error_message = queryresults.error_message;
  if (err != noError) return;

  // assemble document results
  if (need_matching_docs (request.filterResultOptions)) {

    int resultnum = 1;
    ResultDocInfo_t resultdoc;
    text_t trans_OID;
    vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
    vector<int>::iterator docorder_end = queryresults.docs.docorder.end();

    // Now handled by Lucene directly
    //if (endresults == -1) endresults = MAXNUMDOCS;

    while (docorder_here != docorder_end)
      {
        // Now handled by Lucene directly
        //if (resultnum > endresults) break;

        // translate the lucene document number to a Greenstone OID
        if (!translate(db_ptr, *docorder_here, trans_OID))
          {
            logout << text_t2ascii
                   << "warning: could not translate lucene document number \""
                   << *docorder_here << "\" to OID.\n\n";
          }
        else
          {
            docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);

            // see if there is a result for this number,
            // and if it is in the request set (or the request set is empty)
            if (docset_here != queryresults.docs.docset.end() &&
                (request.docSet.empty() || in_set(request.docSet, trans_OID)))
              {
                // Now handled by Lucene directly
                //if (resultnum >= startresults) {

                // add this document, scaling its floating-point weight to an
                // integer ranking (weight * 10000, rounded to the nearest integer)
                resultdoc.OID = trans_OID;
                resultdoc.result_num = resultnum;
                resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);

                response.docInfo.push_back (resultdoc);
                //}
                ++resultnum;
              }
          } // else

        ++docorder_here;
      }
  } // if need matching docs

  // assemble the term results
  if (need_term_info(request.filterResultOptions)) {
    // note: the terms have already been sorted and uniqued - ?? have they??

    TermInfo_t terminfo;
    bool terms_first = true;

    termfreqclassarray::iterator terms_here = queryresults.terms.begin();
    termfreqclassarray::iterator terms_end = queryresults.terms.end();

    while (terms_here != terms_end) {
      terminfo.clear();
      terminfo.term = (*terms_here).termstr;
      terminfo.freq = (*terms_here).termfreq;
      // lucene doesn't return any termvariants at this stage,
      // so make sure the original term is set
      terminfo.matchTerms.push_back(terminfo.term);

      // this bit gets the matchTerms, i.e. the equivalent (stemmed/casefolded) terms
      if (terms_first) {
        text_tset::iterator termvariants_here = queryresults.termvariants.begin();
        text_tset::iterator termvariants_end = queryresults.termvariants.end();
        while (termvariants_here != termvariants_end) {
          terminfo.matchTerms.push_back (*termvariants_here);
          ++termvariants_here;
        }
      }
      terms_first = false;

      response.termInfo.push_back (terminfo);

      ++terms_here;
    }

    // add the stop words
    text_tset::iterator stopwords_here = queryresults.stopwords.begin();
    text_tset::iterator stopwords_end = queryresults.stopwords.end();
    while (stopwords_here != stopwords_end) {
      response.stopwords.insert(*stopwords_here);
      ++stopwords_here;
    }
  }

  db_ptr->closedatabase(); // Important that local library doesn't leave any files open
  response.numDocs = queryresults.docs_matched;
  response.isApprox = queryresults.is_approx;
}

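// Handles full-text browse requests: parses the query parameters, asks the
// lucene search object to do a browse (browse_search) over the requested index,
// and returns the resulting terms, with their frequencies, as term information.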
void lucenequeryfilterclass::browsefilter(const FilterRequest_t &request,
                                          FilterResponse_t &response,
                                          comerror_t &err, ostream &logout) {

  outconvertclass text_t2ascii;

  // get the query parameters
  int startresults, endresults;
  text_t phrasematch; // not used here any more; kept only so that the
                      // parse_query_params function can still be used

  vector<queryparamclass> queryfilterparams;
  parse_query_params (request, queryfilterparams, startresults,
                      endresults, phrasematch, logout);

  vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();

  // do the browse query
  queryresultsclass queryresults;
  queryresults.clear();

  int numDocs = endresults - startresults;
  textsearchptr->setcollectdir (collectdir);

  if (!((lucenesearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
    // most likely a system problem
    logout << text_t2ascii
           << "system problem: could not do full text browse with lucene for index \""
           << (*query_here).index << (*query_here).subcollection
           << (*query_here).language << "\".\n\n";
    err = systemProblem;
    return;
  }

  // assemble the term results
  TermInfo_t terminfo;

  termfreqclassarray::iterator terms_here = queryresults.terms.begin();
  termfreqclassarray::iterator terms_end = queryresults.terms.end();

  while (terms_here != terms_end) {
    terminfo.clear();
    terminfo.term = (*terms_here).termstr;
    terminfo.freq = (*terms_here).termfreq;

    response.termInfo.push_back (terminfo);

    ++terms_here;
  }
}

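// Runs each query in query_params through the lucene search object and merges
// the per-query results into multiresults, combining document sets with the
// "and"/"or"/"not" operator carried by each query, and accumulating term,
// term-variant and stopword information.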
// lucenesearchptr and db_ptr are assumed to be valid
void lucenequeryfilterclass::do_multi_query (const FilterRequest_t &request,
                                             const vector<queryparamclass> &query_params,
                                             queryresultsclass &multiresults,
                                             comerror_t &err, ostream &logout) {
  outconvertclass text_t2ascii;

  err = noError;
  textsearchptr->setcollectdir (collectdir);
  multiresults.clear();

  vector<queryparamclass>::const_iterator query_here = query_params.begin();
  vector<queryparamclass>::const_iterator query_end = query_params.end();
  while (query_here != query_end) {
    queryresultsclass thisqueryresults;
    if (!textsearchptr->search((*query_here), thisqueryresults)) {
      // most likely a system problem
      logout << text_t2ascii
             << "system problem: could not do search with lucene for index \""
             << (*query_here).index << (*query_here).level
             << (*query_here).subcollection
             << (*query_here).language << "\".\n\n";
      err = systemProblem;
      return;
    }

    // check for syntax error
    if (thisqueryresults.syntax_error == true) {
      logout << text_t2ascii
             << "syntax problem: invalid query string \""
             << (*query_here).querystring << "\".\n";
      err = syntaxError;
      return;
    }

    // combine the results
    if (need_matching_docs (request.filterResultOptions)) {

      if (query_params.size() == 1) {
        multiresults.error_message = thisqueryresults.error_message;
        multiresults.docs = thisqueryresults.docs; // just one set of results
        multiresults.docs_matched = thisqueryresults.docs_matched;
        multiresults.is_approx = thisqueryresults.is_approx;

      } else {
        if ((*query_here).combinequery == "and") {
          multiresults.docs.combine_and (thisqueryresults.docs);
        } else if ((*query_here).combinequery == "or") {
          multiresults.docs.combine_or (thisqueryresults.docs);
        } else if ((*query_here).combinequery == "not") {
          multiresults.docs.combine_not (thisqueryresults.docs);
        }
        multiresults.docs_matched = multiresults.docs.docset.size();
        multiresults.is_approx = Exact;
      }
    }

    // combine the term information
    if (need_term_info (request.filterResultOptions)) {
      // append the terms
      multiresults.orgterms.insert(multiresults.orgterms.end(),
                                   thisqueryresults.orgterms.begin(),
                                   thisqueryresults.orgterms.end());

      // add the term variants
      text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
      text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
      while (termvar_here != termvar_end) {
        multiresults.termvariants.insert(*termvar_here);
        ++termvar_here;
      }

      // add the stop words
      text_tset::iterator stopwords_here = thisqueryresults.stopwords.begin();
      text_tset::iterator stopwords_end = thisqueryresults.stopwords.end();
      while (stopwords_here != stopwords_end) {
        multiresults.stopwords.insert(*stopwords_here);
        ++stopwords_here;
      }
    }

    ++query_here;
  }

  // sort and unique the query terms
  multiresults.sortuniqqueryterms ();
}