Context Navigation

source: trunk/gsdl/src/colservr/queryinfo.cpp@ 12770

Last change on this file since 12770 was 12770, checked in by mdewsnip, 18 years ago
Changed the Lucene "-fuzzy" argument to "-fuzziness <value>", for more accurate control.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 11.9 KB

Line
1	/**********************************************************************
2	*
3	* queryinfo.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26	#include "queryinfo.h"
27
28
29	// query parameters
30
31	queryparamclass::queryparamclass () {
32	clear ();
33	}
34
35	void queryparamclass::clear () {
36	combinequery.clear();
37	collection.clear();
38	index.clear();
39	subcollection.clear();
40	language.clear();
41	level.clear();
42	querystring.clear();
43	search_type = 0; // 0 = boolean, 1 = ranked
44	match_mode = 0; // 0 = some, 1 = all
45	casefolding = 0;
46	stemming = 0;
47	maxdocs = -1; // all
48	maxnumeric = 4; // must default to the same value as mg_passes
49	filterstring.clear();
50	sortfield.clear();
51	fuzziness.clear();
52	startresults = 1; // all
53	endresults = 10; // all
54	}
55
56
57	queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
58	combinequery = q.combinequery;
59	collection = q.collection;
60	index = q.index;
61	subcollection = q.subcollection;
62	language = q.language;
63	level = q.level;
64	querystring = q.querystring;
65	search_type = q.search_type;
66	match_mode = q.match_mode;
67	casefolding = q.casefolding;
68	stemming = q.stemming;
69	maxdocs = q.maxdocs;
70	maxnumeric = q.maxnumeric;
71	filterstring = q.filterstring;
72	sortfield = q.sortfield;
73	fuzziness = q.fuzziness;
74	startresults = q.startresults;
75	endresults = q.endresults;
76	return *this;
77	}
78
79
80	bool operator==(const queryparamclass &x, const queryparamclass &y) {
81	return ((x.combinequery == y.combinequery) &&
82	(x.collection == y.collection) &&
83	(x.index == y.index) &&
84	(x.subcollection == y.subcollection) &&
85	(x.language == y.language) &&
86	(x.level == y.level) &&
87	(x.querystring == y.querystring) &&
88	(x.search_type == y.search_type) &&
89	(x.match_mode == y.match_mode) &&
90	(x.casefolding == y.casefolding) &&
91	(x.stemming == y.stemming) &&
92	(x.maxdocs == y.maxdocs) &&
93	(x.maxnumeric == y.maxnumeric) &&
94	(x.filterstring == y.filterstring) &&
95	(x.sortfield == y.sortfield) &&
96	(x.fuzziness == y.fuzziness) &&
97	(x.startresults == y.startresults) &&
98	(x.startresults == y.startresults));
99	}
100
101	bool operator!=(const queryparamclass &x, const queryparamclass &y) {
102	return !(x == y);
103	}
104
105
106	ostream &operator<< (ostream &outs, queryparamclass &q) {
107	outconvertclass text_t2ascii;
108
109	outs << "*** queryparamclass\n";
110	outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
111	outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
112	outs << text_t2ascii << " index = \"" << q.index << "\"\n";
113	outs << text_t2ascii << " level = \"" << q.level << "\"\n";
114	outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
115	outs << text_t2ascii << " language = \"" << q.language << "\"\n";
116	outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
117	outs << " search_type = \"" << q.search_type << "\"\n";
118	outs << " match_mode = \"" << q.match_mode << "\"\n";
119	outs << " casefolding = \"" << q.casefolding << "\"\n";
120	outs << " stemming = \"" << q.stemming << "\"\n";
121	outs << " maxdocs = \"" << q.maxdocs << "\"\n";
122	outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
123	outs << " filterstring = \"" << q.filterstring << "\"\n";
124	outs << " sortfield = \"" << q.sortfield << "\"\n";
125	outs << " fuzziness = \"" << q.fuzziness << "\"\n";
126	outs << " startresults = \"" << q.startresults << "\"\n";
127	outs << " endresults = \"" << q.endresults << "\"\n";
128	outs << "\n";
129
130	return outs;
131	}
132
133
134
135
136	// term frequencies
137
138	termfreqclass::termfreqclass () {
139	clear();
140	}
141
142	void termfreqclass::clear() {
143	termstr.clear();
144	termstemstr.clear();
145	utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
146	termfreq = 0;
147	}
148
149	termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
150	termstr = t.termstr;
151	termstemstr = t.termstemstr;
152	utf8equivterms = t.utf8equivterms;
153	termfreq = t.termfreq;
154
155	return *this;
156	}
157
158	bool operator==(const termfreqclass &x, const termfreqclass &y) {
159	return ((x.termstr == y.termstr) &&
160	(x.termstemstr == y.termstemstr) &&
161	(x.termfreq == y.termfreq));
162	}
163
164	bool operator!=(const termfreqclass &x, const termfreqclass &y) {
165	return !(x == y);
166	}
167
168	// ordered by termfreq and then by termstr
169	bool operator<(const termfreqclass &x, const termfreqclass &y) {
170	return ((x.termfreq < y.termfreq) \|\|
171	((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) \|\|
172	((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
173	}
174
175	bool operator>(const termfreqclass &x, const termfreqclass &y) {
176	return ((x.termfreq > y.termfreq) \|\|
177	((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) \|\|
178	((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
179	}
180
181	// stream output for debugging purposes
182	ostream &operator<< (ostream &outs, termfreqclass &t) {
183	outconvertclass text_t2ascii;
184
185	outs << text_t2ascii << " t:\"" << t.termstr << "\"";
186	outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
187	outs << " f:" << t.termfreq << "\n";
188
189	return outs;
190	}
191
192
193
194	// one query result
195
196	docresultclass::docresultclass() {
197	clear ();
198	}
199
200	void docresultclass::clear () {
201	docnum=-1;
202	docweight=0.0;
203	num_query_terms_matched=0;
204	num_phrase_match=0;
205	}
206
207	// merges two result classes relating to a single docnum
208	docresultclass &docresultclass::combine(const docresultclass &d) {
209	docweight += d.docweight; // budget!
210	num_query_terms_matched += d.num_query_terms_matched;
211	num_phrase_match += d.num_phrase_match;
212
213	return *this;
214	}
215
216	docresultclass &docresultclass::operator=(const docresultclass &d) {
217	docnum = d.docnum;
218	docweight = d.docweight;
219	num_query_terms_matched = d.num_query_terms_matched;
220	num_phrase_match = d.num_phrase_match;
221
222	return *this;
223	}
224
225
226	bool operator==(const docresultclass &x, const docresultclass &y) {
227	return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
228	(x.num_query_terms_matched == y.num_query_terms_matched) &&
229	(x.num_phrase_match == y.num_phrase_match));
230	}
231
232	bool operator<(const docresultclass &x, const docresultclass &y) {
233	return ((x.docnum < y.docnum) \|\|
234	((x.docnum == y.docnum) &&
235	((x.docweight < y.docweight) \|\|
236	((x.docweight == y.docweight) &&
237	((x.num_query_terms_matched < y.num_query_terms_matched) \|\|
238	((x.num_query_terms_matched == y.num_query_terms_matched) &&
239	((x.num_phrase_match < y.num_phrase_match))))))));
240	}
241
242
243	// stream output for debugging purposes
244	ostream &operator<< (ostream &outs, docresultclass &a) {
245	outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
246	return outs;
247	}
248
249
250
251	// many document results
252
253	docresultsclass::docresultsclass () {
254	clear ();
255	}
256
257	void docresultsclass::clear () {
258	docset.erase(docset.begin(), docset.end());
259	docorder.erase(docorder.begin(), docorder.end());
260	}
261
262	void docresultsclass::docnum_order() {
263	docorder.erase(docorder.begin(), docorder.end());
264
265	docresultmap::iterator here = docset.begin();
266	docresultmap::iterator end = docset.end();
267	while (here != end) {
268	docorder.push_back ((*here).first);
269	++here;
270	}
271	}
272
273	void docresultsclass::combine_and (const docresultsclass &d) {
274	docorder.erase(docorder.begin(), docorder.end());
275
276	// put the resulting set in tempresults
277	docresultmap tempresults;
278
279	docresultmap::const_iterator d_here = d.docset.begin();
280	docresultmap::const_iterator d_end = d.docset.end();
281	docresultmap::iterator found = docset.end();
282	while (d_here != d_end) {
283	found = docset.find((*d_here).first);
284	if (found != docset.end()) {
285	(found).second.combine ((d_here).second);
286	tempresults[(found).first] = (found).second;
287	}
288	++d_here;
289	}
290
291	// then copy it back to docset
292	docset = tempresults;
293	}
294
295	void docresultsclass::combine_or (const docresultsclass &d) {
296	docorder.erase(docorder.begin(), docorder.end());
297
298	docresultmap::const_iterator d_here = d.docset.begin();
299	docresultmap::const_iterator d_end = d.docset.end();
300	docresultmap::iterator found = docset.end();
301	while (d_here != d_end) {
302	found = docset.find((*d_here).first);
303	if (found != docset.end()) {
304	(found).second.combine ((d_here).second);
305	} else {
306	docset[(d_here).first] = (d_here).second;
307	}
308	++d_here;
309	}
310	}
311
312	void docresultsclass::combine_not (const docresultsclass &d) {
313	docorder.erase(docorder.begin(), docorder.end());
314
315	docresultmap::const_iterator d_here = d.docset.begin();
316	docresultmap::const_iterator d_end = d.docset.end();
317	docresultmap::iterator found = docset.end();
318	while (d_here != d_end) {
319	found = docset.find((*d_here).first);
320	if (found != docset.end()) docset.erase (found);
321	++d_here;
322	}
323	}
324
325	docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
326	docset = d.docset;
327	docorder = d.docorder;
328
329	return *this;
330	}
331
332
333
334
335	// query results
336
337	void queryresultsclass::clear () {
338	error_message = g_EmptyText;
339	docs_matched = 0;
340	is_approx = Exact;
341	syntax_error = false;
342	postprocessed = false;
343
344	docs.clear();
345	orgterms.erase(orgterms.begin(),orgterms.end());
346	terms.erase(terms.begin(),terms.end());
347	}
348
349	queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
350	error_message = q.error_message;
351	docs_matched = q.docs_matched;
352	is_approx = q.is_approx;
353	syntax_error = q.syntax_error;
354	postprocessed = q.postprocessed;
355
356	docs = q.docs;
357	terms = q.terms;
358	termvariants = q.termvariants;
359
360	return *this;
361	}
362
363	void queryresultsclass::sortuniqqueryterms() {
364	termfreqclassarray tempterms = orgterms;
365	text_tset seenterms;
366	terms.erase(terms.begin(), terms.end());
367
368	// sort the terms to get the frequencies in ascending order
369	sort (tempterms.begin(), tempterms.end());
370
371	// insert first occurance of each term (maximum)
372	termfreqclassarray::reverse_iterator here = tempterms.rbegin();
373	termfreqclassarray::reverse_iterator end = tempterms.rend();
374	while (here != end) {
375	if (seenterms.find((*here).termstr) == seenterms.end()) {
376	// the termstemstr and utf8equivterms might be different for
377	// different occurances of the term
378	(*here).termstemstr.clear();
379	(here).utf8equivterms.erase((here).utf8equivterms.begin(),
380	(*here).utf8equivterms.end());
381	terms.push_back(*here);
382	seenterms.insert((*here).termstr);
383	}
384	++here;
385	}
386
387	// now re-sort in ascending order
388	sort (terms.begin(), terms.end());
389	}
390
391
392	// stream output for debugging purposes
393	ostream &operator<< (ostream &outs, queryresultsclass &q) {
394	outs << "*** queryresultsclass\n";
395	outs << "docs\n";
396
397	docresultmap::iterator docshere = q.docs.docset.begin();
398	docresultmap::iterator docsend = q.docs.docset.end();
399	while (docshere != docsend) {
400	outs << (*docshere).second;
401	++docshere;
402	}
403
404	outs << "orgterms\n";
405	termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
406	termfreqclassarray::iterator orgtermsend = q.orgterms.end();
407	while (orgtermshere != orgtermsend) {
408	outs << (*orgtermshere);
409	++orgtermshere;
410	}
411
412	outs << "terms\n";
413	termfreqclassarray::iterator termshere = q.terms.begin();
414	termfreqclassarray::iterator termsend = q.terms.end();
415	while (termshere != termsend) {
416	outs << (*termshere);
417	++termshere;
418	}
419
420	outs << "\n";
421
422	return outs;
423	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: