Context Navigation

source: trunk/gsdl/src/colservr/queryinfo.cpp@ 351

Last change on this file since 351 was 351, checked in by rjmcnab, 25 years ago
Added ability to combine two or more independent queries.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 10.0 KB

Line
1	/**********************************************************************
2	*
3	* queryinfo.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* PUT COPYRIGHT NOTICE HERE
7	*
8	* $Id: queryinfo.cpp 351 1999-07-07 06:19:47Z rjmcnab $
9	*
10	*********************************************************************/
11
12	/*
13	$Log$
14	Revision 1.7 1999/07/07 06:19:47 rjmcnab
15	Added ability to combine two or more independant queries.
16
17	Revision 1.6 1999/07/01 09:29:21 rjmcnab
18	Changes for better reporting of number documents which match a query. Changes
19	should still work as before with older versions of mg.
20
21	Revision 1.5 1999/07/01 03:56:17 rjmcnab
22	Added a set of utf8 encoded equivalent terms of a query term. I also
23	added a flag for handling post-processing of the query.
24
25	Revision 1.4 1999/06/30 04:04:13 rjmcnab
26	made stemming functions available from mgsearch and made the stems
27	for the query terms available in queryinfo
28
29	Revision 1.3 1999/06/29 22:06:23 rjmcnab
30	Added a couple of fields to queryinfo to handle a special version
31	of mg.
32
33	Revision 1.2 1999/01/12 01:51:02 rjmcnab
34
35	Standard header.
36
37	Revision 1.1 1999/01/08 09:02:18 rjmcnab
38
39	Moved from src/library.
40
41	*/
42
43
44	#include "queryinfo.h"
45
46
47	// query parameters
48
49	queryparamclass::queryparamclass () {
50	clear ();
51	}
52
53	void queryparamclass::clear () {
54	combinequery.clear();
55	collection.clear();
56	index.clear();
57	subcollection.clear();
58	language.clear();
59	querystring.clear();
60	search_type = 0; // 0 = boolean, 1 = ranked
61	casefolding = 0;
62	stemming = 0;
63	maxdocs = -1; // all
64	}
65
66
67	queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
68	combinequery = q.combinequery;
69	collection = q.collection;
70	index = q.index;
71	subcollection = q.subcollection;
72	language = q.language;
73	querystring = q.querystring;
74	search_type = q.search_type;
75	casefolding = q.casefolding;
76	stemming = q.stemming;
77	maxdocs = q.maxdocs;
78
79	return *this;
80	}
81
82
83	bool operator==(const queryparamclass &x, const queryparamclass &y) {
84	return ((x.combinequery == y.combinequery) &&
85	(x.collection == y.collection) &&
86	(x.index == y.index) &&
87	(x.subcollection == y.subcollection) &&
88	(x.language == y.language) &&
89	(x.querystring == y.querystring) &&
90	(x.search_type == y.search_type) &&
91	(x.casefolding == y.casefolding) &&
92	(x.stemming == y.stemming) &&
93	(x.maxdocs == y.maxdocs));
94	}
95
96	bool operator!=(const queryparamclass &x, const queryparamclass &y) {
97	return !(x == y);
98	}
99
100
101	ostream &operator<< (ostream &outs, queryparamclass &q) {
102	outconvertclass text_t2ascii;
103
104	outs << "*** queryparamclass\n";
105	outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
106	outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
107	outs << text_t2ascii << " index = \"" << q.index << "\"\n";
108	outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
109	outs << text_t2ascii << " language = \"" << q.language << "\"\n";
110	outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
111	outs << " search_type = \"" << q.search_type << "\"\n";
112	outs << " casefolding = \"" << q.casefolding << "\"\n";
113	outs << " stemming = \"" << q.stemming << "\"\n";
114	outs << " maxdocs = \"" << q.maxdocs << "\"\n";
115	outs << "\n";
116
117	return outs;
118	}
119
120
121
122
123	// term frequencies
124
125	termfreqclass::termfreqclass () {
126	clear();
127	}
128
129	void termfreqclass::clear() {
130	termstr.clear();
131	termstemstr.clear();
132	utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
133	termfreq = 0;
134	}
135
136	termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
137	termstr = t.termstr;
138	termstemstr = t.termstemstr;
139	utf8equivterms = t.utf8equivterms;
140	termfreq = t.termfreq;
141
142	return *this;
143	}
144
145	bool operator==(const termfreqclass &x, const termfreqclass &y) {
146	return ((x.termstr == y.termstr) &&
147	(x.termstemstr == y.termstemstr) &&
148	(x.termfreq == y.termfreq));
149	}
150
151	bool operator!=(const termfreqclass &x, const termfreqclass &y) {
152	return !(x == y);
153	}
154
155	// ordered by termfreq and then by termstr
156	bool operator<(const termfreqclass &x, const termfreqclass &y) {
157	return ((x.termfreq < y.termfreq) \|\|
158	((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) \|\|
159	((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
160	}
161
162	bool operator>(const termfreqclass &x, const termfreqclass &y) {
163	return ((x.termfreq > y.termfreq) \|\|
164	((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) \|\|
165	((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
166	}
167
168	// stream output for debugging purposes
169	ostream &operator<< (ostream &outs, termfreqclass &t) {
170	outconvertclass text_t2ascii;
171
172	outs << text_t2ascii << " t:\"" << t.termstr << "\"";
173	outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
174	outs << " f:" << t.termfreq << "\n";
175
176	return outs;
177	}
178
179
180
181	// one query result
182
183	docresultclass::docresultclass() {
184	clear ();
185	}
186
187	void docresultclass::clear () {
188	docnum=-1;
189	docweight=0.0;
190	num_query_terms_matched=0;
191	num_phrase_match=0;
192	}
193
194	// merges two result classes relating to a single docnum
195	docresultclass &docresultclass::combine(const docresultclass &d) {
196	docweight += d.docweight; // budget!
197	num_query_terms_matched += d.num_query_terms_matched;
198	num_phrase_match += d.num_phrase_match;
199
200	return *this;
201	}
202
203	docresultclass &docresultclass::operator=(const docresultclass &d) {
204	docnum = d.docnum;
205	docweight = d.docweight;
206	num_query_terms_matched = d.num_query_terms_matched;
207	num_phrase_match = d.num_phrase_match;
208
209	return *this;
210	}
211
212
213	// stream output for debugging purposes
214	ostream &operator<< (ostream &outs, docresultclass &a) {
215	outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
216	return outs;
217	}
218
219
220
221	// many document results
222
223	docresultsclass::docresultsclass () {
224	clear ();
225	}
226
227	void docresultsclass::clear () {
228	docset.erase(docset.begin(), docset.end());
229	docorder.erase(docorder.begin(), docorder.end());
230	}
231
232	void docresultsclass::docnum_order() {
233	docorder.erase(docorder.begin(), docorder.end());
234
235	docresultmap::iterator here = docset.begin();
236	docresultmap::iterator end = docset.end();
237	while (here != end) {
238	docorder.push_back ((*here).first);
239	here++;
240	}
241	}
242
243	void docresultsclass::combine_and (const docresultsclass &d) {
244	docorder.erase(docorder.begin(), docorder.end());
245
246	// put the resulting set in tempresults
247	docresultmap tempresults;
248
249	docresultmap::const_iterator d_here = d.docset.begin();
250	docresultmap::const_iterator d_end = d.docset.end();
251	docresultmap::iterator found = docset.end();
252	while (d_here != d_end) {
253	found = docset.find((*d_here).first);
254	if (found != docset.end()) {
255	(found).second.combine ((d_here).second);
256	tempresults[(found).first] = (found).second;
257	}
258	d_here++;
259	}
260
261	// then copy it back to docset
262	docset = tempresults;
263	}
264
265	void docresultsclass::combine_or (const docresultsclass &d) {
266	docorder.erase(docorder.begin(), docorder.end());
267
268	docresultmap::const_iterator d_here = d.docset.begin();
269	docresultmap::const_iterator d_end = d.docset.end();
270	docresultmap::iterator found = docset.end();
271	while (d_here != d_end) {
272	found = docset.find((*d_here).first);
273	if (found != docset.end()) {
274	(found).second.combine ((d_here).second);
275	} else {
276	docset[(d_here).first] = (d_here).second;
277	}
278	d_here++;
279	}
280	}
281
282	void docresultsclass::combine_not (const docresultsclass &d) {
283	docorder.erase(docorder.begin(), docorder.end());
284
285	docresultmap::const_iterator d_here = d.docset.begin();
286	docresultmap::const_iterator d_end = d.docset.end();
287	docresultmap::iterator found = docset.end();
288	while (d_here != d_end) {
289	found = docset.find((*d_here).first);
290	if (found != docset.end()) docset.erase (found);
291	d_here++;
292	}
293	}
294
295	docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
296	docset = d.docset;
297	docorder = d.docorder;
298
299	return *this;
300	}
301
302
303
304
305	// query results
306
307	void queryresultsclass::clear () {
308	docs_matched = 0;
309	is_approx = false;
310
311	postprocessed = false;
312
313	docs.clear();
314	orgterms.erase(orgterms.begin(),orgterms.end());
315	terms.erase(terms.begin(),terms.end());
316	}
317
318	queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
319	docs_matched = q.docs_matched;
320	is_approx = q.is_approx;
321
322	postprocessed = q.postprocessed;
323
324	docs = q.docs;
325	terms = q.terms;
326	termvariants = q.termvariants;
327
328	return *this;
329	}
330
331	void queryresultsclass::sortuniqqueryterms() {
332	vector<termfreqclass> tempterms = orgterms;
333	text_tset seenterms;
334	terms.clear();
335
336	// sort the terms to get the frequencies in ascending order
337	sort (tempterms.begin(), tempterms.end());
338
339	// insert first occurance of each term (maximum)
340	vector<termfreqclass>::reverse_iterator here = tempterms.rbegin();
341	vector<termfreqclass>::reverse_iterator end = tempterms.rend();
342	while (here != end) {
343	if (seenterms.find((*here).termstr) == seenterms.end()) {
344	// the termstemstr and utf8equivterms might be different for
345	// different occurances of the term
346	(*here).termstemstr.clear();
347	(here).utf8equivterms.erase((here).utf8equivterms.begin(),
348	(*here).utf8equivterms.end());
349	terms.push_back(*here);
350	seenterms.insert((*here).termstr);
351	}
352	here++;
353	}
354
355	// now re-sort in ascending order
356	sort (terms.begin(), terms.end());
357	}
358
359
360	// stream output for debugging purposes
361	ostream &operator<< (ostream &outs, queryresultsclass &q) {
362	outs << "*** queryresultsclass\n";
363	outs << "docs\n";
364
365	docresultmap::iterator docshere = q.docs.docset.begin();
366	docresultmap::iterator docsend = q.docs.docset.end();
367	while (docshere != docsend) {
368	outs << (*docshere).second;
369	docshere++;
370	}
371
372	outs << "orgterms\n";
373	vector<termfreqclass>::iterator orgtermshere = q.orgterms.begin();
374	vector<termfreqclass>::iterator orgtermsend = q.orgterms.end();
375	while (orgtermshere != orgtermsend) {
376	outs << (*orgtermshere);
377	orgtermshere++;
378	}
379
380	outs << "terms\n";
381	vector<termfreqclass>::iterator termshere = q.terms.begin();
382	vector<termfreqclass>::iterator termsend = q.terms.end();
383	while (termshere != termsend) {
384	outs << (*termshere);
385	termshere++;
386	}
387
388	outs << "\n";
389
390	return outs;
391	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: