source: trunk/gsdl/src/colservr/queryinfo.cpp@ 351

Last change on this file since 351 was 351, checked in by rjmcnab, 25 years ago

Added ability to combine two or more independent queries.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.0 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryinfo.cpp 351 1999-07-07 06:19:47Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.7 1999/07/07 06:19:47 rjmcnab
15 Added ability to combine two or more independent queries.
16
17 Revision 1.6 1999/07/01 09:29:21 rjmcnab
18 Changes for better reporting of number documents which match a query. Changes
19 should still work as before with older versions of mg.
20
21 Revision 1.5 1999/07/01 03:56:17 rjmcnab
22 Added a set of utf8 encoded equivalent terms of a query term. I also
23 added a flag for handling post-processing of the query.
24
25 Revision 1.4 1999/06/30 04:04:13 rjmcnab
26 made stemming functions available from mgsearch and made the stems
27 for the query terms available in queryinfo
28
29 Revision 1.3 1999/06/29 22:06:23 rjmcnab
30 Added a couple of fields to queryinfo to handle a special version
31 of mg.
32
33 Revision 1.2 1999/01/12 01:51:02 rjmcnab
34
35 Standard header.
36
37 Revision 1.1 1999/01/08 09:02:18 rjmcnab
38
39 Moved from src/library.
40
41 */
42
43
44#include "queryinfo.h"
45
46
47// query parameters
48
49queryparamclass::queryparamclass () {
50 clear ();
51}
52
53void queryparamclass::clear () {
54 combinequery.clear();
55 collection.clear();
56 index.clear();
57 subcollection.clear();
58 language.clear();
59 querystring.clear();
60 search_type = 0; // 0 = boolean, 1 = ranked
61 casefolding = 0;
62 stemming = 0;
63 maxdocs = -1; // all
64}
65
66
67queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
68 combinequery = q.combinequery;
69 collection = q.collection;
70 index = q.index;
71 subcollection = q.subcollection;
72 language = q.language;
73 querystring = q.querystring;
74 search_type = q.search_type;
75 casefolding = q.casefolding;
76 stemming = q.stemming;
77 maxdocs = q.maxdocs;
78
79 return *this;
80}
81
82
83bool operator==(const queryparamclass &x, const queryparamclass &y) {
84 return ((x.combinequery == y.combinequery) &&
85 (x.collection == y.collection) &&
86 (x.index == y.index) &&
87 (x.subcollection == y.subcollection) &&
88 (x.language == y.language) &&
89 (x.querystring == y.querystring) &&
90 (x.search_type == y.search_type) &&
91 (x.casefolding == y.casefolding) &&
92 (x.stemming == y.stemming) &&
93 (x.maxdocs == y.maxdocs));
94}
95
96bool operator!=(const queryparamclass &x, const queryparamclass &y) {
97 return !(x == y);
98}
99
100
101ostream &operator<< (ostream &outs, queryparamclass &q) {
102 outconvertclass text_t2ascii;
103
104 outs << "*** queryparamclass\n";
105 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
106 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
107 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
108 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
109 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
110 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
111 outs << " search_type = \"" << q.search_type << "\"\n";
112 outs << " casefolding = \"" << q.casefolding << "\"\n";
113 outs << " stemming = \"" << q.stemming << "\"\n";
114 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
115 outs << "\n";
116
117 return outs;
118}
119
120
121
122
123// term frequencies
124
125termfreqclass::termfreqclass () {
126 clear();
127}
128
129void termfreqclass::clear() {
130 termstr.clear();
131 termstemstr.clear();
132 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
133 termfreq = 0;
134}
135
136termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
137 termstr = t.termstr;
138 termstemstr = t.termstemstr;
139 utf8equivterms = t.utf8equivterms;
140 termfreq = t.termfreq;
141
142 return *this;
143}
144
145bool operator==(const termfreqclass &x, const termfreqclass &y) {
146 return ((x.termstr == y.termstr) &&
147 (x.termstemstr == y.termstemstr) &&
148 (x.termfreq == y.termfreq));
149}
150
151bool operator!=(const termfreqclass &x, const termfreqclass &y) {
152 return !(x == y);
153}
154
155// ordered by termfreq and then by termstr
156bool operator<(const termfreqclass &x, const termfreqclass &y) {
157 return ((x.termfreq < y.termfreq) ||
158 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
159 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
160}
161
162bool operator>(const termfreqclass &x, const termfreqclass &y) {
163 return ((x.termfreq > y.termfreq) ||
164 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
165 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
166}
167
168// stream output for debugging purposes
169ostream &operator<< (ostream &outs, termfreqclass &t) {
170 outconvertclass text_t2ascii;
171
172 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
173 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
174 outs << " f:" << t.termfreq << "\n";
175
176 return outs;
177}
178
179
180
181// one query result
182
183docresultclass::docresultclass() {
184 clear ();
185}
186
187void docresultclass::clear () {
188 docnum=-1;
189 docweight=0.0;
190 num_query_terms_matched=0;
191 num_phrase_match=0;
192}
193
194// merges two result classes relating to a single docnum
195docresultclass &docresultclass::combine(const docresultclass &d) {
196 docweight += d.docweight; // budget!
197 num_query_terms_matched += d.num_query_terms_matched;
198 num_phrase_match += d.num_phrase_match;
199
200 return *this;
201}
202
203docresultclass &docresultclass::operator=(const docresultclass &d) {
204 docnum = d.docnum;
205 docweight = d.docweight;
206 num_query_terms_matched = d.num_query_terms_matched;
207 num_phrase_match = d.num_phrase_match;
208
209 return *this;
210}
211
212
213// stream output for debugging purposes
214ostream &operator<< (ostream &outs, docresultclass &a) {
215 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
216 return outs;
217}
218
219
220
221// many document results
222
223docresultsclass::docresultsclass () {
224 clear ();
225}
226
227void docresultsclass::clear () {
228 docset.erase(docset.begin(), docset.end());
229 docorder.erase(docorder.begin(), docorder.end());
230}
231
232void docresultsclass::docnum_order() {
233 docorder.erase(docorder.begin(), docorder.end());
234
235 docresultmap::iterator here = docset.begin();
236 docresultmap::iterator end = docset.end();
237 while (here != end) {
238 docorder.push_back ((*here).first);
239 here++;
240 }
241}
242
243void docresultsclass::combine_and (const docresultsclass &d) {
244 docorder.erase(docorder.begin(), docorder.end());
245
246 // put the resulting set in tempresults
247 docresultmap tempresults;
248
249 docresultmap::const_iterator d_here = d.docset.begin();
250 docresultmap::const_iterator d_end = d.docset.end();
251 docresultmap::iterator found = docset.end();
252 while (d_here != d_end) {
253 found = docset.find((*d_here).first);
254 if (found != docset.end()) {
255 (*found).second.combine ((*d_here).second);
256 tempresults[(*found).first] = (*found).second;
257 }
258 d_here++;
259 }
260
261 // then copy it back to docset
262 docset = tempresults;
263}
264
265void docresultsclass::combine_or (const docresultsclass &d) {
266 docorder.erase(docorder.begin(), docorder.end());
267
268 docresultmap::const_iterator d_here = d.docset.begin();
269 docresultmap::const_iterator d_end = d.docset.end();
270 docresultmap::iterator found = docset.end();
271 while (d_here != d_end) {
272 found = docset.find((*d_here).first);
273 if (found != docset.end()) {
274 (*found).second.combine ((*d_here).second);
275 } else {
276 docset[(*d_here).first] = (*d_here).second;
277 }
278 d_here++;
279 }
280}
281
282void docresultsclass::combine_not (const docresultsclass &d) {
283 docorder.erase(docorder.begin(), docorder.end());
284
285 docresultmap::const_iterator d_here = d.docset.begin();
286 docresultmap::const_iterator d_end = d.docset.end();
287 docresultmap::iterator found = docset.end();
288 while (d_here != d_end) {
289 found = docset.find((*d_here).first);
290 if (found != docset.end()) docset.erase (found);
291 d_here++;
292 }
293}
294
295docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
296 docset = d.docset;
297 docorder = d.docorder;
298
299 return *this;
300}
301
302
303
304
305// query results
306
307void queryresultsclass::clear () {
308 docs_matched = 0;
309 is_approx = false;
310
311 postprocessed = false;
312
313 docs.clear();
314 orgterms.erase(orgterms.begin(),orgterms.end());
315 terms.erase(terms.begin(),terms.end());
316}
317
318queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
319 docs_matched = q.docs_matched;
320 is_approx = q.is_approx;
321
322 postprocessed = q.postprocessed;
323
324 docs = q.docs;
325 terms = q.terms;
326 termvariants = q.termvariants;
327
328 return *this;
329}
330
331void queryresultsclass::sortuniqqueryterms() {
332 vector<termfreqclass> tempterms = orgterms;
333 text_tset seenterms;
334 terms.clear();
335
336 // sort the terms to get the frequencies in ascending order
337 sort (tempterms.begin(), tempterms.end());
338
339 // insert first occurance of each term (maximum)
340 vector<termfreqclass>::reverse_iterator here = tempterms.rbegin();
341 vector<termfreqclass>::reverse_iterator end = tempterms.rend();
342 while (here != end) {
343 if (seenterms.find((*here).termstr) == seenterms.end()) {
344 // the termstemstr and utf8equivterms might be different for
345 // different occurances of the term
346 (*here).termstemstr.clear();
347 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
348 (*here).utf8equivterms.end());
349 terms.push_back(*here);
350 seenterms.insert((*here).termstr);
351 }
352 here++;
353 }
354
355 // now re-sort in ascending order
356 sort (terms.begin(), terms.end());
357}
358
359
360// stream output for debugging purposes
361ostream &operator<< (ostream &outs, queryresultsclass &q) {
362 outs << "*** queryresultsclass\n";
363 outs << "docs\n";
364
365 docresultmap::iterator docshere = q.docs.docset.begin();
366 docresultmap::iterator docsend = q.docs.docset.end();
367 while (docshere != docsend) {
368 outs << (*docshere).second;
369 docshere++;
370 }
371
372 outs << "orgterms\n";
373 vector<termfreqclass>::iterator orgtermshere = q.orgterms.begin();
374 vector<termfreqclass>::iterator orgtermsend = q.orgterms.end();
375 while (orgtermshere != orgtermsend) {
376 outs << (*orgtermshere);
377 orgtermshere++;
378 }
379
380 outs << "terms\n";
381 vector<termfreqclass>::iterator termshere = q.terms.begin();
382 vector<termfreqclass>::iterator termsend = q.terms.end();
383 while (termshere != termsend) {
384 outs << (*termshere);
385 termshere++;
386 }
387
388 outs << "\n";
389
390 return outs;
391}
Note: See TracBrowser for help on using the repository browser.