source: trunk/gsdl/src/colservr/queryinfo.cpp@ 358

Last change on this file since 358 was 358, checked in by rjmcnab, 25 years ago

Fixed a couple of compiler conflicts

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.1 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryinfo.cpp 358 1999-07-09 02:19:44Z rjmcnab $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.8 1999/07/09 02:19:44 rjmcnab
15 Fixed a couple of compiler conflicts
16
17 Revision 1.7 1999/07/07 06:19:47 rjmcnab
18 Added ability to combine two or more independent queries.
19
20 Revision 1.6 1999/07/01 09:29:21 rjmcnab
21 Changes for better reporting of number of documents which match a query. Changes
22 should still work as before with older versions of mg.
23
24 Revision 1.5 1999/07/01 03:56:17 rjmcnab
25 Added a set of utf8 encoded equivalent terms of a query term. I also
26 added a flag for handling post-processing of the query.
27
28 Revision 1.4 1999/06/30 04:04:13 rjmcnab
29 made stemming functions available from mgsearch and made the stems
30 for the query terms available in queryinfo
31
32 Revision 1.3 1999/06/29 22:06:23 rjmcnab
33 Added a couple of fields to queryinfo to handle a special version
34 of mg.
35
36 Revision 1.2 1999/01/12 01:51:02 rjmcnab
37
38 Standard header.
39
40 Revision 1.1 1999/01/08 09:02:18 rjmcnab
41
42 Moved from src/library.
43
44 */
45
46
47#include "queryinfo.h"
48
49
50// query parameters
51
52queryparamclass::queryparamclass () {
53 clear ();
54}
55
56void queryparamclass::clear () {
57 combinequery.clear();
58 collection.clear();
59 index.clear();
60 subcollection.clear();
61 language.clear();
62 querystring.clear();
63 search_type = 0; // 0 = boolean, 1 = ranked
64 casefolding = 0;
65 stemming = 0;
66 maxdocs = -1; // all
67}
68
69
70queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
71 combinequery = q.combinequery;
72 collection = q.collection;
73 index = q.index;
74 subcollection = q.subcollection;
75 language = q.language;
76 querystring = q.querystring;
77 search_type = q.search_type;
78 casefolding = q.casefolding;
79 stemming = q.stemming;
80 maxdocs = q.maxdocs;
81
82 return *this;
83}
84
85
86bool operator==(const queryparamclass &x, const queryparamclass &y) {
87 return ((x.combinequery == y.combinequery) &&
88 (x.collection == y.collection) &&
89 (x.index == y.index) &&
90 (x.subcollection == y.subcollection) &&
91 (x.language == y.language) &&
92 (x.querystring == y.querystring) &&
93 (x.search_type == y.search_type) &&
94 (x.casefolding == y.casefolding) &&
95 (x.stemming == y.stemming) &&
96 (x.maxdocs == y.maxdocs));
97}
98
99bool operator!=(const queryparamclass &x, const queryparamclass &y) {
100 return !(x == y);
101}
102
103
104ostream &operator<< (ostream &outs, queryparamclass &q) {
105 outconvertclass text_t2ascii;
106
107 outs << "*** queryparamclass\n";
108 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
109 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
110 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
111 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
112 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
113 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
114 outs << " search_type = \"" << q.search_type << "\"\n";
115 outs << " casefolding = \"" << q.casefolding << "\"\n";
116 outs << " stemming = \"" << q.stemming << "\"\n";
117 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
118 outs << "\n";
119
120 return outs;
121}
122
123
124
125
126// term frequencies
127
128termfreqclass::termfreqclass () {
129 clear();
130}
131
132void termfreqclass::clear() {
133 termstr.clear();
134 termstemstr.clear();
135 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
136 termfreq = 0;
137}
138
139termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
140 termstr = t.termstr;
141 termstemstr = t.termstemstr;
142 utf8equivterms = t.utf8equivterms;
143 termfreq = t.termfreq;
144
145 return *this;
146}
147
148bool operator==(const termfreqclass &x, const termfreqclass &y) {
149 return ((x.termstr == y.termstr) &&
150 (x.termstemstr == y.termstemstr) &&
151 (x.termfreq == y.termfreq));
152}
153
154bool operator!=(const termfreqclass &x, const termfreqclass &y) {
155 return !(x == y);
156}
157
158// ordered by termfreq and then by termstr
159bool operator<(const termfreqclass &x, const termfreqclass &y) {
160 return ((x.termfreq < y.termfreq) ||
161 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
162 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
163}
164
165bool operator>(const termfreqclass &x, const termfreqclass &y) {
166 return ((x.termfreq > y.termfreq) ||
167 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
168 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
169}
170
171// stream output for debugging purposes
172ostream &operator<< (ostream &outs, termfreqclass &t) {
173 outconvertclass text_t2ascii;
174
175 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
176 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
177 outs << " f:" << t.termfreq << "\n";
178
179 return outs;
180}
181
182
183
184// one query result
185
186docresultclass::docresultclass() {
187 clear ();
188}
189
190void docresultclass::clear () {
191 docnum=-1;
192 docweight=0.0;
193 num_query_terms_matched=0;
194 num_phrase_match=0;
195}
196
197// merges two result classes relating to a single docnum
198docresultclass &docresultclass::combine(const docresultclass &d) {
199 docweight += d.docweight; // budget!
200 num_query_terms_matched += d.num_query_terms_matched;
201 num_phrase_match += d.num_phrase_match;
202
203 return *this;
204}
205
206docresultclass &docresultclass::operator=(const docresultclass &d) {
207 docnum = d.docnum;
208 docweight = d.docweight;
209 num_query_terms_matched = d.num_query_terms_matched;
210 num_phrase_match = d.num_phrase_match;
211
212 return *this;
213}
214
215
216// stream output for debugging purposes
217ostream &operator<< (ostream &outs, docresultclass &a) {
218 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
219 return outs;
220}
221
222
223
224// many document results
225
226docresultsclass::docresultsclass () {
227 clear ();
228}
229
230void docresultsclass::clear () {
231 docset.erase(docset.begin(), docset.end());
232 docorder.erase(docorder.begin(), docorder.end());
233}
234
235void docresultsclass::docnum_order() {
236 docorder.erase(docorder.begin(), docorder.end());
237
238 docresultmap::iterator here = docset.begin();
239 docresultmap::iterator end = docset.end();
240 while (here != end) {
241 docorder.push_back ((*here).first);
242 here++;
243 }
244}
245
246void docresultsclass::combine_and (const docresultsclass &d) {
247 docorder.erase(docorder.begin(), docorder.end());
248
249 // put the resulting set in tempresults
250 docresultmap tempresults;
251
252 docresultmap::const_iterator d_here = d.docset.begin();
253 docresultmap::const_iterator d_end = d.docset.end();
254 docresultmap::iterator found = docset.end();
255 while (d_here != d_end) {
256 found = docset.find((*d_here).first);
257 if (found != docset.end()) {
258 (*found).second.combine ((*d_here).second);
259 tempresults[(*found).first] = (*found).second;
260 }
261 d_here++;
262 }
263
264 // then copy it back to docset
265 docset = tempresults;
266}
267
268void docresultsclass::combine_or (const docresultsclass &d) {
269 docorder.erase(docorder.begin(), docorder.end());
270
271 docresultmap::const_iterator d_here = d.docset.begin();
272 docresultmap::const_iterator d_end = d.docset.end();
273 docresultmap::iterator found = docset.end();
274 while (d_here != d_end) {
275 found = docset.find((*d_here).first);
276 if (found != docset.end()) {
277 (*found).second.combine ((*d_here).second);
278 } else {
279 docset[(*d_here).first] = (*d_here).second;
280 }
281 d_here++;
282 }
283}
284
285void docresultsclass::combine_not (const docresultsclass &d) {
286 docorder.erase(docorder.begin(), docorder.end());
287
288 docresultmap::const_iterator d_here = d.docset.begin();
289 docresultmap::const_iterator d_end = d.docset.end();
290 docresultmap::iterator found = docset.end();
291 while (d_here != d_end) {
292 found = docset.find((*d_here).first);
293 if (found != docset.end()) docset.erase (found);
294 d_here++;
295 }
296}
297
298docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
299 docset = d.docset;
300 docorder = d.docorder;
301
302 return *this;
303}
304
305
306
307
308// query results
309
310void queryresultsclass::clear () {
311 docs_matched = 0;
312 is_approx = false;
313
314 postprocessed = false;
315
316 docs.clear();
317 orgterms.erase(orgterms.begin(),orgterms.end());
318 terms.erase(terms.begin(),terms.end());
319}
320
321queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
322 docs_matched = q.docs_matched;
323 is_approx = q.is_approx;
324
325 postprocessed = q.postprocessed;
326
327 docs = q.docs;
328 terms = q.terms;
329 termvariants = q.termvariants;
330
331 return *this;
332}
333
334void queryresultsclass::sortuniqqueryterms() {
335 vector<termfreqclass> tempterms = orgterms;
336 text_tset seenterms;
337 terms.erase(terms.begin(), terms.end());
338
339 // sort the terms to get the frequencies in ascending order
340 sort (tempterms.begin(), tempterms.end());
341
342 // insert first occurance of each term (maximum)
343 vector<termfreqclass>::reverse_iterator here = tempterms.rbegin();
344 vector<termfreqclass>::reverse_iterator end = tempterms.rend();
345 while (here != end) {
346 if (seenterms.find((*here).termstr) == seenterms.end()) {
347 // the termstemstr and utf8equivterms might be different for
348 // different occurances of the term
349 (*here).termstemstr.clear();
350 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
351 (*here).utf8equivterms.end());
352 terms.push_back(*here);
353 seenterms.insert((*here).termstr);
354 }
355 here++;
356 }
357
358 // now re-sort in ascending order
359 sort (terms.begin(), terms.end());
360}
361
362
363// stream output for debugging purposes
364ostream &operator<< (ostream &outs, queryresultsclass &q) {
365 outs << "*** queryresultsclass\n";
366 outs << "docs\n";
367
368 docresultmap::iterator docshere = q.docs.docset.begin();
369 docresultmap::iterator docsend = q.docs.docset.end();
370 while (docshere != docsend) {
371 outs << (*docshere).second;
372 docshere++;
373 }
374
375 outs << "orgterms\n";
376 vector<termfreqclass>::iterator orgtermshere = q.orgterms.begin();
377 vector<termfreqclass>::iterator orgtermsend = q.orgterms.end();
378 while (orgtermshere != orgtermsend) {
379 outs << (*orgtermshere);
380 orgtermshere++;
381 }
382
383 outs << "terms\n";
384 vector<termfreqclass>::iterator termshere = q.terms.begin();
385 vector<termfreqclass>::iterator termsend = q.terms.end();
386 while (termshere != termsend) {
387 outs << (*termshere);
388 termshere++;
389 }
390
391 outs << "\n";
392
393 return outs;
394}
Note: See TracBrowser for help on using the repository browser.