source: branches/New_Config_Format-branch/gsdl/src/colservr/queryinfo.cpp@ 1279

Last change on this file since 1279 was 534, checked in by sjboddie, 25 years ago

added gpl notice

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.0 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: queryinfo.cpp 534 1999-09-07 04:57:43Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.12 1999/09/07 04:57:24 sjboddie
31 added gpl notice
32
33 Revision 1.11 1999/08/31 22:47:57 rjmcnab
34 Added MatchMode.
35
36 Revision 1.10 1999/07/16 03:42:23 sjboddie
37 changed isApprox
38
39 Revision 1.9 1999/07/16 00:14:01 sjboddie
40 added termfreqclassarray type
41
42 Revision 1.8 1999/07/09 02:19:44 rjmcnab
43 Fixed a couple of compiler conflicts
44
45 Revision 1.7 1999/07/07 06:19:47 rjmcnab
46 Added ability to combine two or more independent queries.
47
48 Revision 1.6 1999/07/01 09:29:21 rjmcnab
49 Changes for better reporting of number documents which match a query. Changes
50 should still work as before with older versions of mg.
51
52 Revision 1.5 1999/07/01 03:56:17 rjmcnab
53 Added a set of utf8 encoded equivalent terms of a query term. I also
54 added a flag for handling post-processing of the query.
55
56 Revision 1.4 1999/06/30 04:04:13 rjmcnab
57 made stemming functions available from mgsearch and made the stems
58 for the query terms available in queryinfo
59
60 Revision 1.3 1999/06/29 22:06:23 rjmcnab
61 Added a couple of fields to queryinfo to handle a special version
62 of mg.
63
64 Revision 1.2 1999/01/12 01:51:02 rjmcnab
65
66 Standard header.
67
68 Revision 1.1 1999/01/08 09:02:18 rjmcnab
69
70 Moved from src/library.
71
72 */
73
74
75#include "queryinfo.h"
76
77
78// query parameters
79
80queryparamclass::queryparamclass () {
81 clear ();
82}
83
84void queryparamclass::clear () {
85 combinequery.clear();
86 collection.clear();
87 index.clear();
88 subcollection.clear();
89 language.clear();
90 querystring.clear();
91 search_type = 0; // 0 = boolean, 1 = ranked
92 match_mode = 0; // 0 = some, 1 = all
93 casefolding = 0;
94 stemming = 0;
95 maxdocs = -1; // all
96}
97
98
99queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
100 combinequery = q.combinequery;
101 collection = q.collection;
102 index = q.index;
103 subcollection = q.subcollection;
104 language = q.language;
105 querystring = q.querystring;
106 search_type = q.search_type;
107 match_mode = q.match_mode;
108 casefolding = q.casefolding;
109 stemming = q.stemming;
110 maxdocs = q.maxdocs;
111
112 return *this;
113}
114
115
116bool operator==(const queryparamclass &x, const queryparamclass &y) {
117 return ((x.combinequery == y.combinequery) &&
118 (x.collection == y.collection) &&
119 (x.index == y.index) &&
120 (x.subcollection == y.subcollection) &&
121 (x.language == y.language) &&
122 (x.querystring == y.querystring) &&
123 (x.search_type == y.search_type) &&
124 (x.match_mode == y.match_mode) &&
125 (x.casefolding == y.casefolding) &&
126 (x.stemming == y.stemming) &&
127 (x.maxdocs == y.maxdocs));
128}
129
130bool operator!=(const queryparamclass &x, const queryparamclass &y) {
131 return !(x == y);
132}
133
134
135ostream &operator<< (ostream &outs, queryparamclass &q) {
136 outconvertclass text_t2ascii;
137
138 outs << "*** queryparamclass\n";
139 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
140 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
141 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
142 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
143 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
144 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
145 outs << " search_type = \"" << q.search_type << "\"\n";
146 outs << " match_mode = \"" << q.match_mode << "\"\n";
147 outs << " casefolding = \"" << q.casefolding << "\"\n";
148 outs << " stemming = \"" << q.stemming << "\"\n";
149 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
150 outs << "\n";
151
152 return outs;
153}
154
155
156
157
158// term frequencies
159
160termfreqclass::termfreqclass () {
161 clear();
162}
163
164void termfreqclass::clear() {
165 termstr.clear();
166 termstemstr.clear();
167 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
168 termfreq = 0;
169}
170
171termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
172 termstr = t.termstr;
173 termstemstr = t.termstemstr;
174 utf8equivterms = t.utf8equivterms;
175 termfreq = t.termfreq;
176
177 return *this;
178}
179
180bool operator==(const termfreqclass &x, const termfreqclass &y) {
181 return ((x.termstr == y.termstr) &&
182 (x.termstemstr == y.termstemstr) &&
183 (x.termfreq == y.termfreq));
184}
185
186bool operator!=(const termfreqclass &x, const termfreqclass &y) {
187 return !(x == y);
188}
189
190// ordered by termfreq and then by termstr
191bool operator<(const termfreqclass &x, const termfreqclass &y) {
192 return ((x.termfreq < y.termfreq) ||
193 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
194 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
195}
196
197bool operator>(const termfreqclass &x, const termfreqclass &y) {
198 return ((x.termfreq > y.termfreq) ||
199 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
200 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
201}
202
203// stream output for debugging purposes
204ostream &operator<< (ostream &outs, termfreqclass &t) {
205 outconvertclass text_t2ascii;
206
207 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
208 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
209 outs << " f:" << t.termfreq << "\n";
210
211 return outs;
212}
213
214
215
216// one query result
217
218docresultclass::docresultclass() {
219 clear ();
220}
221
222void docresultclass::clear () {
223 docnum=-1;
224 docweight=0.0;
225 num_query_terms_matched=0;
226 num_phrase_match=0;
227}
228
229// merges two result classes relating to a single docnum
230docresultclass &docresultclass::combine(const docresultclass &d) {
231 docweight += d.docweight; // budget!
232 num_query_terms_matched += d.num_query_terms_matched;
233 num_phrase_match += d.num_phrase_match;
234
235 return *this;
236}
237
238docresultclass &docresultclass::operator=(const docresultclass &d) {
239 docnum = d.docnum;
240 docweight = d.docweight;
241 num_query_terms_matched = d.num_query_terms_matched;
242 num_phrase_match = d.num_phrase_match;
243
244 return *this;
245}
246
247
248bool operator==(const docresultclass &x, const docresultclass &y) {
249 return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
250 (x.num_query_terms_matched == y.num_query_terms_matched) &&
251 (x.num_phrase_match == y.num_phrase_match));
252}
253
254bool operator<(const docresultclass &x, const docresultclass &y) {
255 return ((x.docnum < y.docnum) ||
256 ((x.docnum == y.docnum) &&
257 ((x.docweight < y.docweight) ||
258 ((x.docweight == y.docweight) &&
259 ((x.num_query_terms_matched < y.num_query_terms_matched) ||
260 ((x.num_query_terms_matched == y.num_query_terms_matched) &&
261 ((x.num_phrase_match < y.num_phrase_match))))))));
262}
263
264
265// stream output for debugging purposes
266ostream &operator<< (ostream &outs, docresultclass &a) {
267 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
268 return outs;
269}
270
271
272
273// many document results
274
275docresultsclass::docresultsclass () {
276 clear ();
277}
278
279void docresultsclass::clear () {
280 docset.erase(docset.begin(), docset.end());
281 docorder.erase(docorder.begin(), docorder.end());
282}
283
284void docresultsclass::docnum_order() {
285 docorder.erase(docorder.begin(), docorder.end());
286
287 docresultmap::iterator here = docset.begin();
288 docresultmap::iterator end = docset.end();
289 while (here != end) {
290 docorder.push_back ((*here).first);
291 here++;
292 }
293}
294
295void docresultsclass::combine_and (const docresultsclass &d) {
296 docorder.erase(docorder.begin(), docorder.end());
297
298 // put the resulting set in tempresults
299 docresultmap tempresults;
300
301 docresultmap::const_iterator d_here = d.docset.begin();
302 docresultmap::const_iterator d_end = d.docset.end();
303 docresultmap::iterator found = docset.end();
304 while (d_here != d_end) {
305 found = docset.find((*d_here).first);
306 if (found != docset.end()) {
307 (*found).second.combine ((*d_here).second);
308 tempresults[(*found).first] = (*found).second;
309 }
310 d_here++;
311 }
312
313 // then copy it back to docset
314 docset = tempresults;
315}
316
317void docresultsclass::combine_or (const docresultsclass &d) {
318 docorder.erase(docorder.begin(), docorder.end());
319
320 docresultmap::const_iterator d_here = d.docset.begin();
321 docresultmap::const_iterator d_end = d.docset.end();
322 docresultmap::iterator found = docset.end();
323 while (d_here != d_end) {
324 found = docset.find((*d_here).first);
325 if (found != docset.end()) {
326 (*found).second.combine ((*d_here).second);
327 } else {
328 docset[(*d_here).first] = (*d_here).second;
329 }
330 d_here++;
331 }
332}
333
334void docresultsclass::combine_not (const docresultsclass &d) {
335 docorder.erase(docorder.begin(), docorder.end());
336
337 docresultmap::const_iterator d_here = d.docset.begin();
338 docresultmap::const_iterator d_end = d.docset.end();
339 docresultmap::iterator found = docset.end();
340 while (d_here != d_end) {
341 found = docset.find((*d_here).first);
342 if (found != docset.end()) docset.erase (found);
343 d_here++;
344 }
345}
346
347docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
348 docset = d.docset;
349 docorder = d.docorder;
350
351 return *this;
352}
353
354
355
356
357// query results
358
359void queryresultsclass::clear () {
360 docs_matched = 0;
361 is_approx = Exact;
362
363 postprocessed = false;
364
365 docs.clear();
366 orgterms.erase(orgterms.begin(),orgterms.end());
367 terms.erase(terms.begin(),terms.end());
368}
369
370queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
371 docs_matched = q.docs_matched;
372 is_approx = q.is_approx;
373
374 postprocessed = q.postprocessed;
375
376 docs = q.docs;
377 terms = q.terms;
378 termvariants = q.termvariants;
379
380 return *this;
381}
382
383void queryresultsclass::sortuniqqueryterms() {
384 termfreqclassarray tempterms = orgterms;
385 text_tset seenterms;
386 terms.erase(terms.begin(), terms.end());
387
388 // sort the terms to get the frequencies in ascending order
389 sort (tempterms.begin(), tempterms.end());
390
391 // insert first occurance of each term (maximum)
392 termfreqclassarray::reverse_iterator here = tempterms.rbegin();
393 termfreqclassarray::reverse_iterator end = tempterms.rend();
394 while (here != end) {
395 if (seenterms.find((*here).termstr) == seenterms.end()) {
396 // the termstemstr and utf8equivterms might be different for
397 // different occurances of the term
398 (*here).termstemstr.clear();
399 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
400 (*here).utf8equivterms.end());
401 terms.push_back(*here);
402 seenterms.insert((*here).termstr);
403 }
404 here++;
405 }
406
407 // now re-sort in ascending order
408 sort (terms.begin(), terms.end());
409}
410
411
412// stream output for debugging purposes
413ostream &operator<< (ostream &outs, queryresultsclass &q) {
414 outs << "*** queryresultsclass\n";
415 outs << "docs\n";
416
417 docresultmap::iterator docshere = q.docs.docset.begin();
418 docresultmap::iterator docsend = q.docs.docset.end();
419 while (docshere != docsend) {
420 outs << (*docshere).second;
421 docshere++;
422 }
423
424 outs << "orgterms\n";
425 termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
426 termfreqclassarray::iterator orgtermsend = q.orgterms.end();
427 while (orgtermshere != orgtermsend) {
428 outs << (*orgtermshere);
429 orgtermshere++;
430 }
431
432 outs << "terms\n";
433 termfreqclassarray::iterator termshere = q.terms.begin();
434 termfreqclassarray::iterator termsend = q.terms.end();
435 while (termshere != termsend) {
436 outs << (*termshere);
437 termshere++;
438 }
439
440 outs << "\n";
441
442 return outs;
443}
Note: See TracBrowser for help on using the repository browser.