source: trunk/gsdl/src/colservr/queryinfo.cpp@ 1319

Last change on this file since 1319 was 1319, checked in by kjm18, 24 years ago

added level info to queryparamclass for mgpp collections

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.8 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryinfo.h"
27
28
29// query parameters
30
31queryparamclass::queryparamclass () {
32 clear ();
33}
34
35void queryparamclass::clear () {
36 combinequery.clear();
37 collection.clear();
38 index.clear();
39 subcollection.clear();
40 language.clear();
41 level.clear();
42 querystring.clear();
43 search_type = 0; // 0 = boolean, 1 = ranked
44 match_mode = 0; // 0 = some, 1 = all
45 casefolding = 0;
46 stemming = 0;
47 maxdocs = -1; // all
48}
49
50
51queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
52 combinequery = q.combinequery;
53 collection = q.collection;
54 index = q.index;
55 subcollection = q.subcollection;
56 language = q.language;
57 level = q.level;
58 querystring = q.querystring;
59 search_type = q.search_type;
60 match_mode = q.match_mode;
61 casefolding = q.casefolding;
62 stemming = q.stemming;
63 maxdocs = q.maxdocs;
64
65 return *this;
66}
67
68
69bool operator==(const queryparamclass &x, const queryparamclass &y) {
70 return ((x.combinequery == y.combinequery) &&
71 (x.collection == y.collection) &&
72 (x.index == y.index) &&
73 (x.subcollection == y.subcollection) &&
74 (x.language == y.language) &&
75 (x.level == y.level) &&
76 (x.querystring == y.querystring) &&
77 (x.search_type == y.search_type) &&
78 (x.match_mode == y.match_mode) &&
79 (x.casefolding == y.casefolding) &&
80 (x.stemming == y.stemming) &&
81 (x.maxdocs == y.maxdocs));
82}
83
84bool operator!=(const queryparamclass &x, const queryparamclass &y) {
85 return !(x == y);
86}
87
88
89ostream &operator<< (ostream &outs, queryparamclass &q) {
90 outconvertclass text_t2ascii;
91
92 outs << "*** queryparamclass\n";
93 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
94 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
95 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
96 outs << text_t2ascii << " level = \"" << q.level << "\"\n";
97 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
98 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
99 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
100 outs << " search_type = \"" << q.search_type << "\"\n";
101 outs << " match_mode = \"" << q.match_mode << "\"\n";
102 outs << " casefolding = \"" << q.casefolding << "\"\n";
103 outs << " stemming = \"" << q.stemming << "\"\n";
104 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
105 outs << "\n";
106
107 return outs;
108}
109
110
111
112
113// term frequencies
114
115termfreqclass::termfreqclass () {
116 clear();
117}
118
119void termfreqclass::clear() {
120 termstr.clear();
121 termstemstr.clear();
122 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
123 termfreq = 0;
124}
125
126termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
127 termstr = t.termstr;
128 termstemstr = t.termstemstr;
129 utf8equivterms = t.utf8equivterms;
130 termfreq = t.termfreq;
131
132 return *this;
133}
134
135bool operator==(const termfreqclass &x, const termfreqclass &y) {
136 return ((x.termstr == y.termstr) &&
137 (x.termstemstr == y.termstemstr) &&
138 (x.termfreq == y.termfreq));
139}
140
141bool operator!=(const termfreqclass &x, const termfreqclass &y) {
142 return !(x == y);
143}
144
145// ordered by termfreq and then by termstr
146bool operator<(const termfreqclass &x, const termfreqclass &y) {
147 return ((x.termfreq < y.termfreq) ||
148 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
149 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
150}
151
152bool operator>(const termfreqclass &x, const termfreqclass &y) {
153 return ((x.termfreq > y.termfreq) ||
154 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
155 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
156}
157
158// stream output for debugging purposes
159ostream &operator<< (ostream &outs, termfreqclass &t) {
160 outconvertclass text_t2ascii;
161
162 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
163 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
164 outs << " f:" << t.termfreq << "\n";
165
166 return outs;
167}
168
169
170
171// one query result
172
173docresultclass::docresultclass() {
174 clear ();
175}
176
177void docresultclass::clear () {
178 docnum=-1;
179 docweight=0.0;
180 num_query_terms_matched=0;
181 num_phrase_match=0;
182}
183
184// merges two result classes relating to a single docnum
185docresultclass &docresultclass::combine(const docresultclass &d) {
186 docweight += d.docweight; // budget!
187 num_query_terms_matched += d.num_query_terms_matched;
188 num_phrase_match += d.num_phrase_match;
189
190 return *this;
191}
192
193docresultclass &docresultclass::operator=(const docresultclass &d) {
194 docnum = d.docnum;
195 docweight = d.docweight;
196 num_query_terms_matched = d.num_query_terms_matched;
197 num_phrase_match = d.num_phrase_match;
198
199 return *this;
200}
201
202
203bool operator==(const docresultclass &x, const docresultclass &y) {
204 return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
205 (x.num_query_terms_matched == y.num_query_terms_matched) &&
206 (x.num_phrase_match == y.num_phrase_match));
207}
208
209bool operator<(const docresultclass &x, const docresultclass &y) {
210 return ((x.docnum < y.docnum) ||
211 ((x.docnum == y.docnum) &&
212 ((x.docweight < y.docweight) ||
213 ((x.docweight == y.docweight) &&
214 ((x.num_query_terms_matched < y.num_query_terms_matched) ||
215 ((x.num_query_terms_matched == y.num_query_terms_matched) &&
216 ((x.num_phrase_match < y.num_phrase_match))))))));
217}
218
219
220// stream output for debugging purposes
221ostream &operator<< (ostream &outs, docresultclass &a) {
222 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
223 return outs;
224}
225
226
227
228// many document results
229
230docresultsclass::docresultsclass () {
231 clear ();
232}
233
234void docresultsclass::clear () {
235 docset.erase(docset.begin(), docset.end());
236 docorder.erase(docorder.begin(), docorder.end());
237}
238
239void docresultsclass::docnum_order() {
240 docorder.erase(docorder.begin(), docorder.end());
241
242 docresultmap::iterator here = docset.begin();
243 docresultmap::iterator end = docset.end();
244 while (here != end) {
245 docorder.push_back ((*here).first);
246 here++;
247 }
248}
249
250void docresultsclass::combine_and (const docresultsclass &d) {
251 docorder.erase(docorder.begin(), docorder.end());
252
253 // put the resulting set in tempresults
254 docresultmap tempresults;
255
256 docresultmap::const_iterator d_here = d.docset.begin();
257 docresultmap::const_iterator d_end = d.docset.end();
258 docresultmap::iterator found = docset.end();
259 while (d_here != d_end) {
260 found = docset.find((*d_here).first);
261 if (found != docset.end()) {
262 (*found).second.combine ((*d_here).second);
263 tempresults[(*found).first] = (*found).second;
264 }
265 d_here++;
266 }
267
268 // then copy it back to docset
269 docset = tempresults;
270}
271
272void docresultsclass::combine_or (const docresultsclass &d) {
273 docorder.erase(docorder.begin(), docorder.end());
274
275 docresultmap::const_iterator d_here = d.docset.begin();
276 docresultmap::const_iterator d_end = d.docset.end();
277 docresultmap::iterator found = docset.end();
278 while (d_here != d_end) {
279 found = docset.find((*d_here).first);
280 if (found != docset.end()) {
281 (*found).second.combine ((*d_here).second);
282 } else {
283 docset[(*d_here).first] = (*d_here).second;
284 }
285 d_here++;
286 }
287}
288
289void docresultsclass::combine_not (const docresultsclass &d) {
290 docorder.erase(docorder.begin(), docorder.end());
291
292 docresultmap::const_iterator d_here = d.docset.begin();
293 docresultmap::const_iterator d_end = d.docset.end();
294 docresultmap::iterator found = docset.end();
295 while (d_here != d_end) {
296 found = docset.find((*d_here).first);
297 if (found != docset.end()) docset.erase (found);
298 d_here++;
299 }
300}
301
302docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
303 docset = d.docset;
304 docorder = d.docorder;
305
306 return *this;
307}
308
309
310
311
312// query results
313
314void queryresultsclass::clear () {
315 docs_matched = 0;
316 is_approx = Exact;
317
318 postprocessed = false;
319
320 docs.clear();
321 orgterms.erase(orgterms.begin(),orgterms.end());
322 terms.erase(terms.begin(),terms.end());
323}
324
325queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
326 docs_matched = q.docs_matched;
327 is_approx = q.is_approx;
328
329 postprocessed = q.postprocessed;
330
331 docs = q.docs;
332 terms = q.terms;
333 termvariants = q.termvariants;
334
335 return *this;
336}
337
338void queryresultsclass::sortuniqqueryterms() {
339 termfreqclassarray tempterms = orgterms;
340 text_tset seenterms;
341 terms.erase(terms.begin(), terms.end());
342
343 // sort the terms to get the frequencies in ascending order
344 sort (tempterms.begin(), tempterms.end());
345
346 // insert first occurance of each term (maximum)
347 termfreqclassarray::reverse_iterator here = tempterms.rbegin();
348 termfreqclassarray::reverse_iterator end = tempterms.rend();
349 while (here != end) {
350 if (seenterms.find((*here).termstr) == seenterms.end()) {
351 // the termstemstr and utf8equivterms might be different for
352 // different occurances of the term
353 (*here).termstemstr.clear();
354 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
355 (*here).utf8equivterms.end());
356 terms.push_back(*here);
357 seenterms.insert((*here).termstr);
358 }
359 here++;
360 }
361
362 // now re-sort in ascending order
363 sort (terms.begin(), terms.end());
364}
365
366
367// stream output for debugging purposes
368ostream &operator<< (ostream &outs, queryresultsclass &q) {
369 outs << "*** queryresultsclass\n";
370 outs << "docs\n";
371
372 docresultmap::iterator docshere = q.docs.docset.begin();
373 docresultmap::iterator docsend = q.docs.docset.end();
374 while (docshere != docsend) {
375 outs << (*docshere).second;
376 docshere++;
377 }
378
379 outs << "orgterms\n";
380 termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
381 termfreqclassarray::iterator orgtermsend = q.orgterms.end();
382 while (orgtermshere != orgtermsend) {
383 outs << (*orgtermshere);
384 orgtermshere++;
385 }
386
387 outs << "terms\n";
388 termfreqclassarray::iterator termshere = q.terms.begin();
389 termfreqclassarray::iterator termsend = q.terms.end();
390 while (termshere != termsend) {
391 outs << (*termshere);
392 termshere++;
393 }
394
395 outs << "\n";
396
397 return outs;
398}
Note: See TracBrowser for help on using the repository browser.