source: trunk/gsdl/src/colservr/queryinfo.cpp@ 4824

Last change on this file since 4824 was 4217, checked in by kjdon, 21 years ago

now we pass on syntax_error from mgpp parsing back to the mgppqueryfilter via queryresultsclass

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryinfo.h"
27
28
29// query parameters
30
31queryparamclass::queryparamclass () {
32 clear ();
33}
34
35void queryparamclass::clear () {
36 combinequery.clear();
37 collection.clear();
38 index.clear();
39 subcollection.clear();
40 language.clear();
41 level.clear();
42 querystring.clear();
43 search_type = 0; // 0 = boolean, 1 = ranked
44 match_mode = 0; // 0 = some, 1 = all
45 casefolding = 0;
46 stemming = 0;
47 maxdocs = -1; // all
48 maxnumeric = 4; // must default to the same value as mg_passes
49}
50
51
52queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
53 combinequery = q.combinequery;
54 collection = q.collection;
55 index = q.index;
56 subcollection = q.subcollection;
57 language = q.language;
58 level = q.level;
59 querystring = q.querystring;
60 search_type = q.search_type;
61 match_mode = q.match_mode;
62 casefolding = q.casefolding;
63 stemming = q.stemming;
64 maxdocs = q.maxdocs;
65 maxnumeric = q.maxnumeric;
66
67 return *this;
68}
69
70
71bool operator==(const queryparamclass &x, const queryparamclass &y) {
72 return ((x.combinequery == y.combinequery) &&
73 (x.collection == y.collection) &&
74 (x.index == y.index) &&
75 (x.subcollection == y.subcollection) &&
76 (x.language == y.language) &&
77 (x.level == y.level) &&
78 (x.querystring == y.querystring) &&
79 (x.search_type == y.search_type) &&
80 (x.match_mode == y.match_mode) &&
81 (x.casefolding == y.casefolding) &&
82 (x.stemming == y.stemming) &&
83 (x.maxdocs == y.maxdocs) &&
84 (x.maxnumeric == y.maxnumeric));
85}
86
87bool operator!=(const queryparamclass &x, const queryparamclass &y) {
88 return !(x == y);
89}
90
91
92ostream &operator<< (ostream &outs, queryparamclass &q) {
93 outconvertclass text_t2ascii;
94
95 outs << "*** queryparamclass\n";
96 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
97 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
98 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
99 outs << text_t2ascii << " level = \"" << q.level << "\"\n";
100 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
101 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
102 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
103 outs << " search_type = \"" << q.search_type << "\"\n";
104 outs << " match_mode = \"" << q.match_mode << "\"\n";
105 outs << " casefolding = \"" << q.casefolding << "\"\n";
106 outs << " stemming = \"" << q.stemming << "\"\n";
107 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
108 outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
109 outs << "\n";
110
111 return outs;
112}
113
114
115
116
117// term frequencies
118
119termfreqclass::termfreqclass () {
120 clear();
121}
122
123void termfreqclass::clear() {
124 termstr.clear();
125 termstemstr.clear();
126 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
127 termfreq = 0;
128}
129
130termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
131 termstr = t.termstr;
132 termstemstr = t.termstemstr;
133 utf8equivterms = t.utf8equivterms;
134 termfreq = t.termfreq;
135
136 return *this;
137}
138
139bool operator==(const termfreqclass &x, const termfreqclass &y) {
140 return ((x.termstr == y.termstr) &&
141 (x.termstemstr == y.termstemstr) &&
142 (x.termfreq == y.termfreq));
143}
144
145bool operator!=(const termfreqclass &x, const termfreqclass &y) {
146 return !(x == y);
147}
148
149// ordered by termfreq and then by termstr
150bool operator<(const termfreqclass &x, const termfreqclass &y) {
151 return ((x.termfreq < y.termfreq) ||
152 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
153 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
154}
155
156bool operator>(const termfreqclass &x, const termfreqclass &y) {
157 return ((x.termfreq > y.termfreq) ||
158 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
159 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
160}
161
162// stream output for debugging purposes
163ostream &operator<< (ostream &outs, termfreqclass &t) {
164 outconvertclass text_t2ascii;
165
166 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
167 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
168 outs << " f:" << t.termfreq << "\n";
169
170 return outs;
171}
172
173
174
175// one query result
176
177docresultclass::docresultclass() {
178 clear ();
179}
180
181void docresultclass::clear () {
182 docnum=-1;
183 docweight=0.0;
184 num_query_terms_matched=0;
185 num_phrase_match=0;
186}
187
188// merges two result classes relating to a single docnum
189docresultclass &docresultclass::combine(const docresultclass &d) {
190 docweight += d.docweight; // budget!
191 num_query_terms_matched += d.num_query_terms_matched;
192 num_phrase_match += d.num_phrase_match;
193
194 return *this;
195}
196
197docresultclass &docresultclass::operator=(const docresultclass &d) {
198 docnum = d.docnum;
199 docweight = d.docweight;
200 num_query_terms_matched = d.num_query_terms_matched;
201 num_phrase_match = d.num_phrase_match;
202
203 return *this;
204}
205
206
207bool operator==(const docresultclass &x, const docresultclass &y) {
208 return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
209 (x.num_query_terms_matched == y.num_query_terms_matched) &&
210 (x.num_phrase_match == y.num_phrase_match));
211}
212
213bool operator<(const docresultclass &x, const docresultclass &y) {
214 return ((x.docnum < y.docnum) ||
215 ((x.docnum == y.docnum) &&
216 ((x.docweight < y.docweight) ||
217 ((x.docweight == y.docweight) &&
218 ((x.num_query_terms_matched < y.num_query_terms_matched) ||
219 ((x.num_query_terms_matched == y.num_query_terms_matched) &&
220 ((x.num_phrase_match < y.num_phrase_match))))))));
221}
222
223
224// stream output for debugging purposes
225ostream &operator<< (ostream &outs, docresultclass &a) {
226 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
227 return outs;
228}
229
230
231
232// many document results
233
234docresultsclass::docresultsclass () {
235 clear ();
236}
237
238void docresultsclass::clear () {
239 docset.erase(docset.begin(), docset.end());
240 docorder.erase(docorder.begin(), docorder.end());
241}
242
243void docresultsclass::docnum_order() {
244 docorder.erase(docorder.begin(), docorder.end());
245
246 docresultmap::iterator here = docset.begin();
247 docresultmap::iterator end = docset.end();
248 while (here != end) {
249 docorder.push_back ((*here).first);
250 here++;
251 }
252}
253
254void docresultsclass::combine_and (const docresultsclass &d) {
255 docorder.erase(docorder.begin(), docorder.end());
256
257 // put the resulting set in tempresults
258 docresultmap tempresults;
259
260 docresultmap::const_iterator d_here = d.docset.begin();
261 docresultmap::const_iterator d_end = d.docset.end();
262 docresultmap::iterator found = docset.end();
263 while (d_here != d_end) {
264 found = docset.find((*d_here).first);
265 if (found != docset.end()) {
266 (*found).second.combine ((*d_here).second);
267 tempresults[(*found).first] = (*found).second;
268 }
269 d_here++;
270 }
271
272 // then copy it back to docset
273 docset = tempresults;
274}
275
276void docresultsclass::combine_or (const docresultsclass &d) {
277 docorder.erase(docorder.begin(), docorder.end());
278
279 docresultmap::const_iterator d_here = d.docset.begin();
280 docresultmap::const_iterator d_end = d.docset.end();
281 docresultmap::iterator found = docset.end();
282 while (d_here != d_end) {
283 found = docset.find((*d_here).first);
284 if (found != docset.end()) {
285 (*found).second.combine ((*d_here).second);
286 } else {
287 docset[(*d_here).first] = (*d_here).second;
288 }
289 d_here++;
290 }
291}
292
293void docresultsclass::combine_not (const docresultsclass &d) {
294 docorder.erase(docorder.begin(), docorder.end());
295
296 docresultmap::const_iterator d_here = d.docset.begin();
297 docresultmap::const_iterator d_end = d.docset.end();
298 docresultmap::iterator found = docset.end();
299 while (d_here != d_end) {
300 found = docset.find((*d_here).first);
301 if (found != docset.end()) docset.erase (found);
302 d_here++;
303 }
304}
305
306docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
307 docset = d.docset;
308 docorder = d.docorder;
309
310 return *this;
311}
312
313
314
315
316// query results
317
318void queryresultsclass::clear () {
319 docs_matched = 0;
320 is_approx = Exact;
321 syntax_error = false;
322 postprocessed = false;
323
324 docs.clear();
325 orgterms.erase(orgterms.begin(),orgterms.end());
326 terms.erase(terms.begin(),terms.end());
327}
328
329queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
330 docs_matched = q.docs_matched;
331 is_approx = q.is_approx;
332 syntax_error = q.syntax_error;
333 postprocessed = q.postprocessed;
334
335 docs = q.docs;
336 terms = q.terms;
337 termvariants = q.termvariants;
338
339 return *this;
340}
341
342void queryresultsclass::sortuniqqueryterms() {
343 termfreqclassarray tempterms = orgterms;
344 text_tset seenterms;
345 terms.erase(terms.begin(), terms.end());
346
347 // sort the terms to get the frequencies in ascending order
348 sort (tempterms.begin(), tempterms.end());
349
350 // insert first occurance of each term (maximum)
351 termfreqclassarray::reverse_iterator here = tempterms.rbegin();
352 termfreqclassarray::reverse_iterator end = tempterms.rend();
353 while (here != end) {
354 if (seenterms.find((*here).termstr) == seenterms.end()) {
355 // the termstemstr and utf8equivterms might be different for
356 // different occurances of the term
357 (*here).termstemstr.clear();
358 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
359 (*here).utf8equivterms.end());
360 terms.push_back(*here);
361 seenterms.insert((*here).termstr);
362 }
363 here++;
364 }
365
366 // now re-sort in ascending order
367 sort (terms.begin(), terms.end());
368}
369
370
371// stream output for debugging purposes
372ostream &operator<< (ostream &outs, queryresultsclass &q) {
373 outs << "*** queryresultsclass\n";
374 outs << "docs\n";
375
376 docresultmap::iterator docshere = q.docs.docset.begin();
377 docresultmap::iterator docsend = q.docs.docset.end();
378 while (docshere != docsend) {
379 outs << (*docshere).second;
380 docshere++;
381 }
382
383 outs << "orgterms\n";
384 termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
385 termfreqclassarray::iterator orgtermsend = q.orgterms.end();
386 while (orgtermshere != orgtermsend) {
387 outs << (*orgtermshere);
388 orgtermshere++;
389 }
390
391 outs << "terms\n";
392 termfreqclassarray::iterator termshere = q.terms.begin();
393 termfreqclassarray::iterator termsend = q.terms.end();
394 while (termshere != termsend) {
395 outs << (*termshere);
396 termshere++;
397 }
398
399 outs << "\n";
400
401 return outs;
402}
Note: See TracBrowser for help on using the repository browser.