source: main/trunk/greenstone2/runtime-src/src/colservr/queryinfo.cpp@ 28762

Last change on this file since 28762 was 27062, checked in by kjdon, 11 years ago

sortorder added to query params for lucene

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryinfo.h"
27
28
29// query parameters
30
31queryparamclass::queryparamclass () {
32 clear ();
33}
34
35void queryparamclass::clear () {
36 combinequery.clear();
37 collection.clear();
38 index.clear();
39 subcollection.clear();
40 language.clear();
41 level.clear();
42 querystring.clear();
43 search_type = 0; // 0 = boolean, 1 = ranked
44 match_mode = 0; // 0 = some, 1 = all
45 casefolding = 0;
46 stemming = 0;
47 accentfolding = 0;
48 maxdocs = -1; // all
49 maxnumeric = 4; // must default to the same value as mg_passes
50 filterstring.clear();
51 sortfield.clear();
52 fuzziness.clear();
53 sortorder = 0; // 0 = ascending, 1 = descending
54 startresults = 1; // all
55 endresults = 10; // all
56}
57
58
59queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
60 combinequery = q.combinequery;
61 collection = q.collection;
62 index = q.index;
63 subcollection = q.subcollection;
64 language = q.language;
65 level = q.level;
66 querystring = q.querystring;
67 search_type = q.search_type;
68 match_mode = q.match_mode;
69 casefolding = q.casefolding;
70 stemming = q.stemming;
71 accentfolding = q.accentfolding;
72 maxdocs = q.maxdocs;
73 maxnumeric = q.maxnumeric;
74 filterstring = q.filterstring;
75 sortfield = q.sortfield;
76 fuzziness = q.fuzziness;
77 sortorder = q.sortorder;
78 startresults = q.startresults;
79 endresults = q.endresults;
80 return *this;
81}
82
83
84bool operator==(const queryparamclass &x, const queryparamclass &y) {
85 return ((x.combinequery == y.combinequery) &&
86 (x.collection == y.collection) &&
87 (x.index == y.index) &&
88 (x.subcollection == y.subcollection) &&
89 (x.language == y.language) &&
90 (x.level == y.level) &&
91 (x.querystring == y.querystring) &&
92 (x.search_type == y.search_type) &&
93 (x.match_mode == y.match_mode) &&
94 (x.casefolding == y.casefolding) &&
95 (x.stemming == y.stemming) &&
96 (x.accentfolding == y.accentfolding) &&
97 (x.maxdocs == y.maxdocs) &&
98 (x.maxnumeric == y.maxnumeric) &&
99 (x.filterstring == y.filterstring) &&
100 (x.sortfield == y.sortfield) &&
101 (x.fuzziness == y.fuzziness) &&
102 (x.sortorder == y.sortorder) &&
103 (x.startresults == y.startresults) &&
104 (x.startresults == y.startresults));
105}
106
107bool operator!=(const queryparamclass &x, const queryparamclass &y) {
108 return !(x == y);
109}
110
111
112ostream &operator<< (ostream &outs, queryparamclass &q) {
113 outconvertclass text_t2ascii;
114
115 outs << "*** queryparamclass\n";
116 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
117 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
118 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
119 outs << text_t2ascii << " level = \"" << q.level << "\"\n";
120 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
121 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
122 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
123 outs << " search_type = \"" << q.search_type << "\"\n";
124 outs << " match_mode = \"" << q.match_mode << "\"\n";
125 outs << " casefolding = \"" << q.casefolding << "\"\n";
126 outs << " stemming = \"" << q.stemming << "\"\n";
127 outs << " accentfolding = \"" << q.accentfolding << "\"\n";
128 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
129 outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
130 outs << " filterstring = \"" << q.filterstring << "\"\n";
131 outs << " sortfield = \"" << q.sortfield << "\"\n";
132 outs << " fuzziness = \"" << q.fuzziness << "\"\n";
133 outs << " sortorder = \"" << q.sortorder << "\"\n";
134 outs << " startresults = \"" << q.startresults << "\"\n";
135 outs << " endresults = \"" << q.endresults << "\"\n";
136 outs << "\n";
137
138 return outs;
139}
140
141
142
143
144// term frequencies
145
146termfreqclass::termfreqclass () {
147 clear();
148}
149
150void termfreqclass::clear() {
151 termstr.clear();
152 termstemstr.clear();
153 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
154 termfreq = 0;
155}
156
157termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
158 termstr = t.termstr;
159 termstemstr = t.termstemstr;
160 utf8equivterms = t.utf8equivterms;
161 termfreq = t.termfreq;
162
163 return *this;
164}
165
166bool operator==(const termfreqclass &x, const termfreqclass &y) {
167 return ((x.termstr == y.termstr) &&
168 (x.termstemstr == y.termstemstr) &&
169 (x.termfreq == y.termfreq));
170}
171
172bool operator!=(const termfreqclass &x, const termfreqclass &y) {
173 return !(x == y);
174}
175
176// ordered by termfreq and then by termstr
177bool operator<(const termfreqclass &x, const termfreqclass &y) {
178 return ((x.termfreq < y.termfreq) ||
179 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
180 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
181}
182
183bool operator>(const termfreqclass &x, const termfreqclass &y) {
184 return ((x.termfreq > y.termfreq) ||
185 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
186 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
187}
188
189// stream output for debugging purposes
190ostream &operator<< (ostream &outs, termfreqclass &t) {
191 outconvertclass text_t2ascii;
192
193 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
194 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
195 outs << " f:" << t.termfreq << "\n";
196
197 return outs;
198}
199
200
201
202// one query result
203
204docresultclass::docresultclass() {
205 clear ();
206}
207
208void docresultclass::clear () {
209 docid="";
210 docnum=-1;
211 docweight=0.0;
212 num_query_terms_matched=0;
213 num_phrase_match=0;
214}
215
216// merges two result classes relating to a single docnum
217docresultclass &docresultclass::combine(const docresultclass &d) {
218 docweight += d.docweight; // budget!
219 num_query_terms_matched += d.num_query_terms_matched;
220 num_phrase_match += d.num_phrase_match;
221
222 return *this;
223}
224
225docresultclass &docresultclass::operator=(const docresultclass &d) {
226 docid = d.docid;
227 docnum = d.docnum;
228 docweight = d.docweight;
229 num_query_terms_matched = d.num_query_terms_matched;
230 num_phrase_match = d.num_phrase_match;
231
232 return *this;
233}
234
235
236bool operator==(const docresultclass &x, const docresultclass &y) {
237 return ((x.docid == y.docid) && (x.docnum == y.docnum) && (x.docweight == y.docweight) &&
238 (x.num_query_terms_matched == y.num_query_terms_matched) &&
239 (x.num_phrase_match == y.num_phrase_match));
240}
241
242bool operator<(const docresultclass &x, const docresultclass &y) {
243 return ((x.docid < y.docid) ||
244 ((x.docid == y.docid) &&
245 ((x.docnum < y.docnum) ||
246 ((x.docnum == y.docnum) &&
247 ((x.docweight < y.docweight) ||
248 ((x.docweight == y.docweight) &&
249 ((x.num_query_terms_matched < y.num_query_terms_matched) ||
250 ((x.num_query_terms_matched == y.num_query_terms_matched) &&
251 ((x.num_phrase_match < y.num_phrase_match))))))))));
252}
253
254
255// stream output for debugging purposes
256ostream &operator<< (ostream &outs, docresultclass &a) {
257 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
258 return outs;
259}
260
261
262
263// many document results
264
265docresultsclass::docresultsclass () {
266 clear ();
267}
268
269void docresultsclass::clear () {
270 docset.erase(docset.begin(), docset.end());
271 docorder.erase(docorder.begin(), docorder.end());
272}
273
274void docresultsclass::docnum_order() {
275 docorder.erase(docorder.begin(), docorder.end());
276
277 docresultmap::iterator here = docset.begin();
278 docresultmap::iterator end = docset.end();
279 while (here != end) {
280 docorder.push_back ((*here).first);
281 ++here;
282 }
283}
284
285void docresultsclass::combine_and (const docresultsclass &d) {
286 docorder.erase(docorder.begin(), docorder.end());
287
288 // put the resulting set in tempresults
289 docresultmap tempresults;
290
291 docresultmap::const_iterator d_here = d.docset.begin();
292 docresultmap::const_iterator d_end = d.docset.end();
293 docresultmap::iterator found = docset.end();
294 while (d_here != d_end) {
295 found = docset.find((*d_here).first);
296 if (found != docset.end()) {
297 (*found).second.combine ((*d_here).second);
298 tempresults[(*found).first] = (*found).second;
299 }
300 ++d_here;
301 }
302
303 // then copy it back to docset
304 docset = tempresults;
305}
306
307void docresultsclass::combine_or (const docresultsclass &d) {
308 docorder.erase(docorder.begin(), docorder.end());
309
310 docresultmap::const_iterator d_here = d.docset.begin();
311 docresultmap::const_iterator d_end = d.docset.end();
312 docresultmap::iterator found = docset.end();
313 while (d_here != d_end) {
314 found = docset.find((*d_here).first);
315 if (found != docset.end()) {
316 (*found).second.combine ((*d_here).second);
317 } else {
318 docset[(*d_here).first] = (*d_here).second;
319 }
320 ++d_here;
321 }
322}
323
324void docresultsclass::combine_not (const docresultsclass &d) {
325 docorder.erase(docorder.begin(), docorder.end());
326
327 docresultmap::const_iterator d_here = d.docset.begin();
328 docresultmap::const_iterator d_end = d.docset.end();
329 docresultmap::iterator found = docset.end();
330 while (d_here != d_end) {
331 found = docset.find((*d_here).first);
332 if (found != docset.end()) docset.erase (found);
333 ++d_here;
334 }
335}
336
337docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
338 docset = d.docset;
339 docorder = d.docorder;
340
341 return *this;
342}
343
344
345
346
347// query results
348
349void queryresultsclass::clear () {
350 error_message = g_EmptyText;
351 docs_matched = 0;
352 is_approx = Exact;
353 syntax_error = false;
354 postprocessed = false;
355
356 docs.clear();
357 orgterms.erase(orgterms.begin(),orgterms.end());
358 terms.erase(terms.begin(),terms.end());
359}
360
361queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
362 error_message = q.error_message;
363 docs_matched = q.docs_matched;
364 is_approx = q.is_approx;
365 syntax_error = q.syntax_error;
366 postprocessed = q.postprocessed;
367
368 docs = q.docs;
369 terms = q.terms;
370 termvariants = q.termvariants;
371
372 return *this;
373}
374
375void queryresultsclass::sortuniqqueryterms() {
376 termfreqclassarray tempterms = orgterms;
377 text_tset seenterms;
378 terms.erase(terms.begin(), terms.end());
379
380 // sort the terms to get the frequencies in ascending order
381 sort (tempterms.begin(), tempterms.end());
382
383 // insert first occurance of each term (maximum)
384 termfreqclassarray::reverse_iterator here = tempterms.rbegin();
385 termfreqclassarray::reverse_iterator end = tempterms.rend();
386 while (here != end) {
387 if (seenterms.find((*here).termstr) == seenterms.end()) {
388 // the termstemstr and utf8equivterms might be different for
389 // different occurances of the term
390 (*here).termstemstr.clear();
391 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
392 (*here).utf8equivterms.end());
393 terms.push_back(*here);
394 seenterms.insert((*here).termstr);
395 }
396 ++here;
397 }
398
399 // now re-sort in ascending order
400 sort (terms.begin(), terms.end());
401}
402
403
404// stream output for debugging purposes
405ostream &operator<< (ostream &outs, queryresultsclass &q) {
406 outs << "*** queryresultsclass\n";
407 outs << "docs\n";
408
409 docresultmap::iterator docshere = q.docs.docset.begin();
410 docresultmap::iterator docsend = q.docs.docset.end();
411 while (docshere != docsend) {
412 outs << (*docshere).second;
413 ++docshere;
414 }
415
416 outs << "orgterms\n";
417 termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
418 termfreqclassarray::iterator orgtermsend = q.orgterms.end();
419 while (orgtermshere != orgtermsend) {
420 outs << (*orgtermshere);
421 ++orgtermshere;
422 }
423
424 outs << "terms\n";
425 termfreqclassarray::iterator termshere = q.terms.begin();
426 termfreqclassarray::iterator termsend = q.terms.end();
427 while (termshere != termsend) {
428 outs << (*termshere);
429 ++termshere;
430 }
431
432 outs << "\n";
433
434 return outs;
435}
Note: See TracBrowser for help on using the repository browser.