root/trunk/gsdl/src/colservr/queryinfo.cpp @ 12868

Revision 12868, 12.1 KB (checked in by kjdon, 14 years ago)

AccentFolding patch, thanks to Juan Grigera. Added accentfolding to queryparamclass.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryinfo.h"
27
28
29// query parameters
30
31queryparamclass::queryparamclass () {
32  clear ();
33}
34
35void queryparamclass::clear () {
36  combinequery.clear();
37  collection.clear();
38  index.clear();
39  subcollection.clear();
40  language.clear();
41  level.clear();
42  querystring.clear();
43  search_type = 0; // 0 = boolean, 1 = ranked
44  match_mode = 0; // 0 = some, 1 = all
45  casefolding = 0;
46  stemming = 0;
47  accentfolding = 0;
48  maxdocs = -1;    // all
49  maxnumeric = 4;  // must default to the same value as mg_passes
50  filterstring.clear();
51  sortfield.clear();
52  fuzziness.clear();
53  startresults = 1; // all
54  endresults = 10; // all
55}
56
57
58queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
59  combinequery = q.combinequery;
60  collection = q.collection;
61  index = q.index;
62  subcollection = q.subcollection;
63  language = q.language;
64  level = q.level;
65  querystring = q.querystring;
66  search_type = q.search_type;
67  match_mode = q.match_mode;
68  casefolding = q.casefolding;
69  stemming = q.stemming;
70  accentfolding = q.accentfolding;
71  maxdocs = q.maxdocs;
72  maxnumeric = q.maxnumeric;
73  filterstring = q.filterstring;
74  sortfield = q.sortfield;
75  fuzziness = q.fuzziness;
76  startresults = q.startresults;
77  endresults = q.endresults;
78  return *this;
79}
80
81
82bool operator==(const queryparamclass &x, const queryparamclass &y) {
83  return ((x.combinequery == y.combinequery) &&
84      (x.collection == y.collection) &&
85      (x.index == y.index) &&
86      (x.subcollection == y.subcollection) &&
87      (x.language == y.language) &&
88      (x.level == y.level) &&
89      (x.querystring == y.querystring) &&
90      (x.search_type == y.search_type) &&
91      (x.match_mode == y.match_mode) &&
92      (x.casefolding == y.casefolding) &&
93      (x.stemming == y.stemming) &&
94      (x.accentfolding == y.accentfolding) &&
95      (x.maxdocs == y.maxdocs) &&
96      (x.maxnumeric == y.maxnumeric) &&
97      (x.filterstring == y.filterstring) &&
98          (x.sortfield == y.sortfield) &&
99          (x.fuzziness == y.fuzziness) &&
100          (x.startresults == y.startresults) &&
101          (x.startresults == y.startresults));
102}
103
104bool operator!=(const queryparamclass &x, const queryparamclass &y) {
105  return !(x == y);
106}
107
108
109ostream &operator<< (ostream &outs, queryparamclass &q) {
110  outconvertclass text_t2ascii;
111
112  outs << "*** queryparamclass\n";
113  outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
114  outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
115  outs << text_t2ascii << " index = \"" << q.index << "\"\n";
116  outs << text_t2ascii << " level = \"" << q.level << "\"\n";
117  outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
118  outs << text_t2ascii << " language = \"" << q.language << "\"\n";
119  outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
120  outs << " search_type = \"" << q.search_type << "\"\n";
121  outs << " match_mode = \"" << q.match_mode << "\"\n";
122  outs << " casefolding = \"" << q.casefolding << "\"\n";
123  outs << " stemming = \"" << q.stemming << "\"\n";
124  outs << " accentfolding = \"" << q.accentfolding << "\"\n";
125  outs << " maxdocs = \"" << q.maxdocs << "\"\n";
126  outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
127  outs << " filterstring = \"" << q.filterstring << "\"\n";
128  outs << " sortfield = \"" << q.sortfield << "\"\n";
129  outs << " fuzziness = \"" << q.fuzziness << "\"\n";
130  outs << " startresults = \"" << q.startresults << "\"\n";
131  outs << " endresults = \"" << q.endresults << "\"\n";
132  outs << "\n";
133
134  return outs;
135}
136
137
138
139
140// term frequencies
141
142termfreqclass::termfreqclass () {
143  clear();
144}
145
146void termfreqclass::clear() {
147  termstr.clear();
148  termstemstr.clear();
149  utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
150  termfreq = 0;
151}
152
153termfreqclass &termfreqclass::operator=(const termfreqclass &t)  {
154  termstr = t.termstr;
155  termstemstr = t.termstemstr;
156  utf8equivterms = t.utf8equivterms;
157  termfreq = t.termfreq;
158
159  return *this;
160}
161   
162bool operator==(const termfreqclass &x, const termfreqclass &y) {
163  return ((x.termstr == y.termstr) &&
164      (x.termstemstr == y.termstemstr) &&
165      (x.termfreq == y.termfreq));
166}
167
168bool operator!=(const termfreqclass &x, const termfreqclass &y) {
169  return !(x == y);
170}
171
172// ordered by termfreq and then by termstr
173bool operator<(const termfreqclass &x, const termfreqclass &y) {
174  return ((x.termfreq < y.termfreq) ||
175      ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
176      ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
177}
178
179bool operator>(const termfreqclass &x, const termfreqclass &y) {
180  return ((x.termfreq > y.termfreq) ||
181      ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
182      ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
183}
184
185// stream output for debugging purposes
186ostream &operator<< (ostream &outs, termfreqclass &t) {
187  outconvertclass text_t2ascii;
188
189  outs << text_t2ascii << " t:\"" << t.termstr << "\"";
190  outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
191  outs << " f:" << t.termfreq << "\n";
192
193  return outs;
194}
195
196
197
198// one query result
199
200docresultclass::docresultclass() {
201  clear ();
202}
203
204void docresultclass::clear () {
205  docnum=-1;
206  docweight=0.0;
207  num_query_terms_matched=0;
208  num_phrase_match=0;
209}
210
211// merges two result classes relating to a single docnum
212docresultclass &docresultclass::combine(const docresultclass &d) {
213  docweight += d.docweight; // budget!
214  num_query_terms_matched += d.num_query_terms_matched;
215  num_phrase_match += d.num_phrase_match;
216
217  return *this;
218}
219
220docresultclass &docresultclass::operator=(const docresultclass &d) {
221  docnum = d.docnum;
222  docweight = d.docweight;
223  num_query_terms_matched = d.num_query_terms_matched;
224  num_phrase_match = d.num_phrase_match;
225
226  return *this;
227}
228
229
230bool operator==(const docresultclass &x, const docresultclass &y) {
231  return ((x.docnum == y.docnum) && (x.docweight == y.docweight) &&
232      (x.num_query_terms_matched == y.num_query_terms_matched) &&
233      (x.num_phrase_match == y.num_phrase_match));
234}
235
236bool operator<(const docresultclass &x, const docresultclass &y) {
237  return ((x.docnum < y.docnum) ||
238      ((x.docnum == y.docnum) &&
239       ((x.docweight < y.docweight) ||
240        ((x.docweight == y.docweight) &&
241         ((x.num_query_terms_matched < y.num_query_terms_matched) ||
242          ((x.num_query_terms_matched == y.num_query_terms_matched) &&
243           ((x.num_phrase_match < y.num_phrase_match))))))));
244}
245
246
247// stream output for debugging purposes
248ostream &operator<< (ostream &outs, docresultclass &a) {
249  outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
250  return outs;
251}
252
253
254
255// many document results
256
257docresultsclass::docresultsclass () {
258  clear ();
259}
260
261void docresultsclass::clear () {
262  docset.erase(docset.begin(), docset.end());
263  docorder.erase(docorder.begin(), docorder.end());
264}
265
266void docresultsclass::docnum_order() {
267  docorder.erase(docorder.begin(), docorder.end());
268 
269  docresultmap::iterator here = docset.begin();
270  docresultmap::iterator end = docset.end();
271  while (here != end) {
272    docorder.push_back ((*here).first);
273    ++here;
274  }
275}
276
277void docresultsclass::combine_and (const docresultsclass &d) {
278  docorder.erase(docorder.begin(), docorder.end());
279
280  // put the resulting set in tempresults
281  docresultmap tempresults;
282 
283  docresultmap::const_iterator d_here = d.docset.begin();
284  docresultmap::const_iterator d_end = d.docset.end();
285  docresultmap::iterator found = docset.end();
286  while (d_here != d_end) {
287    found = docset.find((*d_here).first);
288    if (found != docset.end()) {
289      (*found).second.combine ((*d_here).second);
290      tempresults[(*found).first] = (*found).second;
291    }
292    ++d_here;
293  }
294
295  // then copy it back to docset
296  docset = tempresults;
297}
298
299void docresultsclass::combine_or (const docresultsclass &d) {
300  docorder.erase(docorder.begin(), docorder.end());
301   
302  docresultmap::const_iterator d_here = d.docset.begin();
303  docresultmap::const_iterator d_end = d.docset.end();
304  docresultmap::iterator found = docset.end();
305  while (d_here != d_end) {
306    found = docset.find((*d_here).first);
307    if (found != docset.end()) {
308      (*found).second.combine ((*d_here).second);
309    } else {
310      docset[(*d_here).first] = (*d_here).second;
311    }
312    ++d_here;
313  }
314}
315
316void docresultsclass::combine_not (const docresultsclass &d) {
317  docorder.erase(docorder.begin(), docorder.end());
318
319 docresultmap::const_iterator d_here = d.docset.begin();
320 docresultmap::const_iterator d_end = d.docset.end();
321 docresultmap::iterator found = docset.end();
322 while (d_here != d_end) {
323   found = docset.find((*d_here).first);
324   if (found != docset.end()) docset.erase (found);
325   ++d_here;
326 }
327}
328
329docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
330  docset = d.docset;
331  docorder = d.docorder;
332
333  return *this;
334}
335
336
337
338
339// query results
340
341void queryresultsclass::clear () {
342  error_message = g_EmptyText;
343  docs_matched = 0;
344  is_approx = Exact;
345  syntax_error = false;
346  postprocessed = false;
347
348  docs.clear();
349  orgterms.erase(orgterms.begin(),orgterms.end());
350  terms.erase(terms.begin(),terms.end());
351}
352
353queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
354  error_message = q.error_message;
355  docs_matched = q.docs_matched;
356  is_approx = q.is_approx;
357  syntax_error = q.syntax_error;
358  postprocessed = q.postprocessed;
359
360  docs = q.docs;
361  terms = q.terms;
362  termvariants = q.termvariants;
363
364  return *this;
365}
366
367void queryresultsclass::sortuniqqueryterms() {
368  termfreqclassarray tempterms = orgterms;
369  text_tset seenterms;
370  terms.erase(terms.begin(), terms.end());
371
372  // sort the terms to get the frequencies in ascending order
373  sort (tempterms.begin(), tempterms.end());
374
375  // insert first occurance of each term (maximum)
376  termfreqclassarray::reverse_iterator here = tempterms.rbegin();
377  termfreqclassarray::reverse_iterator end = tempterms.rend();
378  while (here != end) {
379    if (seenterms.find((*here).termstr) == seenterms.end()) {
380      // the termstemstr and utf8equivterms might be different for
381      // different occurances of the term
382      (*here).termstemstr.clear();
383      (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
384                   (*here).utf8equivterms.end());
385      terms.push_back(*here);
386      seenterms.insert((*here).termstr);
387    }
388    ++here;
389  }
390
391  // now re-sort in ascending order
392  sort (terms.begin(), terms.end());
393}
394
395
396// stream output for debugging purposes
397ostream &operator<< (ostream &outs, queryresultsclass &q) {
398  outs << "*** queryresultsclass\n";
399  outs << "docs\n";
400
401  docresultmap::iterator docshere = q.docs.docset.begin();
402  docresultmap::iterator docsend = q.docs.docset.end();
403  while (docshere != docsend) {
404    outs << (*docshere).second;
405    ++docshere;
406  }
407
408  outs << "orgterms\n";
409  termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
410  termfreqclassarray::iterator orgtermsend = q.orgterms.end();
411  while (orgtermshere != orgtermsend) {
412    outs << (*orgtermshere);
413    ++orgtermshere;
414  }
415
416  outs << "terms\n";
417  termfreqclassarray::iterator termshere = q.terms.begin();
418  termfreqclassarray::iterator termsend = q.terms.end();
419  while (termshere != termsend) {
420    outs << (*termshere);
421    ++termshere;
422  }
423
424  outs << "\n";
425
426  return outs;
427}
Note: See TracBrowser for help on using the browser.