source: main/trunk/greenstone2/runtime-src/src/colservr/queryinfo.cpp@ 25234

Last change on this file since 25234 was 16947, checked in by mdewsnip, 16 years ago

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.2 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryinfo.h"
27
28
29// query parameters
30
31queryparamclass::queryparamclass () {
32 clear ();
33}
34
35void queryparamclass::clear () {
36 combinequery.clear();
37 collection.clear();
38 index.clear();
39 subcollection.clear();
40 language.clear();
41 level.clear();
42 querystring.clear();
43 search_type = 0; // 0 = boolean, 1 = ranked
44 match_mode = 0; // 0 = some, 1 = all
45 casefolding = 0;
46 stemming = 0;
47 accentfolding = 0;
48 maxdocs = -1; // all
49 maxnumeric = 4; // must default to the same value as mg_passes
50 filterstring.clear();
51 sortfield.clear();
52 fuzziness.clear();
53 startresults = 1; // all
54 endresults = 10; // all
55}
56
57
58queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
59 combinequery = q.combinequery;
60 collection = q.collection;
61 index = q.index;
62 subcollection = q.subcollection;
63 language = q.language;
64 level = q.level;
65 querystring = q.querystring;
66 search_type = q.search_type;
67 match_mode = q.match_mode;
68 casefolding = q.casefolding;
69 stemming = q.stemming;
70 accentfolding = q.accentfolding;
71 maxdocs = q.maxdocs;
72 maxnumeric = q.maxnumeric;
73 filterstring = q.filterstring;
74 sortfield = q.sortfield;
75 fuzziness = q.fuzziness;
76 startresults = q.startresults;
77 endresults = q.endresults;
78 return *this;
79}
80
81
82bool operator==(const queryparamclass &x, const queryparamclass &y) {
83 return ((x.combinequery == y.combinequery) &&
84 (x.collection == y.collection) &&
85 (x.index == y.index) &&
86 (x.subcollection == y.subcollection) &&
87 (x.language == y.language) &&
88 (x.level == y.level) &&
89 (x.querystring == y.querystring) &&
90 (x.search_type == y.search_type) &&
91 (x.match_mode == y.match_mode) &&
92 (x.casefolding == y.casefolding) &&
93 (x.stemming == y.stemming) &&
94 (x.accentfolding == y.accentfolding) &&
95 (x.maxdocs == y.maxdocs) &&
96 (x.maxnumeric == y.maxnumeric) &&
97 (x.filterstring == y.filterstring) &&
98 (x.sortfield == y.sortfield) &&
99 (x.fuzziness == y.fuzziness) &&
100 (x.startresults == y.startresults) &&
101 (x.startresults == y.startresults));
102}
103
104bool operator!=(const queryparamclass &x, const queryparamclass &y) {
105 return !(x == y);
106}
107
108
109ostream &operator<< (ostream &outs, queryparamclass &q) {
110 outconvertclass text_t2ascii;
111
112 outs << "*** queryparamclass\n";
113 outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
114 outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
115 outs << text_t2ascii << " index = \"" << q.index << "\"\n";
116 outs << text_t2ascii << " level = \"" << q.level << "\"\n";
117 outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
118 outs << text_t2ascii << " language = \"" << q.language << "\"\n";
119 outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
120 outs << " search_type = \"" << q.search_type << "\"\n";
121 outs << " match_mode = \"" << q.match_mode << "\"\n";
122 outs << " casefolding = \"" << q.casefolding << "\"\n";
123 outs << " stemming = \"" << q.stemming << "\"\n";
124 outs << " accentfolding = \"" << q.accentfolding << "\"\n";
125 outs << " maxdocs = \"" << q.maxdocs << "\"\n";
126 outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
127 outs << " filterstring = \"" << q.filterstring << "\"\n";
128 outs << " sortfield = \"" << q.sortfield << "\"\n";
129 outs << " fuzziness = \"" << q.fuzziness << "\"\n";
130 outs << " startresults = \"" << q.startresults << "\"\n";
131 outs << " endresults = \"" << q.endresults << "\"\n";
132 outs << "\n";
133
134 return outs;
135}
136
137
138
139
140// term frequencies
141
142termfreqclass::termfreqclass () {
143 clear();
144}
145
146void termfreqclass::clear() {
147 termstr.clear();
148 termstemstr.clear();
149 utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
150 termfreq = 0;
151}
152
153termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
154 termstr = t.termstr;
155 termstemstr = t.termstemstr;
156 utf8equivterms = t.utf8equivterms;
157 termfreq = t.termfreq;
158
159 return *this;
160}
161
162bool operator==(const termfreqclass &x, const termfreqclass &y) {
163 return ((x.termstr == y.termstr) &&
164 (x.termstemstr == y.termstemstr) &&
165 (x.termfreq == y.termfreq));
166}
167
168bool operator!=(const termfreqclass &x, const termfreqclass &y) {
169 return !(x == y);
170}
171
172// ordered by termfreq and then by termstr
173bool operator<(const termfreqclass &x, const termfreqclass &y) {
174 return ((x.termfreq < y.termfreq) ||
175 ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
176 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr < y.termstr)));
177}
178
179bool operator>(const termfreqclass &x, const termfreqclass &y) {
180 return ((x.termfreq > y.termfreq) ||
181 ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
182 ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) && (x.termstr > y.termstr)));
183}
184
185// stream output for debugging purposes
186ostream &operator<< (ostream &outs, termfreqclass &t) {
187 outconvertclass text_t2ascii;
188
189 outs << text_t2ascii << " t:\"" << t.termstr << "\"";
190 outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
191 outs << " f:" << t.termfreq << "\n";
192
193 return outs;
194}
195
196
197
198// one query result
199
200docresultclass::docresultclass() {
201 clear ();
202}
203
204void docresultclass::clear () {
205 docid="";
206 docnum=-1;
207 docweight=0.0;
208 num_query_terms_matched=0;
209 num_phrase_match=0;
210}
211
212// merges two result classes relating to a single docnum
213docresultclass &docresultclass::combine(const docresultclass &d) {
214 docweight += d.docweight; // budget!
215 num_query_terms_matched += d.num_query_terms_matched;
216 num_phrase_match += d.num_phrase_match;
217
218 return *this;
219}
220
221docresultclass &docresultclass::operator=(const docresultclass &d) {
222 docid = d.docid;
223 docnum = d.docnum;
224 docweight = d.docweight;
225 num_query_terms_matched = d.num_query_terms_matched;
226 num_phrase_match = d.num_phrase_match;
227
228 return *this;
229}
230
231
232bool operator==(const docresultclass &x, const docresultclass &y) {
233 return ((x.docid == y.docid) && (x.docnum == y.docnum) && (x.docweight == y.docweight) &&
234 (x.num_query_terms_matched == y.num_query_terms_matched) &&
235 (x.num_phrase_match == y.num_phrase_match));
236}
237
238bool operator<(const docresultclass &x, const docresultclass &y) {
239 return ((x.docid < y.docid) ||
240 ((x.docid == y.docid) &&
241 ((x.docnum < y.docnum) ||
242 ((x.docnum == y.docnum) &&
243 ((x.docweight < y.docweight) ||
244 ((x.docweight == y.docweight) &&
245 ((x.num_query_terms_matched < y.num_query_terms_matched) ||
246 ((x.num_query_terms_matched == y.num_query_terms_matched) &&
247 ((x.num_phrase_match < y.num_phrase_match))))))))));
248}
249
250
251// stream output for debugging purposes
252ostream &operator<< (ostream &outs, docresultclass &a) {
253 outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
254 return outs;
255}
256
257
258
259// many document results
260
261docresultsclass::docresultsclass () {
262 clear ();
263}
264
265void docresultsclass::clear () {
266 docset.erase(docset.begin(), docset.end());
267 docorder.erase(docorder.begin(), docorder.end());
268}
269
270void docresultsclass::docnum_order() {
271 docorder.erase(docorder.begin(), docorder.end());
272
273 docresultmap::iterator here = docset.begin();
274 docresultmap::iterator end = docset.end();
275 while (here != end) {
276 docorder.push_back ((*here).first);
277 ++here;
278 }
279}
280
281void docresultsclass::combine_and (const docresultsclass &d) {
282 docorder.erase(docorder.begin(), docorder.end());
283
284 // put the resulting set in tempresults
285 docresultmap tempresults;
286
287 docresultmap::const_iterator d_here = d.docset.begin();
288 docresultmap::const_iterator d_end = d.docset.end();
289 docresultmap::iterator found = docset.end();
290 while (d_here != d_end) {
291 found = docset.find((*d_here).first);
292 if (found != docset.end()) {
293 (*found).second.combine ((*d_here).second);
294 tempresults[(*found).first] = (*found).second;
295 }
296 ++d_here;
297 }
298
299 // then copy it back to docset
300 docset = tempresults;
301}
302
303void docresultsclass::combine_or (const docresultsclass &d) {
304 docorder.erase(docorder.begin(), docorder.end());
305
306 docresultmap::const_iterator d_here = d.docset.begin();
307 docresultmap::const_iterator d_end = d.docset.end();
308 docresultmap::iterator found = docset.end();
309 while (d_here != d_end) {
310 found = docset.find((*d_here).first);
311 if (found != docset.end()) {
312 (*found).second.combine ((*d_here).second);
313 } else {
314 docset[(*d_here).first] = (*d_here).second;
315 }
316 ++d_here;
317 }
318}
319
320void docresultsclass::combine_not (const docresultsclass &d) {
321 docorder.erase(docorder.begin(), docorder.end());
322
323 docresultmap::const_iterator d_here = d.docset.begin();
324 docresultmap::const_iterator d_end = d.docset.end();
325 docresultmap::iterator found = docset.end();
326 while (d_here != d_end) {
327 found = docset.find((*d_here).first);
328 if (found != docset.end()) docset.erase (found);
329 ++d_here;
330 }
331}
332
333docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
334 docset = d.docset;
335 docorder = d.docorder;
336
337 return *this;
338}
339
340
341
342
343// query results
344
345void queryresultsclass::clear () {
346 error_message = g_EmptyText;
347 docs_matched = 0;
348 is_approx = Exact;
349 syntax_error = false;
350 postprocessed = false;
351
352 docs.clear();
353 orgterms.erase(orgterms.begin(),orgterms.end());
354 terms.erase(terms.begin(),terms.end());
355}
356
357queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
358 error_message = q.error_message;
359 docs_matched = q.docs_matched;
360 is_approx = q.is_approx;
361 syntax_error = q.syntax_error;
362 postprocessed = q.postprocessed;
363
364 docs = q.docs;
365 terms = q.terms;
366 termvariants = q.termvariants;
367
368 return *this;
369}
370
371void queryresultsclass::sortuniqqueryterms() {
372 termfreqclassarray tempterms = orgterms;
373 text_tset seenterms;
374 terms.erase(terms.begin(), terms.end());
375
376 // sort the terms to get the frequencies in ascending order
377 sort (tempterms.begin(), tempterms.end());
378
379 // insert first occurance of each term (maximum)
380 termfreqclassarray::reverse_iterator here = tempterms.rbegin();
381 termfreqclassarray::reverse_iterator end = tempterms.rend();
382 while (here != end) {
383 if (seenterms.find((*here).termstr) == seenterms.end()) {
384 // the termstemstr and utf8equivterms might be different for
385 // different occurances of the term
386 (*here).termstemstr.clear();
387 (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
388 (*here).utf8equivterms.end());
389 terms.push_back(*here);
390 seenterms.insert((*here).termstr);
391 }
392 ++here;
393 }
394
395 // now re-sort in ascending order
396 sort (terms.begin(), terms.end());
397}
398
399
400// stream output for debugging purposes
401ostream &operator<< (ostream &outs, queryresultsclass &q) {
402 outs << "*** queryresultsclass\n";
403 outs << "docs\n";
404
405 docresultmap::iterator docshere = q.docs.docset.begin();
406 docresultmap::iterator docsend = q.docs.docset.end();
407 while (docshere != docsend) {
408 outs << (*docshere).second;
409 ++docshere;
410 }
411
412 outs << "orgterms\n";
413 termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
414 termfreqclassarray::iterator orgtermsend = q.orgterms.end();
415 while (orgtermshere != orgtermsend) {
416 outs << (*orgtermshere);
417 ++orgtermshere;
418 }
419
420 outs << "terms\n";
421 termfreqclassarray::iterator termshere = q.terms.begin();
422 termfreqclassarray::iterator termsend = q.terms.end();
423 while (termshere != termsend) {
424 outs << (*termshere);
425 ++termshere;
426 }
427
428 outs << "\n";
429
430 return outs;
431}
Note: See TracBrowser for help on using the repository browser.