source: gsdl/trunk/runtime-src/src/colservr/queryinfo.h@ 16947

Last change on this file since 16947 was 16947, checked in by mdewsnip, 16 years ago

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.2 KB
RevLine 
[110]1/**********************************************************************
2 *
3 * queryinfo.h --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[534]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[110]9 *
[534]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[110]24 *********************************************************************/
25
26
27#ifndef QUERYINFO_H
28#define QUERYINFO_H
29
30
[114]31#include "gsdlconf.h"
[110]32#include "text_t.h"
[398]33#include "comtypes.h"
[110]34
[114]35#if defined(GSDL_USE_OBJECTSPACE)
36# include <ospace\std\vector>
37# include <ospace\std\algorithm>
38#elif defined(GSDL_USE_STL_H)
39# include <vector.h>
40# if defined(GSDL_USE_ALGO_H)
41# include <algo.h>
42# else
43# include <algorithm.h>
44# endif
45#else
[110]46# include <vector>
47# include <algorithm>
48#endif
49
50// query parameters
51
[351]52struct queryparamclass {
53 text_t combinequery;
[110]54 text_t collection;
[351]55
56 // search_index = index+subcollection+language
57 text_t index;
58 text_t subcollection;
59 text_t language;
[1319]60 text_t level; // for new mg stuff
[110]61 text_t querystring;
62 int search_type; // 0 = boolean, 1 = ranked
[503]63 int match_mode; // 0 = some, 1 = all
[110]64 int casefolding;
65 int stemming;
[12868]66 int accentfolding;
[110]67 int maxdocs;
[4193]68 int maxnumeric;
[12410]69 text_t filterstring; // Filter specified (currently only used by Lucene)
[12276]70 text_t sortfield; // Field to use for sorting result set (currently used by lucene)
[12770]71 text_t fuzziness; // Search fuzziness amount between 0.0 and 1.0 (only used by Lucene)
[110]72
[12655]73 int startresults;
74 int endresults;
75
[351]76 queryparamclass ();
77 void clear ();
[110]78 queryparamclass &operator=(const queryparamclass &q);
79};
80
81bool operator==(const queryparamclass &x, const queryparamclass &y);
82bool operator!=(const queryparamclass &x, const queryparamclass &y);
83
84// stream output for debugging purposes
85ostream &operator<< (ostream &outs, queryparamclass &q);
86
87
88
89// term frequencies
90
[351]91struct termfreqclass {
[110]92 text_t termstr;
[319]93 text_t termstemstr;
[326]94 text_tset utf8equivterms; // kept as utf8 string for fast matching
[110]95 unsigned int termfreq;
[351]96
97 termfreqclass ();
98 void clear();
[110]99 termfreqclass &operator=(const termfreqclass &t);
100};
101
[394]102typedef vector<termfreqclass> termfreqclassarray;
103
[110]104bool operator==(const termfreqclass &x, const termfreqclass &y);
105bool operator!=(const termfreqclass &x, const termfreqclass &y);
106bool operator<(const termfreqclass &x, const termfreqclass &y);
107bool operator>(const termfreqclass &x, const termfreqclass &y);
108
109// stream output for debugging purposes
110ostream &operator<< (ostream &outs, termfreqclass &q);
111
112
113
114// one query result
115
[351]116struct docresultclass {
[16947]117 text_t docid; // currently used by Lucene only
[110]118 int docnum;
119 float docweight;
[326]120 unsigned int num_query_terms_matched; // not available on all versions of mg
[351]121 int num_phrase_match; // not available on all versions of mg
122
123 docresultclass();
[503]124 ~docresultclass() {}
[351]125 void clear ();
126
127 // merges two result classes relating to a single docnum
128 docresultclass &combine(const docresultclass &d);
129
130 docresultclass &operator=(const docresultclass &d);
[110]131};
132
[503]133bool operator==(const docresultclass &x, const docresultclass &y);
134bool operator<(const docresultclass &x, const docresultclass &y);
135
136
[110]137// stream output for debugging purposes
138ostream &operator<< (ostream &outs, docresultclass &a);
139
140
// Strict "less than" comparison functor for ints: returns true exactly
// when t1 < t2.
struct ltint {
  bool operator()(const int &t1, const int &t2) const
  { return t1 < t2; }
};
[110]145
[16445]146typedef map<text_t, docresultclass, lttext_t> docresultmap;
[351]147
148
149
150// many document results
151
152struct docresultsclass {
153 docresultmap docset;
[16445]154 vector<text_t> docorder;
[351]155
156 docresultsclass ();
157 void clear ();
158 void docnum_order();
159
160 void combine_and (const docresultsclass &d);
161 void combine_or (const docresultsclass &d);
162 void combine_not (const docresultsclass &d);
163
164 docresultsclass &operator=(const docresultsclass &d);
165};
166
167
168
169
[110]170// query results
171
[351]172struct queryresultsclass {
[311]173 queryresultsclass () {clear();}
174
[12421]175 text_t error_message; // Currently only used by Lucene
[311]176 int docs_matched; // not available on all versions of mg
[398]177 isapprox is_approx;
178 // bool is_approx; // not available on all versions of mg
[4217]179 bool syntax_error; // whether the query string was invalid
[326]180 bool postprocessed; // whether this record has been post-processed
[311]181
[351]182 docresultsclass docs;
[394]183 termfreqclassarray orgterms; // terms before they are sorted and uniqued
184 termfreqclassarray terms;
[351]185 text_tset termvariants;
[12380]186 text_tset stopwords;
187
[110]188 void clear ();
189 queryresultsclass &operator=(const queryresultsclass &q);
[311]190
[319]191 void sortuniqqueryterms();
[110]192};
193
194// stream output for debugging purposes
195ostream &operator<< (ostream &outs, queryresultsclass &q);
196
197
198#endif
Note: See TracBrowser for help on using the repository browser.