source: main/trunk/greenstone2/runtime-src/src/colservr/queryinfo.h@ 22452

Last change on this file since 22452 was 16947, checked in by mdewsnip, 16 years ago

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.2 KB
Line 
1/**********************************************************************
2 *
3 * queryinfo.h --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#ifndef QUERYINFO_H
28#define QUERYINFO_H
29
30
31#include "gsdlconf.h"
32#include "text_t.h"
33#include "comtypes.h"
34
35#if defined(GSDL_USE_OBJECTSPACE)
36# include <ospace\std\vector>
37# include <ospace\std\algorithm>
38#elif defined(GSDL_USE_STL_H)
39# include <vector.h>
40# if defined(GSDL_USE_ALGO_H)
41# include <algo.h>
42# else
43# include <algorithm.h>
44# endif
45#else
46# include <vector>
47# include <algorithm>
48#endif
49
50// query parameters -- plain data struct holding the settings of one query
51
52struct queryparamclass {
53 text_t combinequery;
54 text_t collection;
55
56 // search_index = index+subcollection+language
57 text_t index;
58 text_t subcollection;
59 text_t language;
60 text_t level; // for new mg stuff
61 text_t querystring;
62 int search_type; // 0 = boolean, 1 = ranked
63 int match_mode; // 0 = some, 1 = all
64 int casefolding; // NOTE(review): presumably an on/off flag held in an int -- confirm in the implementation file
65 int stemming; // NOTE(review): presumably an on/off flag held in an int -- confirm in the implementation file
66 int accentfolding; // NOTE(review): presumably an on/off flag held in an int -- confirm in the implementation file
67 int maxdocs;
68 int maxnumeric;
69 text_t filterstring; // Filter specified (currently only used by Lucene)
70 text_t sortfield; // Field to use for sorting result set (currently used by Lucene)
71 text_t fuzziness; // Search fuzziness amount between 0.0 and 1.0 (only used by Lucene); note it is held as text, not as a numeric type
72
73 int startresults; // NOTE(review): presumably the first result position wanted -- confirm indexing base with callers
74 int endresults; // NOTE(review): presumably the last result position wanted -- confirm inclusiveness with callers
75
76 queryparamclass (); // declared only; the definition is not in this header
77 void clear (); // declared only; the definition is not in this header
78 queryparamclass &operator=(const queryparamclass &q); // user-declared copy assignment, defined elsewhere
79};
80
81bool operator==(const queryparamclass &x, const queryparamclass &y); // defined elsewhere; which members are compared is not visible here
82bool operator!=(const queryparamclass &x, const queryparamclass &y);
83
84// stream output for debugging purposes
85ostream &operator<< (ostream &outs, queryparamclass &q); // q is a non-const reference, so const objects and temporaries cannot be streamed
86
87
88
89// term frequencies -- one record per query term
90
91struct termfreqclass {
92 text_t termstr;
93 text_t termstemstr;
94 text_tset utf8equivterms; // kept as utf8 string for fast matching
95 unsigned int termfreq;
96
97 termfreqclass (); // declared only; the definition is not in this header
98 void clear(); // declared only; the definition is not in this header
99 termfreqclass &operator=(const termfreqclass &t); // user-declared copy assignment, defined elsewhere
100};
101
102typedef vector<termfreqclass> termfreqclassarray; // sequence of term records; used by queryresultsclass below
103
104bool operator==(const termfreqclass &x, const termfreqclass &y); // defined elsewhere; the members compared are not visible here
105bool operator!=(const termfreqclass &x, const termfreqclass &y);
106bool operator<(const termfreqclass &x, const termfreqclass &y); // ordering criterion is defined in the implementation file
107bool operator>(const termfreqclass &x, const termfreqclass &y);
108
109// stream output for debugging purposes
110ostream &operator<< (ostream &outs, termfreqclass &q); // q is a non-const reference, so const objects and temporaries cannot be streamed
111
112
113
114// one query result -- the per-document record stored in docresultmap
115
116struct docresultclass {
117 text_t docid; // currently used by Lucene only
118 int docnum;
119 float docweight;
120 unsigned int num_query_terms_matched; // not available on all versions of mg
121 int num_phrase_match; // not available on all versions of mg
122
123 docresultclass(); // declared only; the definition is not in this header
124 ~docresultclass() {} // empty inline destructor; members clean up after themselves
125 void clear (); // declared only; the definition is not in this header
126
127 // merges two result classes relating to a single docnum
128 docresultclass &combine(const docresultclass &d); // returns a docresultclass reference; how the fields are merged is defined elsewhere
129
130 docresultclass &operator=(const docresultclass &d); // user-declared copy assignment, defined elsewhere
131};
132
133bool operator==(const docresultclass &x, const docresultclass &y); // only == and < are declared in this header (no != or >)
134bool operator<(const docresultclass &x, const docresultclass &y); // ordering criterion is defined in the implementation file
135
136
137// stream output for debugging purposes
138ostream &operator<< (ostream &outs, docresultclass &a); // a is a non-const reference, so const objects and temporaries cannot be streamed
139
140
141struct ltint { // function object giving strict less-than ordering on ints; not referenced elsewhere in this header
142 bool operator()(const int &t1, const int &t2) const
143 { return t1 < t2; } // true when t1 is strictly smaller than t2
144};
145
146typedef map<text_t, docresultclass, lttext_t> docresultmap; // text_t key -> docresultclass, ordered by lttext_t; NOTE(review): <map> is not included directly here, presumably it arrives via text_t.h -- confirm
147
148
149
150// many document results
151
152struct docresultsclass {
153 docresultmap docset; // the results, looked up by text_t key
154 vector<text_t> docorder; // NOTE(review): presumably the keys of docset in presentation order -- confirm in the implementation file
155
156 docresultsclass (); // declared only; the definition is not in this header
157 void clear (); // declared only; the definition is not in this header
158 void docnum_order(); // NOTE(review): presumably orders the results by docnum -- confirm in the implementation file
159
160 void combine_and (const docresultsclass &d); // NOTE(review): the three combine_* members presumably apply AND / OR / NOT set logic against d -- confirm
161 void combine_or (const docresultsclass &d);
162 void combine_not (const docresultsclass &d);
163
164 docresultsclass &operator=(const docresultsclass &d); // user-declared copy assignment, defined elsewhere
165};
166
167
168
169
170// query results -- everything reported back for one query: status fields, matching documents and term information
171
172struct queryresultsclass {
173 queryresultsclass () {clear();} // inline constructor; delegates all initialisation to clear(), which is defined elsewhere
174
175 text_t error_message; // Currently only used by Lucene
176 int docs_matched; // not available on all versions of mg
177 isapprox is_approx; // isapprox is declared outside this header
178 // bool is_approx; // not available on all versions of mg
179 bool syntax_error; // whether the query string was invalid
180 bool postprocessed; // whether this record has been post-processed
181
182 docresultsclass docs;
183 termfreqclassarray orgterms; // terms before they are sorted and uniqued
184 termfreqclassarray terms;
185 text_tset termvariants;
186 text_tset stopwords;
187
188 void clear (); // declared only; called by the constructor above
189 queryresultsclass &operator=(const queryresultsclass &q); // user-declared copy assignment, defined elsewhere
190
191 void sortuniqqueryterms(); // NOTE(review): presumably builds the sorted, de-duplicated terms from orgterms -- confirm in the implementation file
192};
193
194// stream output for debugging purposes
195ostream &operator<< (ostream &outs, queryresultsclass &q); // q is a non-const reference, so const objects and temporaries cannot be streamed
196
197
198#endif
Note: See TracBrowser for help on using the repository browser.