root/gsdl/trunk/runtime-src/src/colservr/queryinfo.h @ 16947

Revision 16947, 5.2 KB (checked in by mdewsnip, 12 years ago)

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * queryinfo.h --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#ifndef QUERYINFO_H
28#define QUERYINFO_H
29
30
31#include "gsdlconf.h"
32#include "text_t.h"
33#include "comtypes.h"
34
35#if defined(GSDL_USE_OBJECTSPACE)
36#  include <ospace\std\vector>
37#  include <ospace\std\algorithm>
38#elif defined(GSDL_USE_STL_H)
39#  include <vector.h>
40#  if defined(GSDL_USE_ALGO_H)
41#    include <algo.h>
42#  else
43#    include <algorithm.h>
44#  endif
45#else
46#  include <vector>
47#  include <algorithm>
48#endif
49
50// query parameters
51
52struct queryparamclass {
53  text_t combinequery;
54  text_t collection;
55
56  // search_index = index+subcollection+language
57  text_t index;
58  text_t subcollection;
59  text_t language;
60  text_t level; // for new mg stuff
61  text_t querystring;
62  int search_type; // 0 = boolean, 1 = ranked
63  int match_mode; // 0 = some, 1 = all
64  int casefolding;
65  int stemming;
66  int accentfolding;
67  int maxdocs;
68  int maxnumeric;
69  text_t filterstring; // Filter specified (currently only used by Lucene)
70  text_t sortfield; // Field to use for sorting result set (currently used by lucene)
71  text_t fuzziness; // Search fuzziness amount between 0.0 and 1.0 (only used by Lucene)
72
73  int startresults;
74  int endresults;
75
76  queryparamclass ();
77  void clear ();
78  queryparamclass &operator=(const queryparamclass &q);
79};
80
81bool operator==(const queryparamclass &x, const queryparamclass &y);
82bool operator!=(const queryparamclass &x, const queryparamclass &y);
83
84// stream output for debugging purposes
85ostream &operator<< (ostream &outs, queryparamclass &q);
86
87
88
89// term frequencies
90
91struct termfreqclass {
92  text_t termstr;
93  text_t termstemstr;
94  text_tset utf8equivterms; // kept as utf8 string for fast matching
95  unsigned int termfreq;
96
97  termfreqclass ();
98  void clear();
99  termfreqclass &operator=(const termfreqclass &t);
100};
101
102typedef vector<termfreqclass> termfreqclassarray;
103
104bool operator==(const termfreqclass &x, const termfreqclass &y);
105bool operator!=(const termfreqclass &x, const termfreqclass &y);
106bool operator<(const termfreqclass &x, const termfreqclass &y);
107bool operator>(const termfreqclass &x, const termfreqclass &y);
108
109// stream output for debugging purposes
110ostream &operator<< (ostream &outs, termfreqclass &q);
111
112
113
114// one query result
115
116struct docresultclass {
117  text_t docid;  // currently used by Lucene only
118  int docnum;
119  float docweight;
120  unsigned int num_query_terms_matched;  // not available on all versions of mg
121  int num_phrase_match;                  // not available on all versions of mg
122
123  docresultclass();
124  ~docresultclass() {}
125  void clear ();
126 
127  // merges two result classes relating to a single docnum
128  docresultclass &combine(const docresultclass &d);
129
130  docresultclass &operator=(const docresultclass &d);
131};
132
133bool operator==(const docresultclass &x, const docresultclass &y);
134bool operator<(const docresultclass &x, const docresultclass &y);
135
136
137// stream output for debugging purposes
138ostream &operator<< (ostream &outs, docresultclass &a);
139
140
// Strict "less than" comparison functor for ints: returns true exactly
// when t1 < t2.
struct ltint {
  bool operator()(const int &t1, const int &t2) const
  { return t1 < t2; }
};
145
146typedef map<text_t, docresultclass, lttext_t> docresultmap;
147
148
149
150// many document results
151
152struct docresultsclass {
153  docresultmap docset;
154  vector<text_t> docorder;
155
156  docresultsclass ();
157  void clear ();
158  void docnum_order();
159
160  void combine_and (const docresultsclass &d);
161  void combine_or (const docresultsclass &d);
162  void combine_not (const docresultsclass &d);
163 
164  docresultsclass &operator=(const docresultsclass &d);
165};
166
167
168
169
170// query results
171
172struct queryresultsclass {
173  queryresultsclass () {clear();}
174
175  text_t error_message;  // Currently only used by Lucene
176  int docs_matched; // not available on all versions of mg
177  isapprox is_approx;
178  //  bool is_approx;   // not available on all versions of mg
179  bool syntax_error; // whether the query string was invalid
180  bool postprocessed; // whether this record has been post-processed
181 
182  docresultsclass docs;
183  termfreqclassarray orgterms; // terms before they are sorted and uniqued
184  termfreqclassarray terms;
185  text_tset termvariants;
186  text_tset stopwords;
187
188  void clear ();
189  queryresultsclass &operator=(const queryresultsclass &q);
190 
191  void sortuniqqueryterms();
192};
193
194// stream output for debugging purposes
195ostream &operator<< (ostream &outs, queryresultsclass &q);
196
197
198#endif
Note: See TracBrowser for help on using the browser.