source: trunk/indexers/mgpp/text/Terms.h@ 3365

Last change on this file since 3365 was 3365, checked in by kjdon, 22 years ago

Initial revision

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#ifndef TERMS_H
23#define TERMS_H
24
25#include "IndexData.h"
26
27#if defined(GSDL_USE_OBJECTSPACE)
28# include <ospace\std\iostream>
29#elif defined(GSDL_USE_IOS_H)
30# include <iostream.h>
31#else
32# include <iostream>
33#endif
34
35
// QueryInfo -- settings describing one query. Instances are taken by const
// reference by FragsToQueryResult, AndFragsToQueryResult and
// RemoveUnwantedResults (declared further down in this header).
36class QueryInfo {
37public:
38 // general query information
39 UCArray docLevel; // NOTE(review): presumably the tag of the level being searched -- confirm
40 unsigned long maxDocs; // 0 = all
41 bool sortByRank;
42 bool exactWeights;
43
44 // information that needs to be returned
45 bool needRankInfo;
46 bool needTermFreqs;
47
 // Clear() is only declared here; its body is not in this header.
 // The constructor delegates to it, so a new QueryInfo starts in whatever
 // state Clear() establishes.
48 void Clear ();
49 QueryInfo () { Clear (); }
50};
51
52
// TermFreqData -- frequency information recorded for a single term.
53class TermFreqData {
54public:
55 UCArray tag; // level tag or query tag
56 UCArray term; // unstemmed term
 // NOTE(review): declared int here, but FindWordNumbers, FragsToQueryResult
 // and AndFragsToQueryResult take stemMethod as unsigned long.
57 int stemMethod;
58 UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
 // NOTE(review): FragData::matchDocs is commented "ft for level"; the "tf"
 // below may be a transposition of "ft" -- confirm.
59 unsigned long matchDocs; // tf for level - num levels
60 // containing this term
61 unsigned long termFreq; // overall term freq - num words that
62 // are this term
 // Clear() is only declared here; the constructor calls it to set the
 // initial state.
63 void Clear ();
64 TermFreqData () { Clear (); }
65};
66
// Stream output and equality comparison for TermFreqData. Both are only
// declared in this header.
67ostream &operator<< (ostream &s, const TermFreqData &t);
68bool operator== (const TermFreqData &t1, const TermFreqData &t2);
69
// Array of TermFreqData records (used by QueryResult::termFreqs and
// BrowseQueryResult::termFreqs).
70typedef vector<TermFreqData> TermFreqArray;
71
72
// RankArray is the type of QueryResult::ranks; DocNumArray is the type of
// QueryResult::docs and ExtQueryResult::levels.
73typedef vector<float> RankArray;
74typedef vector<unsigned long> DocNumArray;
75
// QueryResult -- container for what a query produces: document numbers,
// ranks and per-term frequency data.
// NOTE(review): whether docs and ranks are index-aligned is not visible in
// this header -- confirm against the definitions.
76class QueryResult {
77public:
78 DocNumArray docs;
79 RankArray ranks; // used for accumulators during query
80
81 TermFreqArray termFreqs;
82
 // NOTE(review): presumably the number of matches before any maxDocs limit
 // is applied -- confirm.
83 unsigned long actualNumDocs;
 // Clear(), the constructor and printShort() are only declared here.
84 void Clear ();
85 QueryResult ();
 // NOTE(review): presumably writes an abbreviated form of the result to s.
 // It is not declared const, unlike the operator<< overload for QueryResult,
 // which takes the result by const reference.
86 void printShort(ostream &s);
87};
88
// Stream output and equality comparison for QueryResult. Both are only
// declared in this header.
89ostream &operator<< (ostream &s, const QueryResult &r);
90bool operator== (const QueryResult &r1, const QueryResult &r2);
91
// Both are vectors of unsigned long; they are the types of
// FragData::fragNums and FragData::fragFreqs respectively.
92typedef vector<unsigned long> FragNumArray;
93typedef vector<unsigned long> FragFreqArray;
94
// FragData -- a matchDocs count plus two arrays, fragNums and fragFreqs.
// NOTE(review): whether fragNums and fragFreqs are index-aligned, and whether
// fragFreqs is filled only when a caller passes needFragFreqs = true, is not
// visible in this header -- confirm against the definitions.
95class FragData {
96public:
97 unsigned long matchDocs; // ft for level
98 FragNumArray fragNums;
99 FragFreqArray fragFreqs;
100
 // Clear() is only declared here; the constructor calls it to set the
 // initial state.
101 void Clear ();
102 FragData () { Clear (); }
103};
104
105
// FragRange -- a pair of unsigned long bounds, rangeStart and rangeEnd.
// NOTE(review): whether rangeEnd is inclusive is not visible in this header.
106class FragRange {
107public:
108 unsigned long rangeStart;
109 unsigned long rangeEnd;
110
 // Clear() resets both bounds to 0. The constructor calls it, so a newly
 // constructed FragRange has rangeStart == rangeEnd == 0.
111 void Clear () { rangeStart = rangeEnd = 0; }
112 FragRange () { Clear (); }
113};
114
// Array of FragRange. ReadTermFragData and AndCombineFragData take a pointer
// to one of these as their fragLimits parameter.
115typedef vector<FragRange> FragRangeArray;
116
117
118
// FindWordNumbers -- only declared here; the definition is not in this header.
// NOTE(review): the roles below are inferred from names and constness only;
// confirm against the definition.
//   indexData  - non-const reference to the index data
//   term       - read-only term
//   stemMethod - unsigned long here (TermFreqData::stemMethod is int)
//   equivWords - non-const reference; presumably receives the word numbers
119void FindWordNumbers (IndexData &indexData,
120 const UCArray &term,
121 unsigned long stemMethod,
122 vector<unsigned long> &equivWords);
123
// ReadTermFragData -- only declared here; the definition is not in this header.
// NOTE(review): the roles below are inferred from names and constness only;
// confirm against the definition.
//   indexData     - non-const reference to the index data
//   needFragFreqs - flag; presumably controls whether fragFreqs is filled
//   termNum       - number of the term to read
//   fragData      - non-const reference; presumably receives the data read
//   fragLimits    - non-const pointer to a FragRangeArray; whether it may be
//                   null is not visible here
//   termWord      - non-const reference; presumably receives the term's text
124void ReadTermFragData (IndexData &indexData,
125 bool needFragFreqs,
126 unsigned long termNum,
127 FragData &fragData,
128 FragRangeArray *fragLimits,
129 UCArray &termWord);
130
// CombineFragData -- only declared here; the definition is not in this header.
// Takes two read-only FragData inputs (f1, f2) and a separate non-const
// outFragData, in contrast to AndCombineFragData, which writes back into its
// own fragData argument.
// NOTE(review): how f1 and f2 are merged is not visible here -- confirm.
131void CombineFragData (bool needFragFreqs,
132 const FragData &f1,
133 const FragData &f2,
134 FragData &outFragData);
135
136// output will be in fragData (as this is an and operation)
// Only declared here; the definition is not in this header.
// startRange and endRange are signed long, unlike the unsigned bounds stored
// in FragRange. fragLimits is a pointer to const; whether it may be null is
// not visible here.
// NOTE(review): the exact meaning of startRange/endRange is not visible in
// this header -- confirm against the definition.
137void AndCombineFragData (bool needFragFreqs,
138 FragData &fragData,
139 const FragData &comFragData,
140 signed long startRange,
141 signed long endRange,
142 const FragRangeArray *fragLimits);
143
// FragsToQueryResult -- only declared here; the definition is not in this
// header. Its parameter list is identical to AndFragsToQueryResult's.
// queryInfo, termData, tag and term are read-only; indexData, equivTerms and
// result are non-const references.
// NOTE(review): presumably result receives the converted data and
// tag/term/stemMethod/equivTerms describe the term being added; confirm
// against the definition. How termWeight is applied is not visible here.
144void FragsToQueryResult (IndexData &indexData,
145 const QueryInfo &queryInfo,
146 const FragData &termData,
147 const UCArray &tag,
148 const UCArray &term,
149 unsigned long stemMethod,
150 unsigned long termWeight,
151 UCArrayVector &equivTerms,
152 QueryResult &result);
153
// AndFragsToQueryResult -- only declared here; the definition is not in this
// header. Its parameter list is identical to FragsToQueryResult's.
// queryInfo, termData, tag and term are read-only; indexData, equivTerms and
// result are non-const references.
// NOTE(review): presumably combines termData with what result already holds
// using "and" semantics -- confirm against the definition.
154void AndFragsToQueryResult (IndexData &indexData,
155 const QueryInfo &queryInfo,
156 const FragData &termData,
157 const UCArray &tag,
158 const UCArray &term,
159 unsigned long stemMethod,
160 unsigned long termWeight,
161 UCArrayVector &equivTerms,
162 QueryResult &result);
163
// RemoveUnwantedResults -- only declared here; the definition is not in this
// header. queryInfo and termData are read-only; result is a non-const
// reference and is therefore the only argument (besides indexData) that can
// be modified.
// NOTE(review): the criteria for "unwanted" are not visible here -- confirm
// against the definition.
164void RemoveUnwantedResults (IndexData &indexData,
165 const QueryInfo &queryInfo,
166 const FragData &termData,
167 QueryResult &result);
168
169//-----------------------------------------------------------------
170// new QueryResult class to handle retrieval of doc and level nums.
171// Use this class with extended version of MGQuery
172
// Publicly derives from QueryResult and adds the levels array.
// Clear() declared here hides QueryResult::Clear(); neither is declared
// virtual, so a call through a QueryResult reference or pointer resolves to
// the base version. QueryResult declares no virtual destructor either.
173class ExtQueryResult : public QueryResult {
174public:
175 DocNumArray levels; // used for returning a different granularity, eg
176 // search sections but return Document numbers, or search Documents,
177 // return Section numbers.
178
 // Both are only declared here; their bodies are not in this header.
179 void Clear ();
180 ExtQueryResult ();
181};
182
// Stream output and equality comparison for ExtQueryResult. Both are only
// declared in this header.
183ostream &operator<< (ostream &s, const ExtQueryResult &r);
184bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
185
186//------------------------------------------------------------
187// new functions to handle full text browse
188
// BrowseQueryResult -- holds only a TermFreqArray. It is not derived from
// QueryResult.
189class BrowseQueryResult {
190 public:
191 TermFreqArray termFreqs;
 // Clear() and the constructor are only declared here; their bodies are
 // not in this header.
192 void Clear();
193 BrowseQueryResult ();
194
195};
196
197
// Stream output and equality comparison for BrowseQueryResult. Both are only
// declared in this header.
198ostream &operator<< (ostream &s, const BrowseQueryResult &r);
199bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);
200
// FindNearestWordNumber -- only declared here; the definition is not in this
// header. term is read-only; number is a non-const reference.
// NOTE(review): presumably number receives the word number closest to term;
// what "nearest" means is not visible here -- confirm.
201void FindNearestWordNumber (IndexData &indexData,
202 const UCArray &term,
203 unsigned long &number);
204
// GetTermList (TermFreqArray overload) -- only declared here; the definition
// is not in this header. Differs from the other overload only in the type of
// terms (TermFreqArray rather than UCArrayVector).
// NOTE(review): presumably fills terms with up to numTerms entries starting
// at term number startTerm -- confirm against the definition.
205void GetTermList(IndexData &indexData,
206 unsigned long startTerm,
207 unsigned long numTerms,
208 TermFreqArray &terms);
209
// GetTermList (UCArrayVector overload) -- only declared here; the definition
// is not in this header. Differs from the other overload only in the type of
// terms (UCArrayVector rather than TermFreqArray).
// NOTE(review): presumably fills terms with up to numTerms entries starting
// at term number startTerm -- confirm against the definition.
210void GetTermList (IndexData &indexData,
211 unsigned long startTerm,
212 unsigned long numTerms,
213 UCArrayVector &terms);
214
215#endif
Note: See TracBrowser for help on using the repository browser.