source: trunk/gsdl/src/mgpp/text/Terms.h@ 1836

Last change on this file since 1836 was 1836, checked in by kjm18, 23 years ago

Added support for equivalent terms for highlighting. The QueryResult.TermFreqData
now has a UCArrayVector equivTerms member.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Terms.h 1836 2001-01-14 23:56:57Z kjm18 $
21 *
22 **************************************************************************/
23
24#ifndef TERMS_H
25#define TERMS_H
26
27#include "IndexData.h"
28
29#if defined(GSDL_USE_OBJECTSPACE)
30# include <ospace\std\iostream>
31#elif defined(GSDL_USE_IOS_H)
32# include <iostream.h>
33#else
34# include <iostream>
35#endif
36
37
38class QueryInfo {
39public:
40 // general query information
41 UCArray docLevel;
42 unsigned long maxDocs; // 0 = all
43 bool sortByRank;
44 bool exactWeights;
45
46 // information need to return
47 bool needRankInfo;
48 bool needTermFreqs;
49
50 void Clear ();
51 QueryInfo () { Clear (); }
52};
53
54
55class TermFreqData {
56public:
57 UCArray tag; // level tag or query tag
58 UCArray term; // unstemmed term
59 int stemMethod;
60 UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
61 unsigned long matchDocs; // tf for level - num levels
62 // containing this term
63 unsigned long termFreq; // overall term freq - num words that
64 // are this term
65 void Clear ();
66 TermFreqData () { Clear (); }
67};
68
69ostream &operator<< (ostream &s, const TermFreqData &t);
70bool operator== (const TermFreqData &t1, const TermFreqData &t2);
71
72typedef vector<TermFreqData> TermFreqArray;
73
74
75typedef vector<float> RankArray;
76typedef vector<unsigned long> DocNumArray;
77
78class QueryResult {
79public:
80 DocNumArray docs;
81 RankArray ranks; // used for accumulators during query
82
83 TermFreqArray termFreqs;
84
85 unsigned long actualNumDocs;
86 void Clear ();
87 QueryResult ();
88};
89
90ostream &operator<< (ostream &s, const QueryResult &r);
91bool operator== (const QueryResult &r1, const QueryResult &r2);
92
93typedef vector<unsigned long> FragNumArray;
94typedef vector<unsigned long> FragFreqArray;
95
96class FragData {
97public:
98 unsigned long matchDocs; // ft for level
99 FragNumArray fragNums;
100 FragFreqArray fragFreqs;
101
102 void Clear ();
103 FragData () { Clear (); }
104};
105
106
// A pair of fragment numbers marking the start and end of a range.
// NOTE(review): whether rangeEnd is inclusive is not visible from this
// header -- confirm against the users of FragRangeArray.
class FragRange {
public:
  unsigned long rangeStart;
  unsigned long rangeEnd;

  // Reset both bounds to 0.
  void Clear () { rangeStart = rangeEnd = 0; }

  // Construct a range with both bounds set to 0.
  FragRange () { Clear (); }
};
115
116typedef vector<FragRange> FragRangeArray;
117
118
119
120void FindWordNumbers (IndexData &indexData,
121 const UCArray &term,
122 unsigned long stemMethod,
123 vector<unsigned long> &equivWords);
124
125void ReadTermFragData (IndexData &indexData,
126 bool needFragFreqs,
127 unsigned long termNum,
128 FragData &fragData,
129 FragRangeArray *fragLimits,
130 UCArray &termWord);
131
132void CombineFragData (bool needFragFreqs,
133 const FragData &f1,
134 const FragData &f2,
135 FragData &outFragData);
136
137// output will be in fragData (as this is an and operation)
138void AndCombineFragData (bool needFragFreqs,
139 FragData &fragData,
140 const FragData &comFragData,
141 signed long startRange,
142 signed long endRange,
143 const FragRangeArray *fragLimits);
144
145void FragsToQueryResult (IndexData &indexData,
146 const QueryInfo &queryInfo,
147 const FragData &termData,
148 const UCArray &tag,
149 const UCArray &term,
150 unsigned long stemMethod,
151 unsigned long termWeight,
152 UCArrayVector &equivTerms,
153 QueryResult &result);
154
155void AndFragsToQueryResult (IndexData &indexData,
156 const QueryInfo &queryInfo,
157 const FragData &termData,
158 const UCArray &tag,
159 const UCArray &term,
160 unsigned long stemMethod,
161 unsigned long termWeight,
162 UCArrayVector &equivTerms,
163 QueryResult &result);
164
165void RemoveUnwantedResults (IndexData &indexData,
166 const QueryInfo &queryInfo,
167 const FragData &termData,
168 QueryResult &result);
169
170//-----------------------------------------------------------------
171// new QueryResult class to handle retrieval of doc and level nums.
172// Use this class with extended version of MGQuery
173
174class ExtQueryResult : public QueryResult {
175public:
176 DocNumArray levels; // used for returning a different granularity, eg
177 // search sections but return Document numbers, or search Documents,
178 // return Section numbers.
179
180 void Clear ();
181 ExtQueryResult ();
182};
183
184ostream &operator<< (ostream &s, const ExtQueryResult &r);
185bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
186
187//------------------------------------------------------------
188// new functions to handle full text browse
189
190class BrowseQueryResult {
191 public:
192 TermFreqArray termFreqs;
193 void Clear();
194 BrowseQueryResult ();
195
196};
197
198
199ostream &operator<< (ostream &s, const BrowseQueryResult &r);
200bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);
201
202void FindNearestWordNumber (IndexData &indexData,
203 const UCArray &term,
204 unsigned long &number);
205
206void GetTermList(IndexData &indexData,
207 unsigned long startTerm,
208 unsigned long numTerms,
209 TermFreqArray &terms);
210
211void GetTermList (IndexData &indexData,
212 unsigned long startTerm,
213 unsigned long numTerms,
214 UCArrayVector &terms);
215
216#endif
217
218
219
220
221
Note: See TracBrowser for help on using the repository browser.