source: trunk/gsdl/src/mgpp/text/Terms.h@ 1847

Last change on this file since 1847 was 1847, checked in by kjm18, 23 years ago

Added a QueryResult::printShort routine, which outputs the term frequency
information but not the document results (used for timing experiments).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Terms.h 1847 2001-01-22 01:47:56Z kjm18 $
21 *
22 **************************************************************************/
23
24#ifndef TERMS_H
25#define TERMS_H
26
27#include "IndexData.h"
28
29#if defined(GSDL_USE_OBJECTSPACE)
30# include <ospace\std\iostream>
31#elif defined(GSDL_USE_IOS_H)
32# include <iostream.h>
33#else
34# include <iostream>
35#endif
36
37
38class QueryInfo {
39public:
40 // general query information
41 UCArray docLevel;
42 unsigned long maxDocs; // 0 = all
43 bool sortByRank;
44 bool exactWeights;
45
46 // information need to return
47 bool needRankInfo;
48 bool needTermFreqs;
49
50 void Clear ();
51 QueryInfo () { Clear (); }
52};
53
54
55class TermFreqData {
56public:
57 UCArray tag; // level tag or query tag
58 UCArray term; // unstemmed term
59 int stemMethod;
60 UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
61 unsigned long matchDocs; // tf for level - num levels
62 // containing this term
63 unsigned long termFreq; // overall term freq - num words that
64 // are this term
65 void Clear ();
66 TermFreqData () { Clear (); }
67};
68
69ostream &operator<< (ostream &s, const TermFreqData &t);
70bool operator== (const TermFreqData &t1, const TermFreqData &t2);
71
72typedef vector<TermFreqData> TermFreqArray;
73
74
75typedef vector<float> RankArray;
76typedef vector<unsigned long> DocNumArray;
77
78class QueryResult {
79public:
80 DocNumArray docs;
81 RankArray ranks; // used for accumulators during query
82
83 TermFreqArray termFreqs;
84
85 unsigned long actualNumDocs;
86 void Clear ();
87 QueryResult ();
88 void printShort(ostream &s);
89};
90
91ostream &operator<< (ostream &s, const QueryResult &r);
92bool operator== (const QueryResult &r1, const QueryResult &r2);
93
94typedef vector<unsigned long> FragNumArray;
95typedef vector<unsigned long> FragFreqArray;
96
97class FragData {
98public:
99 unsigned long matchDocs; // ft for level
100 FragNumArray fragNums;
101 FragFreqArray fragFreqs;
102
103 void Clear ();
104 FragData () { Clear (); }
105};
106
107
// A range described by a start value and an end value.
class FragRange {
public:
  // a new range has both ends set to 0
  FragRange () : rangeStart (0), rangeEnd (0) {}

  // set both ends of the range back to 0
  void Clear () {
    rangeEnd = 0;
    rangeStart = 0;
  }

  unsigned long rangeStart;
  unsigned long rangeEnd;
};
116
117typedef vector<FragRange> FragRangeArray;
118
119
120
121void FindWordNumbers (IndexData &indexData,
122 const UCArray &term,
123 unsigned long stemMethod,
124 vector<unsigned long> &equivWords);
125
126void ReadTermFragData (IndexData &indexData,
127 bool needFragFreqs,
128 unsigned long termNum,
129 FragData &fragData,
130 FragRangeArray *fragLimits,
131 UCArray &termWord);
132
133void CombineFragData (bool needFragFreqs,
134 const FragData &f1,
135 const FragData &f2,
136 FragData &outFragData);
137
138// output will be in fragData (as this is an and operation)
139void AndCombineFragData (bool needFragFreqs,
140 FragData &fragData,
141 const FragData &comFragData,
142 signed long startRange,
143 signed long endRange,
144 const FragRangeArray *fragLimits);
145
146void FragsToQueryResult (IndexData &indexData,
147 const QueryInfo &queryInfo,
148 const FragData &termData,
149 const UCArray &tag,
150 const UCArray &term,
151 unsigned long stemMethod,
152 unsigned long termWeight,
153 UCArrayVector &equivTerms,
154 QueryResult &result);
155
156void AndFragsToQueryResult (IndexData &indexData,
157 const QueryInfo &queryInfo,
158 const FragData &termData,
159 const UCArray &tag,
160 const UCArray &term,
161 unsigned long stemMethod,
162 unsigned long termWeight,
163 UCArrayVector &equivTerms,
164 QueryResult &result);
165
166void RemoveUnwantedResults (IndexData &indexData,
167 const QueryInfo &queryInfo,
168 const FragData &termData,
169 QueryResult &result);
170
171//-----------------------------------------------------------------
172// new QueryResult class to handle retrieval of doc and level nums.
173// Use this class with extended version of MGQuery
174
175class ExtQueryResult : public QueryResult {
176public:
177 DocNumArray levels; // used for returning a different granularity, eg
178 // search sections but return Document numbers, or search Documents,
179 // return Section numbers.
180
181 void Clear ();
182 ExtQueryResult ();
183};
184
185ostream &operator<< (ostream &s, const ExtQueryResult &r);
186bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
187
188//------------------------------------------------------------
189// new functions to handle full text browse
190
191class BrowseQueryResult {
192 public:
193 TermFreqArray termFreqs;
194 void Clear();
195 BrowseQueryResult ();
196
197};
198
199
200ostream &operator<< (ostream &s, const BrowseQueryResult &r);
201bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);
202
203void FindNearestWordNumber (IndexData &indexData,
204 const UCArray &term,
205 unsigned long &number);
206
207void GetTermList(IndexData &indexData,
208 unsigned long startTerm,
209 unsigned long numTerms,
210 TermFreqArray &terms);
211
212void GetTermList (IndexData &indexData,
213 unsigned long startTerm,
214 unsigned long numTerms,
215 UCArrayVector &terms);
216
217#endif
218
219
220
221
222
Note: See TracBrowser for help on using the repository browser.