source: trunk/gsdl/src/mgpp/text/Terms.h@ 1300

Last change on this file since 1300 was 1300, checked in by kjm18, 24 years ago

added full text browsing functionality

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Terms.h 1300 2000-07-24 02:46:11Z kjm18 $
21 *
22 **************************************************************************/
23
24#ifndef TERMS_H
25#define TERMS_H
26
27#include "IndexData.h"
28
29#if defined(GSDL_USE_OBJECTSPACE)
30# include <ospace\std\iostream>
31#elif defined(GSDL_USE_IOS_H)
32# include <iostream.h>
33#else
34# include <iostream>
35#endif
36
37
38class QueryInfo {
39public:
40 // general query information
41 UCArray docLevel;
42 unsigned long maxDocs; // 0 = all
43 bool sortByRank;
44 bool exactWeights;
45
46 // information need to return
47 bool needRankInfo;
48 bool needTermFreqs;
49
50 void Clear ();
51 QueryInfo () { Clear (); }
52};
53
54
55class TermFreqData {
56public:
57 UCArray tag; // level tag or query tag
58 UCArray term; // unstemmed term
59 int stemMethod;
60 unsigned long matchDocs; // tf for level - num levels
61 // containing this term
62 unsigned long termFreq; // overall term freq - num words that
63 // are this term
64 void Clear ();
65 TermFreqData () { Clear (); }
66};
67
68ostream &operator<< (ostream &s, const TermFreqData &t);
69bool operator== (const TermFreqData &t1, const TermFreqData &t2);
70
71typedef vector<TermFreqData> TermFreqArray;
72
73
74typedef vector<float> RankArray;
75typedef vector<unsigned long> DocNumArray;
76
77class QueryResult {
78public:
79 DocNumArray docs;
80 RankArray ranks; // used for accumulators during query
81
82 TermFreqArray termFreqs;
83
84 void Clear ();
85 QueryResult ();
86};
87
88ostream &operator<< (ostream &s, const QueryResult &r);
89bool operator== (const QueryResult &r1, const QueryResult &r2);
90
91typedef vector<unsigned long> FragNumArray;
92typedef vector<unsigned long> FragFreqArray;
93
94class FragData {
95public:
96 unsigned long matchDocs; // ft for level
97 FragNumArray fragNums;
98 FragFreqArray fragFreqs;
99
100 void Clear ();
101 FragData () { Clear (); }
102};
103
104
// A range of fragment numbers described by its two end points.
// NOTE(review): whether rangeEnd is inclusive is not visible in this
// header -- check the code that builds and consumes the ranges.
class FragRange {
public:
  // A fresh range has both end points at zero, the same state that
  // Clear produces.
  FragRange () : rangeStart (0), rangeEnd (0) {}

  // Put both end points back to zero.
  void Clear () {
    rangeStart = 0;
    rangeEnd = 0;
  }

  unsigned long rangeStart;
  unsigned long rangeEnd;
};
113
114typedef vector<FragRange> FragRangeArray;
115
116
117
118void FindWordNumbers (IndexData &indexData,
119 const UCArray &term,
120 unsigned long stemMethod,
121 vector<unsigned long> &equivWords);
122
123void ReadTermFragData (IndexData &indexData,
124 bool needFragFreqs,
125 unsigned long termNum,
126 FragData &fragData,
127 FragRangeArray *fragLimits);
128
129void CombineFragData (bool needFragFreqs,
130 const FragData &f1,
131 const FragData &f2,
132 FragData &outFragData);
133
134// output will be in fragData (as this is an and operation)
135void AndCombineFragData (bool needFragFreqs,
136 FragData &fragData,
137 const FragData &comFragData,
138 signed long startRange,
139 signed long endRange,
140 const FragRangeArray *fragLimits);
141
142void FragsToQueryResult (IndexData &indexData,
143 const QueryInfo &queryInfo,
144 const FragData &termData,
145 const UCArray &tag,
146 const UCArray &term,
147 unsigned long stemMethod,
148 unsigned long termWeight,
149 QueryResult &result);
150
151void AndFragsToQueryResult (IndexData &indexData,
152 const QueryInfo &queryInfo,
153 const FragData &termData,
154 const UCArray &tag,
155 const UCArray &term,
156 unsigned long stemMethod,
157 unsigned long termWeight,
158 QueryResult &result);
159
160void RemoveUnwantedResults (IndexData &indexData,
161 const QueryInfo &queryInfo,
162 const FragData &termData,
163 QueryResult &result);
164
165//-----------------------------------------------------------------
166// new QueryResult class to handle retrieval of doc and level nums.
167// Use this class with extended version of MGQuery
168
169class ExtQueryResult : public QueryResult {
170public:
171 DocNumArray levels; // used for returning a different granularity, eg
172 // search sections but return Document numbers, or search Documents,
173 // return Section numbers.
174
175 void Clear ();
176 ExtQueryResult ();
177};
178
179ostream &operator<< (ostream &s, const ExtQueryResult &r);
180bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
181
182//------------------------------------------------------------
183// new functions to handle full text browse
184
185class BrowseQueryResult {
186 public:
187 TermFreqArray termFreqs;
188 void Clear();
189 BrowseQueryResult ();
190
191};
192
193
194ostream &operator<< (ostream &s, const BrowseQueryResult &r);
195bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);
196
197void FindNearestWordNumber (IndexData &indexData,
198 const UCArray &term,
199 unsigned long &number);
200
201void GetTermList(IndexData &indexData,
202 unsigned long startTerm,
203 unsigned long numTerms,
204 TermFreqArray &terms);
205
206void GetTermList (IndexData &indexData,
207 unsigned long startTerm,
208 unsigned long numTerms,
209 UCArrayVector &terms);
210
211#endif
212
213
214
215
216
Note: See TracBrowser for help on using the repository browser.