source: main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mgpp/text/Terms.h@ 23508

Last change on this file since 23508 was 23508, checked in by sjm84, 13 years ago

Committing 64 bit changes into the branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#ifndef TERMS_H
23#define TERMS_H
24
25#include "IndexData.h"
26#include "mglong.h"
27
28#if defined(GSDL_USE_OBJECTSPACE)
29# include <ospace\std\iostream>
30#elif defined(GSDL_USE_IOS_H)
31# include <iostream.h>
32#else
33# include <iostream>
34#endif
35
36
37class QueryInfo {
38public:
39 // general query information
40 UCArray docLevel;
41 mg_u_long maxDocs; // 0 = all
42 bool sortByRank;
43 bool exactWeights;
44
45 // information need to return
46 bool needRankInfo;
47 bool needTermFreqs;
48
49 void Clear ();
50 QueryInfo () { Clear (); }
51};
52
53
54class TermFreqData {
55public:
56 UCArray tag; // level tag or query tag
57 UCArray term; // unstemmed term
58 int stemMethod;
59 UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
60 mg_u_long matchDocs; // tf for level - num levels
61 // containing this term
62 mg_u_long termFreq; // overall term freq - num words that
63 // are this term
64 void Clear ();
65 TermFreqData () { Clear (); }
66};
67
68ostream &operator<< (ostream &s, const TermFreqData &t);
69bool operator== (const TermFreqData &t1, const TermFreqData &t2);
70
71typedef vector<TermFreqData> TermFreqArray;
72
73
74typedef vector<float> RankArray;
75typedef vector<mg_u_long> DocNumArray;
76
77class QueryResult {
78public:
79 DocNumArray docs;
80 RankArray ranks; // used for accumulators during query
81
82 TermFreqArray termFreqs;
83
84 mg_u_long actualNumDocs;
85 void Clear ();
86 QueryResult ();
87 void printShort(ostream &s);
88};
89
90ostream &operator<< (ostream &s, const QueryResult &r);
91bool operator== (const QueryResult &r1, const QueryResult &r2);
92
93typedef vector<mg_u_long> FragNumArray;
94typedef vector<mg_u_long> FragFreqArray;
95
96class FragData {
97public:
98 mg_u_long matchDocs; // ft for level
99 FragNumArray fragNums;
100 FragFreqArray fragFreqs;
101
102 void Clear ();
103 FragData () { Clear (); }
104};
105
106
107class FragRange {
108public:
109 mg_u_long rangeStart;
110 mg_u_long rangeEnd;
111
112 void Clear () { rangeStart = rangeEnd = 0; }
113 FragRange () { Clear (); }
114};
115
116typedef vector<FragRange> FragRangeArray;
117
118
119
120void FindWordNumbers (IndexData &indexData,
121 const UCArray &term,
122 mg_u_long stemMethod,
123 vector<mg_u_long> &equivWords);
124
125void ReadTermFragData (IndexData &indexData,
126 bool needFragFreqs,
127 mg_u_long termNum,
128 FragData &fragData,
129 FragRangeArray *fragLimits,
130 UCArray &termWord);
131
132void CombineFragData (bool needFragFreqs,
133 const FragData &f1,
134 const FragData &f2,
135 FragData &outFragData);
136
137// output will be in fragData (as this is an and operation)
138void AndCombineFragData (bool needFragFreqs,
139 FragData &fragData,
140 const FragData &comFragData,
141 mg_s_long startRange,
142 mg_s_long endRange,
143 const FragRangeArray *fragLimits);
144
145void FragsToQueryResult (IndexData &indexData,
146 const QueryInfo &queryInfo,
147 const FragData &termData,
148 const UCArray &tag,
149 const UCArray &term,
150 mg_u_long stemMethod,
151 mg_u_long termWeight,
152 UCArrayVector &equivTerms,
153 QueryResult &result);
154
155void AndFragsToQueryResult (IndexData &indexData,
156 const QueryInfo &queryInfo,
157 const FragData &termData,
158 const UCArray &tag,
159 const UCArray &term,
160 mg_u_long stemMethod,
161 mg_u_long termWeight,
162 UCArrayVector &equivTerms,
163 QueryResult &result);
164
165void RemoveUnwantedResults (IndexData &indexData,
166 const QueryInfo &queryInfo,
167 const FragData &termData,
168 QueryResult &result);
169
170//-----------------------------------------------------------------
171// new QueryResult class to handle retrieval of doc and level nums.
172// Use this class with extended version of MGQuery
173
174class ExtQueryResult : public QueryResult {
175public:
176 DocNumArray levels; // used for returning a different granularity, eg
177 // search sections but return Document numbers, or search Documents,
178 // return Section numbers.
179
180 void Clear ();
181 ExtQueryResult ();
182};
183
184ostream &operator<< (ostream &s, const ExtQueryResult &r);
185bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
186
187//------------------------------------------------------------
188// new functions to handle full text browse
189
190class BrowseQueryResult {
191 public:
192 TermFreqArray termFreqs;
193 void Clear();
194 BrowseQueryResult ();
195
196};
197
198
199ostream &operator<< (ostream &s, const BrowseQueryResult &r);
200bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2);
201
202void FindNearestWordNumber (IndexData &indexData,
203 const UCArray &term,
204 mg_u_long &number);
205
206void GetTermList(IndexData &indexData,
207 mg_u_long startTerm,
208 mg_u_long numTerms,
209 TermFreqArray &terms);
210
211void GetTermList (IndexData &indexData,
212 mg_u_long startTerm,
213 mg_u_long numTerms,
214 UCArrayVector &terms);
215
216#endif
Note: See TracBrowser for help on using the repository browser.