source: trunk/gsdl/src/mgpp/text/Terms.h@ 927

Last change on this file since 927 was 927, checked in by kjm18, 24 years ago

Added a feature to retrieve doc nums at a different level than the level
queried at, e.g. query at Document level but retrieve Section-level docnums.
Fixed a bug in mg_perf_hash_build.cpp.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**************************************************************************
2 *
3 * Terms.h -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Terms.h 927 2000-02-15 22:45:22Z kjm18 $
21 *
22 **************************************************************************/
23
24#ifndef TERMS_H
25#define TERMS_H
26
27#include "IndexData.h"
28
29#if defined(GSDL_USE_OBJECTSPACE)
30# include <ospace\std\iostream>
31#elif defined(GSDL_USE_IOS_H)
32# include <iostream.h>
33#else
34# include <iostream>
35#endif
36
37
38class QueryInfo {
39public:
40 // general query information
41 UCArray docLevel;
42 unsigned long maxDocs; // 0 = all
43 bool sortByRank;
44 bool exactWeights;
45
46 // information need to return
47 bool needRankInfo;
48 bool needTermFreqs;
49
50 void Clear ();
51 QueryInfo () { Clear (); }
52};
53
54
55class TermFreqData {
56public:
57 UCArray tag; // level tag or query tag
58 UCArray term; // unstemmed term
59 int stemMethod;
60 unsigned long matchDocs; // tf for level
61
62 void Clear ();
63 TermFreqData () { Clear (); }
64};
65
66ostream &operator<< (ostream &s, const TermFreqData &t);
67bool operator== (const TermFreqData &t1, const TermFreqData &t2);
68
69typedef vector<TermFreqData> TermFreqArray;
70
71
72typedef vector<float> RankArray;
73typedef vector<unsigned long> DocNumArray;
74
75class QueryResult {
76public:
77 DocNumArray docs;
78 RankArray ranks; // used for accumulators during query
79
80 TermFreqArray termFreqs;
81
82 void Clear ();
83 QueryResult ();
84};
85
86ostream &operator<< (ostream &s, const QueryResult &r);
87bool operator== (const QueryResult &r1, const QueryResult &r2);
88
89typedef vector<unsigned long> FragNumArray;
90typedef vector<unsigned long> FragFreqArray;
91
92class FragData {
93public:
94 unsigned long matchDocs; // ft for level
95 FragNumArray fragNums;
96 FragFreqArray fragFreqs;
97
98 void Clear ();
99 FragData () { Clear (); }
100};
101
102
// A pair of fragment numbers marking the start and end of a range.
// NOTE(review): whether rangeEnd is inclusive is not visible from this
// header -- confirm against the code that fills in FragRangeArray.
class FragRange {
public:
  unsigned long rangeStart;
  unsigned long rangeEnd;

  // Reset both bounds to zero.
  void Clear () {
    rangeStart = 0;
    rangeEnd = 0;
  }

  // A newly constructed range starts out as [0, 0].
  FragRange () { Clear (); }
};
111
112typedef vector<FragRange> FragRangeArray;
113
114
115
116void FindWordNumbers (IndexData &indexData,
117 const UCArray &term,
118 unsigned long stemMethod,
119 vector<unsigned long> &equivWords);
120
121void ReadTermFragData (IndexData &indexData,
122 bool needFragFreqs,
123 unsigned long termNum,
124 FragData &fragData,
125 FragRangeArray *fragLimits);
126
127void CombineFragData (bool needFragFreqs,
128 const FragData &f1,
129 const FragData &f2,
130 FragData &outFragData);
131
132// output will be in fragData (as this is an and operation)
133void AndCombineFragData (bool needFragFreqs,
134 FragData &fragData,
135 const FragData &comFragData,
136 signed long startRange,
137 signed long endRange,
138 const FragRangeArray *fragLimits);
139
140void FragsToQueryResult (IndexData &indexData,
141 const QueryInfo &queryInfo,
142 const FragData &termData,
143 const UCArray &tag,
144 const UCArray &term,
145 unsigned long stemMethod,
146 unsigned long termWeight,
147 QueryResult &result);
148
149void AndFragsToQueryResult (IndexData &indexData,
150 const QueryInfo &queryInfo,
151 const FragData &termData,
152 const UCArray &tag,
153 const UCArray &term,
154 unsigned long stemMethod,
155 unsigned long termWeight,
156 QueryResult &result);
157
158void RemoveUnwantedResults (IndexData &indexData,
159 const QueryInfo &queryInfo,
160 const FragData &termData,
161 QueryResult &result);
162
163
164// new QueryResult class to handle retrieval of doc and level nums.
165// Use this class with extended version of MGQuery
166
167class ExtQueryResult : public QueryResult {
168public:
169 DocNumArray levels; // used for returning a different granularity, eg
170 // search sections but return Document numbers, or search Documents,
171 // return Section numbers.
172
173 void Clear ();
174 ExtQueryResult ();
175};
176
177ostream &operator<< (ostream &s, const ExtQueryResult &r);
178bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2);
179
180#endif
181
Note: See TracBrowser for help on using the repository browser.