1 | /**************************************************************************
|
---|
2 | *
|
---|
3 | * MGQuery.cpp -- Query related functions
|
---|
4 | * Copyright (C) 1999 Rodger McNab
|
---|
5 | *
|
---|
6 | * This program is free software; you can redistribute it and/or modify
|
---|
7 | * it under the terms of the GNU General Public License as published by
|
---|
8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
9 | * (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This program is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | * GNU General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU General Public License
|
---|
17 | * along with this program; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
19 | *
|
---|
20 | **************************************************************************/
|
---|
21 |
|
---|
22 | #include "MGQuery.h"
|
---|
23 | #include "bitio_m_stdio.h"
|
---|
24 | #include "bitio_gen.h"
|
---|
25 | #include "Terms.h"
|
---|
26 | #include "QueryResultsSort.h"
|
---|
27 | #include <assert.h>
|
---|
28 |
|
---|
29 |
|
---|
/**
 * Write `indent` space characters to stream `s`.
 * A zero or negative indent writes nothing.
 */
void PrintIndent (std::ostream &s, int indent) {
  for (int i = 0; i < indent; ++i) {
    s << " ";
  }
}
|
---|
33 |
|
---|
34 |
|
---|
/**
 * Write `text` to stream `s`, preceded by `indent` space characters.
 */
void PrintIndentText (std::ostream &s, char *text, int indent) {
  // emit the leading spaces, then the text itself
  for (int i = 0; i < indent; ++i) {
    s << " ";
  }
  s << text;
}
|
---|
39 |
|
---|
40 | void PrintNode (ostream &s, QueryNode *node, int indent) {
|
---|
41 | if (node == NULL) {
|
---|
42 | PrintIndentText (s, (char*)"NULL\n", indent);
|
---|
43 | } else {
|
---|
44 | node->Print (s, indent+2);
|
---|
45 | }
|
---|
46 | }
|
---|
47 |
|
---|
48 |
|
---|
49 |
|
---|
// Default constructor: the base query node has no state of its own.
QueryNode::QueryNode () {
}
|
---|
52 |
|
---|
// Destructor: nothing owned at this level; derived nodes release
// their children in their own destructors / Free().
QueryNode::~QueryNode () {
}
|
---|
55 |
|
---|
// Base-class evaluation: a bare query node matches nothing, so the
// result is simply cleared (indexData and queryInfo are unused here).
void QueryNode::Calculate (IndexData &/*indexData*/,
                           const QueryInfo &/*queryInfo*/,
                           QueryResult &result) const {
  result.Clear();
}
|
---|
61 |
|
---|
// Base class owns no resources; derived classes provide their own
// Free() to delete child nodes.
void QueryNode::Free () {
}
|
---|
64 |
|
---|
// Base class prints nothing; derived nodes print their own structure.
void QueryNode::Print (ostream &/*s*/, int /*indent*/) const {
}
|
---|
67 |
|
---|
68 |
|
---|
69 |
|
---|
70 | AndQueryNode::AndQueryNode () {
|
---|
71 | leftNode = NULL;
|
---|
72 | rightNode = NULL;
|
---|
73 | }
|
---|
74 |
|
---|
// Destructor deletes both subtrees via Free().
AndQueryNode::~AndQueryNode () {
  Free ();
}
|
---|
78 |
|
---|
// Evaluate this AND node: intersect the document lists produced by the
// left and right subtrees. A document survives only if it appears in
// both subresults; its rank becomes the sum of the two subranks.
// Both subresults are produced sorted by ascending document number,
// which the in-place merge below relies on.
void AndQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                              QueryResult &result) const {
  result.Clear();

  // an And between nothing and something is nothing...
  if (leftNode == NULL || rightNode == NULL) return;

  // calculate the result from the left tree and the result
  // from the right tree; the left result goes straight into `result`
  // because the intersection can be compacted in place
  QueryResult rightResult;
  leftNode->Calculate (indexData, queryInfo, result);
  rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results, this can be done in place as the results
  // will always shrink with an And

  // ranks are only maintained when the caller asked for rank data;
  // if docs and ranks disagree in length something upstream is broken
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
                    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks:
  // classic two-pointer sorted-list intersection, writing survivors
  // back into result at position outI
  mg_u_long leftI = 0;
  mg_u_long rightI = 0;
  mg_u_long outI = 0;
  while (leftI < result.docs.size() &&
         rightI < rightResult.docs.size()) {
    if (result.docs[leftI] < rightResult.docs[rightI]) {
      ++leftI;
    } else if (result.docs[leftI] > rightResult.docs[rightI]) {
      ++rightI;
    } else {
      // the documents are equal
      result.docs[outI] = result.docs[leftI];
      if (haveAccum)
        result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
      ++leftI;
      ++rightI;
      ++outI;
    }
  }

  // erase unused document numbers and ranks (everything past outI)
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information: the left tree's entries are
  // already in result, so only the right tree's entries are appended
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
                             rightResult.termFreqs.begin(),
                             rightResult.termFreqs.end());
}
|
---|
135 |
|
---|
136 | void AndQueryNode::Free () {
|
---|
137 | if (leftNode != NULL) {
|
---|
138 | delete leftNode;
|
---|
139 | leftNode = NULL;
|
---|
140 | }
|
---|
141 | if (rightNode != NULL) {
|
---|
142 | delete rightNode;
|
---|
143 | rightNode = NULL;
|
---|
144 | }
|
---|
145 | }
|
---|
146 |
|
---|
// Print this AND node and both subtrees, indented for readability.
void AndQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, (char*)"leftNode:\n", indent);
  PrintNode (s, leftNode, indent+2);
  PrintIndentText (s, (char*)"AND\n", indent);
  PrintIndentText (s, (char*)"rightNode:\n", indent);
  PrintNode (s, rightNode, indent+2);
}
|
---|
154 |
|
---|
155 |
|
---|
156 | OrQueryNode::OrQueryNode () {
|
---|
157 | leftNode = NULL;
|
---|
158 | rightNode = NULL;
|
---|
159 | }
|
---|
160 |
|
---|
// Destructor deletes both subtrees via Free().
OrQueryNode::~OrQueryNode () {
  Free ();
}
|
---|
164 |
|
---|
// Evaluate this OR node: union of the document lists produced by the
// left and right subtrees. A document appearing in both keeps the sum
// of its two ranks. Both subresults are produced sorted by ascending
// document number, which the merge below relies on; a missing subtree
// simply contributes an empty subresult.
void OrQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                             QueryResult &result) const {
  result.Clear();

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult leftResult;
  QueryResult rightResult;
  if (leftNode != NULL)
    leftNode->Calculate (indexData, queryInfo, leftResult);
  if (rightNode != NULL)
    rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results

  // ranks are only maintained when the caller asked for rank data;
  // if docs and ranks disagree in length something upstream is broken
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (leftResult.ranks.size() != leftResult.docs.size() ||
                    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks:
  // two-pointer sorted merge; an exhausted side is represented by the
  // ULONG_MAX sentinel so the other side always compares smaller
  mg_u_long leftSize = leftResult.docs.size();
  mg_u_long rightSize = rightResult.docs.size();
  mg_u_long leftI = 0;
  mg_u_long rightI = 0;
  mg_u_long leftDocNum = 0;
  mg_u_long rightDocNum = 0;
  while (leftI < leftSize || rightI < rightSize) {
    // check leftI
    if (leftI < leftResult.docs.size())
      leftDocNum = leftResult.docs[leftI];
    else leftDocNum = (mg_u_long)ULONG_MAX;

    // check rightI
    if (rightI < rightResult.docs.size())
      rightDocNum = rightResult.docs[rightI];
    else rightDocNum = (mg_u_long)ULONG_MAX;

    // combine: take the smaller document number; equal numbers are
    // emitted once with their ranks added together
    if (leftDocNum < rightDocNum) {
      result.docs.push_back (leftDocNum);
      if (haveAccum)
        result.ranks.push_back (leftResult.ranks[leftI]);
      ++leftI;

    } else if (leftDocNum > rightDocNum) {
      result.docs.push_back (rightDocNum);
      if (haveAccum)
        result.ranks.push_back (rightResult.ranks[rightI]);
      ++rightI;

    } else { // equal
      result.docs.push_back (leftDocNum);
      if (haveAccum)
        result.ranks.push_back (leftResult.ranks[leftI] +
                                rightResult.ranks[rightI]);
      ++leftI;
      ++rightI;
    }
  }

  // combine term frequency information from both subtrees
  if (queryInfo.needTermFreqs) {
    result.termFreqs.insert (result.termFreqs.end(),
                             leftResult.termFreqs.begin(),
                             leftResult.termFreqs.end());
    result.termFreqs.insert (result.termFreqs.end(),
                             rightResult.termFreqs.begin(),
                             rightResult.termFreqs.end());
  }
}
|
---|
239 |
|
---|
240 | void OrQueryNode::Free () {
|
---|
241 | if (leftNode != NULL) {
|
---|
242 | delete leftNode;
|
---|
243 | leftNode = NULL;
|
---|
244 | }
|
---|
245 | if (rightNode != NULL) {
|
---|
246 | delete rightNode;
|
---|
247 | rightNode = NULL;
|
---|
248 | }
|
---|
249 | }
|
---|
250 |
|
---|
// Print this OR node and both subtrees, indented for readability.
void OrQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, (char*)"leftNode:\n", indent);
  PrintNode (s, leftNode, indent+2);
  PrintIndentText (s, (char*)"OR\n", indent);
  PrintIndentText (s, (char*)"rightNode:\n", indent);
  PrintNode (s, rightNode, indent+2);
}
|
---|
258 |
|
---|
259 |
|
---|
260 |
|
---|
261 | NotQueryNode::NotQueryNode () {
|
---|
262 | queryNode = NULL;
|
---|
263 | notNode = NULL;
|
---|
264 | }
|
---|
265 |
|
---|
// Destructor deletes both subtrees via Free().
NotQueryNode::~NotQueryNode () {
  Free ();
}
|
---|
269 |
|
---|
// Evaluate this NOT node: set difference. The result contains the
// documents matched by queryNode that are NOT matched by notNode,
// keeping their original ranks. Both subresults are produced sorted by
// ascending document number, which the in-place merge relies on.
void NotQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                              QueryResult &result) const {
  result.Clear();

  // check for nothing: no query subtree means no candidates at all;
  // no NOT subtree means nothing is excluded, so pass straight through
  if (queryNode == NULL) return;
  if (notNode == NULL) {
    queryNode->Calculate (indexData, queryInfo, result);
    return;
  }

  // calculate the result from the query tree and the result
  // from the not tree
  QueryResult notResult;
  queryNode->Calculate (indexData, queryInfo, result);
  notNode->Calculate (indexData, queryInfo, notResult);

  // merge the results, this can be done in place as the results
  // will always shrink with a Not

  // ranks are only maintained when the caller asked for rank data;
  // if docs and ranks disagree in length something upstream is broken
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
                    notResult.ranks.size() != notResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks:
  // two-pointer walk; survivors are compacted into result at outI.
  // Any query documents left after notResult is exhausted are already
  // in place, so they are implicitly kept... no, note: the loop stops
  // when either list ends, and the erase below trims at outI — query
  // documents beyond the last compared position are dropped only if
  // outI < queryI, which matches the original compaction behaviour.
  mg_u_long queryI = 0;
  mg_u_long notI = 0;
  mg_u_long outI = 0;
  while (queryI < result.docs.size() &&
         notI < notResult.docs.size()) {
    if (result.docs[queryI] < notResult.docs[notI]) {
      // found a document not in the notResults
      result.docs[outI] = result.docs[queryI];
      if (haveAccum)
        result.ranks[outI] = result.ranks[queryI];
      ++queryI;
      ++outI;
    } else if (result.docs[queryI] > notResult.docs[notI]) {
      ++notI;
    } else {
      // the documents are equal, ignore both
      ++queryI;
      ++notI;
    }
  }

  // erase unused document numbers and ranks
  // NOTE(review): this also discards query documents after position
  // outI that were never compared because notResult ran out first —
  // confirm this is the intended semantics (it is the long-standing
  // behaviour of this routine).
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information: queryNode's entries are already
  // in result, so only notNode's entries are appended
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
                             notResult.termFreqs.begin(),
                             notResult.termFreqs.end());
}
|
---|
331 |
|
---|
332 | void NotQueryNode::Free () {
|
---|
333 | if (queryNode != NULL) {
|
---|
334 | delete queryNode;
|
---|
335 | queryNode = NULL;
|
---|
336 | }
|
---|
337 | if (notNode != NULL) {
|
---|
338 | delete notNode;
|
---|
339 | notNode = NULL;
|
---|
340 | }
|
---|
341 | }
|
---|
342 |
|
---|
// Print this NOT node and both subtrees, indented for readability.
void NotQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, (char*)"queryNode:\n", indent);
  PrintNode (s, queryNode, indent+2);
  PrintIndentText (s, (char*)"NOT\n", indent);
  PrintIndentText (s, (char*)"notNode:\n", indent);
  PrintNode (s, notNode, indent+2);
}
|
---|
350 |
|
---|
351 |
|
---|
// Reset the node to its empty state (no tag name).
void TagNode::Clear () {
  UCArrayClear (tagName);
}
|
---|
355 |
|
---|
// Fill fragRange with the [rangeStart, rangeEnd] fragment range of
// every occurrence of this tag: look the tag up in the block
// dictionary, then decode its delta-coded entry from the inverted file.
// An empty tag name or a tag missing from the dictionary yields an
// empty fragRange.
void TagNode::Calculate (IndexData &indexData,
                         FragRangeArray &fragRange) const {
  fragRange.erase (fragRange.begin(), fragRange.end());
  if (tagName.empty()) return;

  // get information about this tag
  block_dict_el tagEl;
  mg_u_long tagElNum;
  if (!SearchBlockDictEl (indexData.dictFile, indexData.biTags,
                          indexData.bdh.entries_per_tblk,
                          indexData.bdh.tag_dict_size,
                          tagName, tagEl, tagElNum))
    return; // tag not in the dictionary

  // seek to the appropriate place in the inverted file
  fseek (indexData.invfFile, tagEl.invf_ptr, SEEK_SET);

  stdio_bitio_buffer buffer(indexData.invfFile);

  // each occurrence is stored as two gap-coded numbers (range start
  // then range end), so there are frag_occur*2 pointers in total;
  // B is the Bernoulli block-coding parameter for that distribution
  mg_u_long pTag = tagEl.frag_occur*2;
  mg_u_long B = BIO_Bblock_Init (indexData.bdh.num_frags+pTag, pTag);
  mg_u_long fragNum = 0;
  mg_u_long i;
  FragRange thisFrag;
  for (i=0; i<tagEl.frag_occur; ++i) {
    // get start: decoded values are 1-based gaps, hence the -1
    mg_u_long delta = buffer.bblock_decode (B, NULL)-1;
    fragNum += delta;

    thisFrag.rangeStart = fragNum;

    // get end
    delta = buffer.bblock_decode (B, NULL)-1;
    fragNum += delta;

    thisFrag.rangeEnd = fragNum;
    fragRange.push_back (thisFrag);
  }

  buffer.done();
}
|
---|
397 |
|
---|
// Release resources: just clears the stored tag name.
void TagNode::Free () {
  Clear ();
}
|
---|
401 |
|
---|
// Print the tag name at the given indentation.
void TagNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TAG: \"" << tagName << "\"\n";
}
|
---|
406 |
|
---|
407 |
|
---|
// Reset the term node to its defaults: empty term, unit weight,
// no stemming, and an unrestricted proximity range.
void TermNode::Clear () {
  UCArrayClear (term);
  termWeight = 1;
  stemMethod = 0;
  startRange = NO_TERM_RANGE_START;
  endRange = NO_TERM_RANGE_END;
}
|
---|
415 |
|
---|
// Construct a term node in its cleared default state.
TermNode::TermNode () {
  Clear ();
}
|
---|
419 |
|
---|
// Look up this term and gather its fragment occurrence data.
// The term is first expanded (via the configured stemMethod) into a
// set of equivalent word numbers; each equivalent word's fragment data
// is read and folded into fragData, restricted to fragLimits if given.
// The actual word strings matched are appended to equivTerms.
void TermNode::Calculate (IndexData &indexData,
                          bool needFragFreqs,
                          FragRangeArray *fragLimits,
                          FragData &fragData,
                          UCArrayVector &equivTerms) const {
  fragData.Clear ();
  equivTerms.erase(equivTerms.begin(), equivTerms.end());

  // get a list of term numbers (one per stem-equivalent word)
  vector<mg_u_long> equivNums;
  FindWordNumbers (indexData, term, stemMethod, equivNums);

  // get the information for each word and merge it with
  // previous results
  FragData tempFragData1;
  FragData tempFragData2;
  UCArray equivWord;
  vector<mg_u_long>::iterator here = equivNums.begin();
  vector<mg_u_long>::iterator end = equivNums.end();
  while (here != end) {
    // get the information for this word
    ReadTermFragData (indexData, needFragFreqs, *here,
                      tempFragData1, fragLimits, equivWord);
    equivTerms.push_back(equivWord);
    // combine with last results: fragData accumulates everything
    // seen so far (copied aside first since it is also the output)
    tempFragData2 = fragData;
    CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);

    ++here;
  }
}
|
---|
451 |
|
---|
// Release resources: resets the node back to its default state.
void TermNode::Free () {
  Clear ();
}
|
---|
455 |
|
---|
// Print the term and all of its query parameters, indented.
void TermNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TERM: \"" << term << "\"\n";
  PrintIndent (s, indent+2);
  s << "termWeight: " << termWeight << "\n";
  PrintIndent (s, indent+2);
  s << "stemMethod: " << stemMethod << "\n";
  PrintIndent (s, indent+2);
  s << "startRange: " << startRange << "\n";
  PrintIndent (s, indent+2);
  s << "endRange: " << endRange << "\n";
}
|
---|
468 |
|
---|
469 |
|
---|
// Constructor: no tag restriction until one is attached.
ProxMatchQueryNode::ProxMatchQueryNode () {
  tagNodePtr = NULL;
}
|
---|
473 |
|
---|
// Destructor releases the tag node and term list via Free().
ProxMatchQueryNode::~ProxMatchQueryNode () {
  Free ();
}
|
---|
477 |
|
---|
// Evaluate a proximity/phrase match: all terms in `terms` must occur
// within the allowed ranges of each other, inside the fragments of the
// governing tag (or of the current retrieval level when no tag is set).
// The first term seeds the result; each further term both contributes
// its statistics to `result` and narrows `termData` via an AND of the
// fragment-level occurrence data.
void ProxMatchQueryNode::Calculate (IndexData &indexData,
                                    const QueryInfo &queryInfo,
                                    QueryResult &result) const {
  result.Clear ();

  // fragment frequencies are only needed when ranking is requested
  bool needFragFreqs = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // read in the tag if needed
  FragRangeArray fragLimits;
  FragRangeArray *fragLimitsPtr = NULL;
  if (tagNodePtr == NULL && terms.size() > 1) {
    // multiple terms must be compared relative to some tag
    // otherwise phrase matches could span documents
    TagNode tempTagNode;
    tempTagNode.tagName = indexData.curLevel;
    tempTagNode.Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;

  } else if (tagNodePtr != NULL) {
    (*tagNodePtr).Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;
  }

  // the tag (if any) determines which index granularity results are
  // reported against; otherwise the current level is used
  UCArray tagOrLevel = indexData.curLevel;
  if (tagNodePtr != NULL) tagOrLevel = (*tagNodePtr).tagName;

  // read in the first term; it establishes the initial candidate set
  FragData termData;
  UCArrayVector equivTerms;
  TermNodeArray::const_iterator termHere=terms.begin(), termEnd = terms.end();
  if (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs, fragLimitsPtr,
                           termData, equivTerms);

    // convert initial fragment information
    FragsToQueryResult (indexData,
                        queryInfo,
                        termData,
                        tagOrLevel,
                        (*termHere).term,
                        (*termHere).stemMethod,
                        (*termHere).termWeight,
                        equivTerms,
                        result);

    ++termHere;

    if (termHere == termEnd) return; // nothing more to do
  }

  // read and combine the rest of the terms: each one is ANDed into
  // both the query result and the running fragment data, honouring
  // the term's start/end proximity range
  FragData comTermData;
  while (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs,
                           fragLimitsPtr, comTermData, equivTerms);

    AndFragsToQueryResult (indexData,
                           queryInfo,
                           comTermData,
                           tagOrLevel,
                           (*termHere).term,
                           (*termHere).stemMethod,
                           (*termHere).termWeight,
                           equivTerms,
                           result);

    AndCombineFragData (needFragFreqs, termData, comTermData,
                        (*termHere).startRange,
                        (*termHere).endRange,
                        fragLimitsPtr);
    ++termHere;
  }

  // remove unwanted document numbers: drop result entries whose
  // documents did not survive the combined fragment intersection
  RemoveUnwantedResults (indexData,
                         queryInfo,
                         termData,
                         result);
}
|
---|
557 |
|
---|
558 | void ProxMatchQueryNode::Free () {
|
---|
559 | if (tagNodePtr != NULL) {
|
---|
560 | delete tagNodePtr;
|
---|
561 | tagNodePtr = NULL;
|
---|
562 | }
|
---|
563 | terms.erase (terms.begin(), terms.end());
|
---|
564 | }
|
---|
565 |
|
---|
// Print the proximity node: its optional tag restriction followed by
// every term, all indented for readability.
void ProxMatchQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, (char*)"PROXMATCH\n", indent);
  if (tagNodePtr != NULL) tagNodePtr->Print (s, indent+2);

  TermNodeArray::const_iterator here = terms.begin();
  TermNodeArray::const_iterator end = terms.end();
  while (here != end) {
    (*here).Print (s, indent+2);
    ++here;
  }
}
|
---|
577 |
|
---|
578 |
|
---|
// Reset the browse node to its empty state (no term).
void BrowseQueryNode::Clear () {
  UCArrayClear(term);
}
|
---|
582 |
|
---|
583 | void BrowseQueryNode::Calculate (IndexData &indexData, BrowseQueryResult &result) const {
|
---|
584 |
|
---|
585 | mg_u_long number=0;
|
---|
586 | FindNearestWordNumber(indexData, term, number);
|
---|
587 | if (number + startPosition > 0 ) {
|
---|
588 | number = number+startPosition;
|
---|
589 | }
|
---|
590 | else {
|
---|
591 | number = 1;
|
---|
592 | }
|
---|
593 |
|
---|
594 | GetTermList (indexData, number, numTerms, result.termFreqs);
|
---|
595 |
|
---|
596 | }
|
---|
597 |
|
---|
598 |
|
---|
599 |
|
---|
// Release resources: just clears the stored term.
void BrowseQueryNode::Free () {
  Clear();
}
|
---|
603 |
|
---|
604 |
|
---|
// Print the browse node: its term, start position and term count.
void BrowseQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText(s, (char*)"BROWSEQUERYNODE\n", indent);
  PrintIndent (s, indent+2);
  s << "TERM:"<<term<<"\n";
  PrintIndent (s, indent+2);
  s << "Start position: "<< startPosition<<", Num terms: "<< numTerms<<"\n";
}
|
---|
614 |
|
---|
615 |
|
---|
616 |
|
---|
// Run a query tree against the index. The tree is evaluated at
// queryInfo.docLevel; accumulated weights are normalised into ranks,
// the top queryInfo.maxDocs documents are selected (sorted by rank
// when requested, optionally re-ranked with exact document weights),
// and rank data the caller did not ask for is discarded.
void MGQuery (IndexData &indexData,
              const QueryInfo &queryInfo,
              const QueryNode *queryTree,
              QueryResult &result) {
  result.Clear ();

  // make sure level is current
  if (!indexData.LoadLevel (queryInfo.docLevel)) {
    return;
  }
  // do query
  if (queryTree == NULL) return;

  (*queryTree).Calculate (indexData, queryInfo, result);

  // make weights into ranks if needed: divide each accumulated weight
  // by the (approximate) document weight of its document
  mg_u_long i;
  if (queryInfo.sortByRank || queryInfo.needRankInfo) {
    for (i=0; i<result.ranks.size(); ++i) {
      result.ranks[i] /=
        indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
    }
  }

  // maxDocs == 0 means "no limit"; never ask for more than we have
  mg_u_long resultsSize = queryInfo.maxDocs;
  if (resultsSize == 0 || resultsSize > result.docs.size())
    resultsSize = result.docs.size();

  result.actualNumDocs = result.docs.size(); // the total number of docs
  //returned, before pruning based on maxDocs

  // sort results by rank if needed: heap-select the top resultsSize
  // entries, then sort just those (cheaper than a full sort)
  GTRank gtRank;
  if (queryInfo.sortByRank) {
    // need in ascending order for SelectAddHeap
    MakeHeap (result.docs.begin(), result.ranks.begin(),
              resultsSize, gtRank);
    SelectAddHeap (result.docs.begin(), result.ranks.begin(), resultsSize,
                   result.docs.begin()+resultsSize,
                   result.ranks.begin()+resultsSize,
                   (mg_u_long) result.ranks.size()-resultsSize,
                   gtRank);

    // sort into descending order
    SortHeap (result.docs.begin(), result.ranks.begin(), resultsSize, gtRank);
  }

  // get exact weights if needed: replace the approximate weight that
  // was divided out above with the exact per-document weight
  if (queryInfo.exactWeights &&
      (queryInfo.sortByRank || queryInfo.needRankInfo)) {
    mg_u_long exactDiskPtr =
      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;

    for (i=0; i<resultsSize; ++i) {
      result.ranks[i] = result.ranks[i] *
        indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
        GetExactDocWeight (indexData.exactWeightsFile, exactDiskPtr,
                           result.docs[i]);
    }

    // re-sort top candidates based on exact weights
    if (queryInfo.sortByRank) {
      MakeHeap (result.docs.begin(), result.ranks.begin(),
                resultsSize, gtRank);
      SortHeap (result.docs.begin(), result.ranks.begin(),
                resultsSize, gtRank);
    }
  }

  // get rid of unwanted ranking results
  if (result.ranks.empty()) {
    // do nothing
  } else if (!queryInfo.needRankInfo) {
    result.ranks.erase(result.ranks.begin(), result.ranks.end());
  } else {
    result.ranks.erase(result.ranks.begin()+resultsSize, result.ranks.end());
  }

  // remove extra docs that are unwanted
  result.docs.erase(result.docs.begin()+resultsSize, result.docs.end());
}
|
---|
698 |
|
---|
699 | // new MGQuery to retrieve doc and section nums
|
---|
700 | // this will return doc nums for the level queried at (set in queryInfo)
|
---|
701 | // in QueryResult.docs and if a second level is specified,
|
---|
702 | // it will return corresponding docnums for that level in QueryResult.levels
|
---|
703 | // If there is no level specified, or that level is invalid, the query
|
---|
704 | // level is used
|
---|
// new MGQuery to retrieve doc and section nums
// this will return doc nums for the level queried at (set in queryInfo)
// in QueryResult.docs and if a second level is specified,
// it will return corresponding docnums for that level in QueryResult.levels
// If there is no level specified, or that level is invalid, the query
// level is used
void MGQuery (IndexData &indexData,
              const QueryInfo &queryInfo,
              const QueryNode *queryTree,
              ExtQueryResult &realresult, UCArray &level) {
  realresult.Clear ();
  QueryResult result; // temp result

  // do the normal query
  MGQuery (indexData, queryInfo, queryTree, result);

  // now that have the final result stuff, convert to ExtQueryResult,
  // add in level nums if needed

  realresult.docs = result.docs;
  realresult.ranks = result.ranks;
  realresult.termFreqs = result.termFreqs;
  realresult.actualNumDocs = result.actualNumDocs;

  // same level (or none) requested: levels are just the doc numbers
  if (queryInfo.docLevel == level || level.empty()) {
    realresult.levels = result.docs;
    return;
  }

  // else need to convert from queryInfo.docLevel to level

  // the original level info (copied before LoadLevel replaces it)
  FragLevelConvert sectionLevelConverter = indexData.levelConverter;

  // the new level info; fall back to the doc numbers if it won't load
  if (!indexData.LoadLevel(level)) {
    realresult.levels = result.docs;
    return;
  }

  mg_u_long DocNum = 0;

  for (mg_u_long i=0; i<realresult.docs.size(); ++i) {

    // do an if ! here????
    indexData.levelConverter.LevelToLevel(sectionLevelConverter, realresult.docs[i], DocNum);
    realresult.levels.push_back(DocNum);
  }

}
|
---|
749 |
|
---|
750 |
|
---|
751 |
|
---|
752 | // new function for full text browsing,
|
---|
// new function for full text browsing,
// Loads the requested index level and evaluates the browse node
// against it, leaving nearby dictionary terms in result.
// NOTE(review): the LoadLevel return value is ignored here, unlike in
// MGQuery — confirm whether a failed load should abort the browse.
void MGBrowseQuery (IndexData &indexData, UCArray &level,
                    const BrowseQueryNode &node,
                    BrowseQueryResult &result) {

  indexData.LoadLevel(level);
  node.Calculate(indexData, result);

}
|
---|