1 | /**************************************************************************
|
---|
2 | *
|
---|
3 | * MGQuery.cpp -- Query related functions
|
---|
4 | * Copyright (C) 1999 Rodger McNab
|
---|
5 | *
|
---|
6 | * This program is free software; you can redistribute it and/or modify
|
---|
7 | * it under the terms of the GNU General Public License as published by
|
---|
8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
9 | * (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This program is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | * GNU General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU General Public License
|
---|
17 | * along with this program; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
19 | *
|
---|
20 | * $Id: MGQuery.cpp 879 2000-01-31 03:01:25Z rjmcnab $
|
---|
21 | *
|
---|
22 | **************************************************************************/
|
---|
23 |
|
---|
24 | #include "MGQuery.h"
|
---|
25 | #include "bitio_m_stdio.h"
|
---|
26 | #include "bitio_gen.h"
|
---|
27 | #include "Terms.h"
|
---|
28 | #include "QueryResultsSort.h"
|
---|
29 | #include <assert.h>
|
---|
30 |
|
---|
31 |
|
---|
32 | void PrintIndent (ostream &s, int indent) {
|
---|
33 | while (indent-- > 0) s << " ";
|
---|
34 | }
|
---|
35 |
|
---|
36 |
|
---|
37 | void PrintIndentText (ostream &s, char *text, int indent) {
|
---|
38 | PrintIndent (s, indent);
|
---|
39 | s << text;
|
---|
40 | }
|
---|
41 |
|
---|
42 | void PrintNode (ostream &s, QueryNode *node, int indent) {
|
---|
43 | if (node == NULL) {
|
---|
44 | PrintIndentText (s, "NULL\n", indent);
|
---|
45 | } else {
|
---|
46 | node->Print (s, indent+2);
|
---|
47 | }
|
---|
48 | }
|
---|
49 |
|
---|
50 |
|
---|
51 |
|
---|
// Base query tree node: carries no state of its own, so construction
// is a no-op.
QueryNode::QueryNode () {
}
54 |
|
---|
// Nothing to release at this level.
// NOTE(review): assumes this destructor is declared virtual in
// MGQuery.h so that deleting a subtree through a QueryNode* is safe
// -- confirm against the header.
QueryNode::~QueryNode () {
}
57 |
|
---|
// Base implementation of query evaluation: a bare QueryNode matches
// nothing, so it simply empties the result.  Concrete node types
// (And/Or/Not/ProxMatch) override this.
void QueryNode::Calculate (IndexData &/*indexData*/,
			   const QueryInfo &/*queryInfo*/,
			   QueryResult &result) const {
  result.Clear();
}
63 |
|
---|
// Release any owned resources; the base node owns nothing.
void QueryNode::Free () {
}
66 |
|
---|
// Base nodes have nothing to print; subclasses override.
void QueryNode::Print (ostream &/*s*/, int /*indent*/) const {
}
69 |
|
---|
70 |
|
---|
71 |
|
---|
72 | AndQueryNode::AndQueryNode () {
|
---|
73 | leftNode = NULL;
|
---|
74 | rightNode = NULL;
|
---|
75 | }
|
---|
76 |
|
---|
// Recursively deletes both subtrees via Free().
AndQueryNode::~AndQueryNode () {
  Free ();
}
80 |
|
---|
// Evaluate the AND node: intersect the document lists produced by the
// left and right subtrees.  Both subtree results are produced in
// ascending document-number order, so a single linear merge suffices.
// The merge is performed in place in `result` (the left operand's
// output) because an intersection can only shrink that list.
void AndQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
			      QueryResult &result) const {
  result.Clear();

  // an And between nothing and something is nothing...
  if (leftNode == NULL || rightNode == NULL) return;

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult rightResult;
  leftNode->Calculate (indexData, queryInfo, result);
  rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results, this can be done in place as the results
  // will always shrink with an And

  // ranks are only maintained when the caller asked for ranking
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
		    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here: docs and ranks arrays out of step
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks; classic
  // two-pointer intersection, writing survivors back at outI
  unsigned long leftI = 0;
  unsigned long rightI = 0;
  unsigned long outI = 0;
  while (leftI < result.docs.size() &&
	 rightI < rightResult.docs.size()) {
    if (result.docs[leftI] < rightResult.docs[rightI]) {
      leftI++;
    } else if (result.docs[leftI] > rightResult.docs[rightI]) {
      rightI++;
    } else {
      // the documents are equal: keep it, summing the two ranks
      result.docs[outI] = result.docs[leftI];
      if (haveAccum)
	result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
      leftI++;
      rightI++;
      outI++;
    }
  }

  // erase unused document numbers and ranks past the merged prefix
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information (left side's freqs are already
  // in `result`; append the right side's)
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
			     rightResult.termFreqs.begin(),
			     rightResult.termFreqs.end());
}
137 |
|
---|
138 | void AndQueryNode::Free () {
|
---|
139 | if (leftNode != NULL) {
|
---|
140 | delete leftNode;
|
---|
141 | leftNode = NULL;
|
---|
142 | }
|
---|
143 | if (rightNode != NULL) {
|
---|
144 | delete rightNode;
|
---|
145 | rightNode = NULL;
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | void AndQueryNode::Print (ostream &s, int indent) const {
|
---|
150 | PrintIndentText (s, "leftNode:\n", indent);
|
---|
151 | PrintNode (s, leftNode, indent+2);
|
---|
152 | PrintIndentText (s, "AND\n", indent);
|
---|
153 | PrintIndentText (s, "rightNode:\n", indent);
|
---|
154 | PrintNode (s, rightNode, indent+2);
|
---|
155 | }
|
---|
156 |
|
---|
157 |
|
---|
158 | OrQueryNode::OrQueryNode () {
|
---|
159 | leftNode = NULL;
|
---|
160 | rightNode = NULL;
|
---|
161 | }
|
---|
162 |
|
---|
// Recursively deletes both subtrees via Free().
OrQueryNode::~OrQueryNode () {
  Free ();
}
166 |
|
---|
// Evaluate the OR node: union of the document lists produced by the
// two subtrees.  Both lists are in ascending document-number order;
// a linear merge builds the union into `result`, summing ranks for
// documents present on both sides.  A NULL subtree contributes an
// empty list, so OR degrades gracefully to its non-NULL operand.
void OrQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
			     QueryResult &result) const {
  result.Clear();

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult leftResult;
  QueryResult rightResult;
  if (leftNode != NULL)
    leftNode->Calculate (indexData, queryInfo, leftResult);
  if (rightNode != NULL)
    rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results

  // ranks are only maintained when the caller asked for ranking
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (leftResult.ranks.size() != leftResult.docs.size() ||
		    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here: docs and ranks arrays out of step
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks; an exhausted
  // side is represented by the ULONG_MAX sentinel so the other side
  // always compares smaller and is drained by the same loop
  unsigned long leftSize = leftResult.docs.size();
  unsigned long rightSize = rightResult.docs.size();
  unsigned long leftI = 0;
  unsigned long rightI = 0;
  unsigned long leftDocNum = 0;
  unsigned long rightDocNum = 0;
  while (leftI < leftSize || rightI < rightSize) {
    // check leftI
    if (leftI < leftResult.docs.size())
      leftDocNum = leftResult.docs[leftI];
    else leftDocNum = ULONG_MAX;

    // check rightI
    if (rightI < rightResult.docs.size())
      rightDocNum = rightResult.docs[rightI];
    else rightDocNum = ULONG_MAX;

    // combine
    if (leftDocNum < rightDocNum) {
      result.docs.push_back (leftDocNum);
      if (haveAccum)
	result.ranks.push_back (leftResult.ranks[leftI]);
      leftI++;

    } else if (leftDocNum > rightDocNum) {
      result.docs.push_back (rightDocNum);
      if (haveAccum)
	result.ranks.push_back (rightResult.ranks[rightI]);
      rightI++;

    } else { // equal: document on both sides, ranks accumulate
      result.docs.push_back (leftDocNum);
      if (haveAccum)
	result.ranks.push_back (leftResult.ranks[leftI] +
				rightResult.ranks[rightI]);
      leftI++;
      rightI++;
    }
  }

  // combine term frequency information from both sides
  if (queryInfo.needTermFreqs) {
    result.termFreqs.insert (result.termFreqs.end(),
			     leftResult.termFreqs.begin(),
			     leftResult.termFreqs.end());
    result.termFreqs.insert (result.termFreqs.end(),
			     rightResult.termFreqs.begin(),
			     rightResult.termFreqs.end());
  }
}
241 |
|
---|
242 | void OrQueryNode::Free () {
|
---|
243 | if (leftNode != NULL) {
|
---|
244 | delete leftNode;
|
---|
245 | leftNode = NULL;
|
---|
246 | }
|
---|
247 | if (rightNode != NULL) {
|
---|
248 | delete rightNode;
|
---|
249 | rightNode = NULL;
|
---|
250 | }
|
---|
251 | }
|
---|
252 |
|
---|
253 | void OrQueryNode::Print (ostream &s, int indent) const {
|
---|
254 | PrintIndentText (s, "leftNode:\n", indent);
|
---|
255 | PrintNode (s, leftNode, indent+2);
|
---|
256 | PrintIndentText (s, "OR\n", indent);
|
---|
257 | PrintIndentText (s, "rightNode:\n", indent);
|
---|
258 | PrintNode (s, rightNode, indent+2);
|
---|
259 | }
|
---|
260 |
|
---|
261 |
|
---|
262 |
|
---|
263 | NotQueryNode::NotQueryNode () {
|
---|
264 | queryNode = NULL;
|
---|
265 | notNode = NULL;
|
---|
266 | }
|
---|
267 |
|
---|
// Recursively deletes both subtrees via Free().
NotQueryNode::~NotQueryNode () {
  Free ();
}
271 |
|
---|
272 | void NotQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
|
---|
273 | QueryResult &result) const {
|
---|
274 | result.Clear();
|
---|
275 |
|
---|
276 | // check for nothing
|
---|
277 | if (queryNode == NULL) return;
|
---|
278 | if (notNode == NULL) {
|
---|
279 | queryNode->Calculate (indexData, queryInfo, result);
|
---|
280 | return;
|
---|
281 | }
|
---|
282 |
|
---|
283 | // calculate the result from the query tree and the result
|
---|
284 | // from the not tree
|
---|
285 | QueryResult notResult;
|
---|
286 | queryNode->Calculate (indexData, queryInfo, result);
|
---|
287 | notNode->Calculate (indexData, queryInfo, notResult);
|
---|
288 |
|
---|
289 | // merge the results, this can be done in place as the results
|
---|
290 | // will always shrink with a Not
|
---|
291 |
|
---|
292 | bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
|
---|
293 | if (haveAccum && (result.ranks.size() != result.docs.size() ||
|
---|
294 | notResult.ranks.size() != notResult.docs.size())) {
|
---|
295 | // shouldn't ever get here
|
---|
296 | haveAccum = false;
|
---|
297 | assert (0);
|
---|
298 | }
|
---|
299 |
|
---|
300 | // combine document numbers and corresponding ranks
|
---|
301 | unsigned long queryI = 0;
|
---|
302 | unsigned long notI = 0;
|
---|
303 | unsigned long outI = 0;
|
---|
304 | while (queryI < result.docs.size() &&
|
---|
305 | notI < notResult.docs.size()) {
|
---|
306 | if (result.docs[queryI] < notResult.docs[notI]) {
|
---|
307 | // found a document not in the notResults
|
---|
308 | result.docs[outI] = result.docs[queryI];
|
---|
309 | if (haveAccum)
|
---|
310 | result.ranks[outI] = result.ranks[queryI];
|
---|
311 | queryI++;
|
---|
312 | outI++;
|
---|
313 | } else if (result.docs[queryI] > notResult.docs[notI]) {
|
---|
314 | notI++;
|
---|
315 | } else {
|
---|
316 | // the documents are equal, ignore both
|
---|
317 | queryI++;
|
---|
318 | notI++;
|
---|
319 | }
|
---|
320 | }
|
---|
321 |
|
---|
322 | // erase unused document numbers and ranks
|
---|
323 | result.docs.erase(result.docs.begin()+outI, result.docs.end());
|
---|
324 | if (haveAccum)
|
---|
325 | result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());
|
---|
326 |
|
---|
327 | // combine term frequency information
|
---|
328 | if (queryInfo.needTermFreqs)
|
---|
329 | result.termFreqs.insert (result.termFreqs.end(),
|
---|
330 | notResult.termFreqs.begin(),
|
---|
331 | notResult.termFreqs.end());
|
---|
332 | }
|
---|
333 |
|
---|
334 | void NotQueryNode::Free () {
|
---|
335 | if (queryNode != NULL) {
|
---|
336 | delete queryNode;
|
---|
337 | queryNode = NULL;
|
---|
338 | }
|
---|
339 | if (notNode != NULL) {
|
---|
340 | delete notNode;
|
---|
341 | notNode = NULL;
|
---|
342 | }
|
---|
343 | }
|
---|
344 |
|
---|
345 | void NotQueryNode::Print (ostream &s, int indent) const {
|
---|
346 | PrintIndentText (s, "queryNode:\n", indent);
|
---|
347 | PrintNode (s, queryNode, indent+2);
|
---|
348 | PrintIndentText (s, "NOT\n", indent);
|
---|
349 | PrintIndentText (s, "notNode:\n", indent);
|
---|
350 | PrintNode (s, notNode, indent+2);
|
---|
351 | }
|
---|
352 |
|
---|
353 |
|
---|
// Reset the node to its empty state (no tag name).
void TagNode::Clear () {
  UCArrayClear (tagName);
}
357 |
|
---|
// Look up this tag in the tag dictionary and decode its fragment
// ranges (start/end pairs) from the inverted file into `fragRange`.
// An unknown or empty tag yields an empty range list.
void TagNode::Calculate (IndexData &indexData,
			 FragRangeArray &fragRange) const {
  fragRange.erase (fragRange.begin(), fragRange.end());
  if (tagName.empty()) return;

  // get information about this tag from the block dictionary
  block_dict_el tagEl;
  unsigned long tagElNum;
  if (!SearchBlockDictEl (indexData.dictFile, indexData.biTags,
			  indexData.bdh.entries_per_tblk,
			  indexData.bdh.tag_dict_size,
			  tagName, tagEl, tagElNum))
    return;

  // seek to the appropriate place in the inverted file
  fseek (indexData.invfFile, tagEl.invf_ptr, SEEK_SET);

  stdio_bitio_buffer buffer(indexData.invfFile);

  // each occurrence is stored as two numbers (range start and range
  // end), so there are frag_occur*2 pointers in the list; B is the
  // bblock coding parameter derived from the expected density
  unsigned long pTag = tagEl.frag_occur*2;
  unsigned long B = BIO_Bblock_Init (indexData.bdh.num_frags+pTag, pTag);
  unsigned long fragNum = 0;
  unsigned long i;
  FragRange thisFrag;
  for (i=0; i<tagEl.frag_occur; i++) {
    // get start: values are delta-coded (difference from the previous
    // fragment number, stored +1 so zero deltas are representable)
    unsigned long delta = buffer.bblock_decode (B, NULL)-1;
    fragNum += delta;

    thisFrag.rangeStart = fragNum;

    // get end, delta-coded from the start in the same way
    delta = buffer.bblock_decode (B, NULL)-1;
    fragNum += delta;

    thisFrag.rangeEnd = fragNum;
    fragRange.push_back (thisFrag);
  }

  buffer.done();
}
399 |
|
---|
// Releasing a tag node just clears its name.
void TagNode::Free () {
  Clear ();
}
403 |
|
---|
// Print the tag name at the given indent level.
void TagNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TAG: \"" << tagName << "\"\n";
}
408 |
|
---|
409 |
|
---|
// Reset the term to its default state: empty text, unit weight,
// no stemming, and an unrestricted proximity range.
void TermNode::Clear () {
  UCArrayClear (term);
  termWeight = 1;
  stemMethod = 0;
  startRange = NO_TERM_RANGE_START;
  endRange = NO_TERM_RANGE_END;
}
417 |
|
---|
// Construct in the cleared/default state.
TermNode::TermNode () {
  Clear ();
}
421 |
|
---|
422 | void TermNode::Calculate (IndexData &indexData,
|
---|
423 | bool needFragFreqs,
|
---|
424 | FragRangeArray *fragLimits,
|
---|
425 | FragData &fragData) const {
|
---|
426 | fragData.Clear ();
|
---|
427 |
|
---|
428 | // get a list of term numbers
|
---|
429 | vector<unsigned long> equivWords;
|
---|
430 | FindWordNumbers (indexData, term, stemMethod, equivWords);
|
---|
431 |
|
---|
432 | // get the information for each word and merge it with
|
---|
433 | // previous results
|
---|
434 | FragData tempFragData1;
|
---|
435 | FragData tempFragData2;
|
---|
436 | vector<unsigned long>::iterator here = equivWords.begin();
|
---|
437 | vector<unsigned long>::iterator end = equivWords.end();
|
---|
438 | while (here != end) {
|
---|
439 | // get the information for this word
|
---|
440 | ReadTermFragData (indexData, needFragFreqs, *here,
|
---|
441 | tempFragData1, fragLimits);
|
---|
442 |
|
---|
443 | // combine with last results
|
---|
444 | tempFragData2 = fragData;
|
---|
445 | CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);
|
---|
446 |
|
---|
447 | here++;
|
---|
448 | }
|
---|
449 | }
|
---|
450 |
|
---|
// Releasing a term node just resets it to the default state.
void TermNode::Free () {
  Clear ();
}
454 |
|
---|
455 | void TermNode::Print (ostream &s, int indent) const {
|
---|
456 | PrintIndent (s, indent);
|
---|
457 | s << "TERM: \"" << term << "\"\n";
|
---|
458 | PrintIndent (s, indent+2);
|
---|
459 | s << "termWeight: " << termWeight << "\n";
|
---|
460 | PrintIndent (s, indent+2);
|
---|
461 | s << "stemMethod: " << stemMethod << "\n";
|
---|
462 | PrintIndent (s, indent+2);
|
---|
463 | s << "startRange: " << startRange << "\n";
|
---|
464 | PrintIndent (s, indent+2);
|
---|
465 | s << "endRange: " << endRange << "\n";
|
---|
466 | }
|
---|
467 |
|
---|
468 |
|
---|
469 | ProxMatchQueryNode::ProxMatchQueryNode () {
|
---|
470 | tagNodePtr = NULL;
|
---|
471 | }
|
---|
472 |
|
---|
// Releases the owned tag node and term list via Free().
ProxMatchQueryNode::~ProxMatchQueryNode () {
  Free ();
}
476 |
|
---|
// Evaluate a proximity/phrase match: all terms in `terms` must occur,
// and multi-term matches are constrained to lie within tag ranges so
// a phrase cannot span document (or tag) boundaries.  The first term
// seeds the result; each subsequent term is ANDed in both at the
// result level and at the fragment level (with its start/end range
// giving the allowed word distance).
void ProxMatchQueryNode::Calculate (IndexData &indexData,
				    const QueryInfo &queryInfo,
				    QueryResult &result) const {
  result.Clear ();

  bool needFragFreqs = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // read in the tag if needed
  FragRangeArray fragLimits;
  FragRangeArray *fragLimitsPtr = NULL;
  if (tagNodePtr == NULL && terms.size() > 1) {
    // multiple terms must be compared relative to some tag
    // otherwise phrase matches could span documents; fall back to
    // the current retrieval level as the implicit tag
    TagNode tempTagNode;
    tempTagNode.tagName = indexData.curLevel;
    tempTagNode.Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;

  } else if (tagNodePtr != NULL) {
    // an explicit tag restriction was supplied
    (*tagNodePtr).Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;
  }

  // the tag (or failing that the current level) used for weighting
  UCArray tagOrLevel = indexData.curLevel;
  if (tagNodePtr != NULL) tagOrLevel = (*tagNodePtr).tagName;

  // read in the first term; it seeds both the running fragment data
  // and the document-level result
  FragData termData;
  TermNodeArray::const_iterator termHere=terms.begin(), termEnd = terms.end();
  if (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs, fragLimitsPtr, termData);

    // convert initial fragment information
    FragsToQueryResult (indexData,
			queryInfo,
			termData,
			tagOrLevel,
			(*termHere).term,
			(*termHere).stemMethod,
			(*termHere).termWeight,
			result);

    termHere++;

    if (termHere == termEnd) return; // nothing more to do
  }

  // read and combine the rest of the terms
  FragData comTermData;
  while (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs,
			   fragLimitsPtr, comTermData);

    // AND this term's weight contribution into the result
    AndFragsToQueryResult (indexData,
			   queryInfo,
			   comTermData,
			   tagOrLevel,
			   (*termHere).term,
			   (*termHere).stemMethod,
			   (*termHere).termWeight,
			   result);

    // narrow the running fragment data to positions where this term
    // occurs within [startRange, endRange] of the phrase so far
    AndCombineFragData (needFragFreqs, termData, comTermData,
			(*termHere).startRange,
			(*termHere).endRange,
			fragLimitsPtr);
    termHere++;
  }

  // remove document numbers that survived the per-term AND but not
  // the proximity constraint
  RemoveUnwantedResults (indexData,
			 queryInfo,
			 termData,
			 result);
}
552 |
|
---|
553 | void ProxMatchQueryNode::Free () {
|
---|
554 | if (tagNodePtr != NULL) {
|
---|
555 | delete tagNodePtr;
|
---|
556 | tagNodePtr = NULL;
|
---|
557 | }
|
---|
558 | terms.erase (terms.begin(), terms.end());
|
---|
559 | }
|
---|
560 |
|
---|
561 | void ProxMatchQueryNode::Print (ostream &s, int indent) const {
|
---|
562 | PrintIndentText (s, "PROXMATCH\n", indent);
|
---|
563 | if (tagNodePtr != NULL) tagNodePtr->Print (s, indent+2);
|
---|
564 |
|
---|
565 | TermNodeArray::const_iterator here = terms.begin();
|
---|
566 | TermNodeArray::const_iterator end = terms.end();
|
---|
567 | while (here != end) {
|
---|
568 | (*here).Print (s, indent+2);
|
---|
569 | here++;
|
---|
570 | }
|
---|
571 | }
|
---|
572 |
|
---|
573 |
|
---|
// Top-level query driver: evaluate `queryTree` against the index,
// turn accumulated weights into ranks, select and sort the top
// `queryInfo.maxDocs` documents (heap-based selection), optionally
// refine with exact document weights, and trim the result arrays to
// the requested size.
void MGQuery (IndexData &indexData,
	      const QueryInfo &queryInfo,
	      const QueryNode *queryTree,
	      QueryResult &result) {
  result.Clear ();

  // make sure level is current
  indexData.LoadLevel (queryInfo.docLevel);

  // do query; a NULL tree leaves the result empty
  if (queryTree != NULL)
    (*queryTree).Calculate (indexData, queryInfo, result);

  // make weights into ranks if needed, by dividing each accumulated
  // weight by the (approximate) document weight
  unsigned long i;
  if (queryInfo.sortByRank || queryInfo.needRankInfo) {
    for (i=0; i<result.ranks.size(); i++) {
      result.ranks[i] /=
	indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
    }
  }

  // clamp the requested result count to what we actually have
  // (maxDocs == 0 means "no limit")
  unsigned long resultsSize = queryInfo.maxDocs;
  if (resultsSize == 0 || resultsSize > result.docs.size())
    resultsSize = result.docs.size();

  // sort results by rank if needed: heap-select the top resultsSize
  // entries, then sort just those into descending rank order
  GTRank gtRank;
  if (queryInfo.sortByRank) {
    // need in ascending order for SelectAddHeap
    MakeHeap (result.docs.begin(), result.ranks.begin(),
	      resultsSize, gtRank);
    SelectAddHeap (result.docs.begin(), result.ranks.begin(), resultsSize,
		   result.docs.begin()+resultsSize,
		   result.ranks.begin()+resultsSize,
		   result.ranks.size()-resultsSize, gtRank);

    // sort into descending order
    SortHeap (result.docs.begin(), result.ranks.begin(), resultsSize, gtRank);
  }

  // get exact weights if needed: undo the approximate division and
  // redo it with the exact per-document weight from disk
  if (queryInfo.exactWeights &&
      (queryInfo.sortByRank || queryInfo.needRankInfo)) {
    unsigned long exactDiskPtr =
      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;

    for (i=0; i<resultsSize; i++) {
      result.ranks[i] = result.ranks[i] *
	indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
	GetExactDocWeight (indexData.exactWeightsFile, exactDiskPtr,
			   result.docs[i]);
    }

    // re-sort top candidates based on exact weights
    if (queryInfo.sortByRank) {
      MakeHeap (result.docs.begin(), result.ranks.begin(),
		resultsSize, gtRank);
      SortHeap (result.docs.begin(), result.ranks.begin(),
		resultsSize, gtRank);
    }
  }

  // get rid of unwanted ranking results and trim both arrays to the
  // requested number of documents
  if (result.ranks.empty()) {
    // do nothing
  } else if (!queryInfo.needRankInfo) {
    result.ranks.erase(result.ranks.begin(), result.ranks.end());
  } else {
    result.ranks.erase(result.ranks.begin()+resultsSize, result.ranks.end());
  }
  result.docs.erase(result.docs.begin()+resultsSize, result.docs.end());
}
647 |
|
---|