/**************************************************************************
 *
 * MGQuery.cpp -- Query related functions
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: MGQuery.cpp 861 2000-01-18 23:24:19Z rjmcnab $
 *
 **************************************************************************/

#include "MGQuery.h"
#include "bitio_m_stdio.h"
#include "bitio_gen.h"
#include "Terms.h"
#include "QueryResultsSort.h"
#include <assert.h>
#include <limits.h>   // ULONG_MAX, used as a sentinel in the Or merge


void PrintIndent (ostream &s, int indent) {
  while (indent-- > 0) s << " ";
}

void PrintIndentText (ostream &s, const char *text, int indent) {
  PrintIndent (s, indent);
  s << text;
}

void PrintNode (ostream &s, QueryNode *node, int indent) {
  if (node == NULL) {
    PrintIndentText (s, "NULL\n", indent);
  } else {
    node->Print (s, indent+2);
  }
}


QueryNode::QueryNode () {
}

QueryNode::~QueryNode () {
}

void QueryNode::Calculate (IndexData &/*indexData*/,
                           const QueryInfo &/*queryInfo*/,
                           QueryResult &result) const {
  result.Clear();
}

void QueryNode::Free () {
}

void QueryNode::Print (ostream &/*s*/, int /*indent*/) const {
}


AndQueryNode::AndQueryNode () {
  leftNode = NULL;
  rightNode = NULL;
}

AndQueryNode::~AndQueryNode () {
  Free ();
}
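// The node classes in this file all implement the QueryNode interface:
// Calculate produces a QueryResult for a subtree, Print dumps the tree,
// and Free releases any children.  A minimal sketch of driving that
// interface polymorphically (illustrative only, not part of the original
// source; it assumes QueryNode declares a virtual destructor, as the
// subclass destructors below suggest, and "indexData"/"queryInfo" stand
// in for properly initialised objects):
#if 0
static void EvaluateAndShow (QueryNode *root, IndexData &indexData,
                             const QueryInfo &queryInfo) {
  root->Print (cerr, 0);              // dump the tree for debugging

  QueryResult result;
  root->Calculate (indexData, queryInfo, result);  // virtual dispatch

  delete root;                        // subclass destructors Free children
}
#endif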
void AndQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                              QueryResult &result) const {
  result.Clear();

  // an And between nothing and something is nothing...
  if (leftNode == NULL || rightNode == NULL) return;

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult rightResult;
  leftNode->Calculate (indexData, queryInfo, result);
  rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results, this can be done in place as the results
  // will always shrink with an And
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
                    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks
  unsigned long leftI = 0;
  unsigned long rightI = 0;
  unsigned long outI = 0;
  while (leftI < result.docs.size() &&
         rightI < rightResult.docs.size()) {
    if (result.docs[leftI] < rightResult.docs[rightI]) {
      leftI++;
    } else if (result.docs[leftI] > rightResult.docs[rightI]) {
      rightI++;
    } else {
      // the documents are equal
      result.docs[outI] = result.docs[leftI];
      if (haveAccum)
        result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
      leftI++;
      rightI++;
      outI++;
    }
  }

  // erase unused document numbers and ranks
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
                             rightResult.termFreqs.begin(),
                             rightResult.termFreqs.end());
}

void AndQueryNode::Free () {
  if (leftNode != NULL) {
    delete leftNode;
    leftNode = NULL;
  }
  if (rightNode != NULL) {
    delete rightNode;
    rightNode = NULL;
  }
}

void AndQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, "leftNode:\n", indent);
  PrintNode (s, leftNode, indent+2);
  PrintIndentText (s, "AND\n", indent);
  PrintIndentText (s, "rightNode:\n", indent);
  PrintNode (s, rightNode, indent+2);
}
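// The merge in AndQueryNode::Calculate above is a standard two-pointer
// intersection of sorted document lists.  The same idea in isolation,
// over plain vectors (a sketch, not part of the original source):
#if 0
#include <vector>
static std::vector<unsigned long>
IntersectSorted (const std::vector<unsigned long> &a,
                 const std::vector<unsigned long> &b) {
  std::vector<unsigned long> out;
  unsigned long i = 0, j = 0;
  while (i < a.size() && j < b.size()) {
    if (a[i] < b[j]) i++;                       // only in a: skip
    else if (a[i] > b[j]) j++;                  // only in b: skip
    else { out.push_back (a[i]); i++; j++; }    // in both: keep
  }
  return out;   // O(|a| + |b|) because both inputs are sorted
}
#endif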
OrQueryNode::OrQueryNode () {
  leftNode = NULL;
  rightNode = NULL;
}

OrQueryNode::~OrQueryNode () {
  Free ();
}

void OrQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                             QueryResult &result) const {
  result.Clear();

  // calculate the result from the left tree and the result
  // from the right tree
  QueryResult leftResult;
  QueryResult rightResult;
  if (leftNode != NULL)
    leftNode->Calculate (indexData, queryInfo, leftResult);
  if (rightNode != NULL)
    rightNode->Calculate (indexData, queryInfo, rightResult);

  // merge the results
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (leftResult.ranks.size() != leftResult.docs.size() ||
                    rightResult.ranks.size() != rightResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks
  unsigned long leftSize = leftResult.docs.size();
  unsigned long rightSize = rightResult.docs.size();
  unsigned long leftI = 0;
  unsigned long rightI = 0;
  unsigned long leftDocNum = 0;
  unsigned long rightDocNum = 0;
  while (leftI < leftSize || rightI < rightSize) {
    // check leftI
    if (leftI < leftResult.docs.size())
      leftDocNum = leftResult.docs[leftI];
    else leftDocNum = ULONG_MAX;

    // check rightI
    if (rightI < rightResult.docs.size())
      rightDocNum = rightResult.docs[rightI];
    else rightDocNum = ULONG_MAX;

    // combine
    if (leftDocNum < rightDocNum) {
      result.docs.push_back (leftDocNum);
      if (haveAccum) result.ranks.push_back (leftResult.ranks[leftI]);
      leftI++;
    } else if (leftDocNum > rightDocNum) {
      result.docs.push_back (rightDocNum);
      if (haveAccum) result.ranks.push_back (rightResult.ranks[rightI]);
      rightI++;
    } else { // equal
      result.docs.push_back (leftDocNum);
      if (haveAccum)
        result.ranks.push_back (leftResult.ranks[leftI] +
                                rightResult.ranks[rightI]);
      leftI++;
      rightI++;
    }
  }

  // combine term frequency information
  if (queryInfo.needTermFreqs) {
    result.termFreqs.insert (result.termFreqs.end(),
                             leftResult.termFreqs.begin(),
                             leftResult.termFreqs.end());
    result.termFreqs.insert (result.termFreqs.end(),
                             rightResult.termFreqs.begin(),
                             rightResult.termFreqs.end());
  }
}

void OrQueryNode::Free () {
  if (leftNode != NULL) {
    delete leftNode;
    leftNode = NULL;
  }
  if (rightNode != NULL) {
    delete rightNode;
    rightNode = NULL;
  }
}

void OrQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, "leftNode:\n", indent);
  PrintNode (s, leftNode, indent+2);
  PrintIndentText (s, "OR\n", indent);
  PrintIndentText (s, "rightNode:\n", indent);
  PrintNode (s, rightNode, indent+2);
}


NotQueryNode::NotQueryNode () {
  queryNode = NULL;
  notNode = NULL;
}

NotQueryNode::~NotQueryNode () {
  Free ();
}

void NotQueryNode::Calculate (IndexData &indexData, const QueryInfo &queryInfo,
                              QueryResult &result) const {
  result.Clear();

  // check for nothing
  if (queryNode == NULL) return;
  if (notNode == NULL) {
    queryNode->Calculate (indexData, queryInfo, result);
    return;
  }

  // calculate the result from the query tree and the result
  // from the not tree
  QueryResult notResult;
  queryNode->Calculate (indexData, queryInfo, result);
  notNode->Calculate (indexData, queryInfo, notResult);

  // merge the results, this can be done in place as the results
  // will always shrink with a Not
  bool haveAccum = (queryInfo.sortByRank || queryInfo.needRankInfo);
  if (haveAccum && (result.ranks.size() != result.docs.size() ||
                    notResult.ranks.size() != notResult.docs.size())) {
    // shouldn't ever get here
    haveAccum = false;
    assert (0);
  }

  // combine document numbers and corresponding ranks
  unsigned long queryI = 0;
  unsigned long notI = 0;
  unsigned long outI = 0;
  while (queryI < result.docs.size() &&
         notI < notResult.docs.size()) {
    if (result.docs[queryI] < notResult.docs[notI]) {
      // found a document not in the notResults
      result.docs[outI] = result.docs[queryI];
      if (haveAccum) result.ranks[outI] = result.ranks[queryI];
      queryI++;
      outI++;
    } else if (result.docs[queryI] > notResult.docs[notI]) {
      notI++;
    } else {
      // the documents are equal, ignore both
      queryI++;
      notI++;
    }
  }

  // keep any query documents that come after the last not document
  // (without this they would be erased below)
  while (queryI < result.docs.size()) {
    result.docs[outI] = result.docs[queryI];
    if (haveAccum) result.ranks[outI] = result.ranks[queryI];
    queryI++;
    outI++;
  }

  // erase unused document numbers and ranks
  result.docs.erase(result.docs.begin()+outI, result.docs.end());
  if (haveAccum)
    result.ranks.erase(result.ranks.begin()+outI, result.ranks.end());

  // combine term frequency information
  if (queryInfo.needTermFreqs)
    result.termFreqs.insert (result.termFreqs.end(),
                             notResult.termFreqs.begin(),
                             notResult.termFreqs.end());
}

void NotQueryNode::Free () {
  if (queryNode != NULL) {
    delete queryNode;
    queryNode = NULL;
  }
  if (notNode != NULL) {
    delete notNode;
    notNode = NULL;
  }
}

void NotQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, "queryNode:\n", indent);
  PrintNode (s, queryNode, indent+2);
  PrintIndentText (s, "NOT\n", indent);
  PrintIndentText (s, "notNode:\n", indent);
  PrintNode (s, notNode, indent+2);
}
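// Sketch (not part of the original source): assembling a tree for
// "cat AND NOT dog" using NotQueryNode's queryNode/notNode slots.  It
// assumes the members are publicly accessible as they are used in this
// file, that TermNodeArray is a std::vector-style container of TermNode,
// and that SetCStr (declared in UCArray.h) fills a UCArray from a C
// string; the function name is hypothetical.
#if 0
static QueryNode *ExampleAndNotTree () {
  ProxMatchQueryNode *catNode = new ProxMatchQueryNode;
  TermNode cat;
  SetCStr (cat.term, "cat");
  catNode->terms.push_back (cat);

  ProxMatchQueryNode *dogNode = new ProxMatchQueryNode;
  TermNode dog;
  SetCStr (dog.term, "dog");
  dogNode->terms.push_back (dog);

  NotQueryNode *notNode = new NotQueryNode;
  notNode->queryNode = catNode;  // documents that must match
  notNode->notNode = dogNode;    // documents to exclude
  return notNode;                // evaluate with Calculate, free with delete
}
#endif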
void TagNode::Clear () {
  UCArrayClear (tagName);
}

void TagNode::Calculate (IndexData &indexData,
                         FragRangeArray &fragRange) const {
  fragRange.erase (fragRange.begin(), fragRange.end());
  if (tagName.empty()) return;

  // get information about this tag
  block_dict_el tagEl;
  unsigned long tagElNum;
  if (!SearchBlockDictEl (indexData.dictFile, indexData.biTags,
                          indexData.bdh.entries_per_tblk,
                          indexData.bdh.tag_dict_size,
                          tagName, tagEl, tagElNum))
    return;

  // seek to the appropriate place in the inverted file
  fseek (indexData.invfFile, tagEl.invf_ptr, SEEK_SET);

  stdio_bitio_buffer buffer(indexData.invfFile);

  unsigned long pTag = tagEl.frag_occur*2;
  unsigned long B = BIO_Bblock_Init (indexData.bdh.num_frags+pTag, pTag);
  unsigned long fragNum = 0;

  unsigned long i;
  FragRange thisFrag;
  for (i=0; i<tagEl.frag_occur; i++) {
    // get start: ranges are gap-encoded, so each decoded value is a
    // delta added to the running fragment number
    fragNum += buffer.bblock_decode (B, NULL);
    thisFrag.rangeStart = fragNum;

    // get end
    fragNum += buffer.bblock_decode (B, NULL);
    thisFrag.rangeEnd = fragNum;

    fragRange.push_back (thisFrag);
  }

  buffer.done();
}

void TagNode::Free () {
  Clear ();
}

void TagNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TAG: \"" << tagName << "\"\n";
}


void TermNode::Clear () {
  UCArrayClear (term);
  termWeight = 1;
  stemMethod = 0;
  startRange = NO_TERM_RANGE_START;
  endRange = NO_TERM_RANGE_END;
}

TermNode::TermNode () {
  Clear ();
}

void TermNode::Calculate (IndexData &indexData, bool needFragFreqs,
                          FragRangeArray *fragLimits,
                          FragData &fragData) const {
  fragData.Clear ();

  // get a list of term numbers
  vector<unsigned long> equivWords;
  FindWordNumbers (indexData, term, stemMethod, equivWords);

  // get the information for each word and merge it with
  // previous results
  FragData tempFragData1;
  FragData tempFragData2;
  vector<unsigned long>::iterator here = equivWords.begin();
  vector<unsigned long>::iterator end = equivWords.end();
  while (here != end) {
    // get the information for this word
    ReadTermFragData (indexData, needFragFreqs, *here,
                      tempFragData1, fragLimits);

    // combine with last results
    tempFragData2 = fragData;
    CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);

    here++;
  }
}

void TermNode::Free () {
  Clear ();
}

void TermNode::Print (ostream &s, int indent) const {
  PrintIndent (s, indent);
  s << "TERM: \"" << term << "\"\n";
  PrintIndent (s, indent+2);
  s << "termWeight: " << termWeight << "\n";
  PrintIndent (s, indent+2);
  s << "stemMethod: " << stemMethod << "\n";
  PrintIndent (s, indent+2);
  s << "startRange: " << startRange << "\n";
  PrintIndent (s, indent+2);
  s << "endRange: " << endRange << "\n";
}


ProxMatchQueryNode::ProxMatchQueryNode () {
  tagNodePtr = NULL;
}

ProxMatchQueryNode::~ProxMatchQueryNode () {
  Free ();
}

void ProxMatchQueryNode::Calculate (IndexData &indexData,
                                    const QueryInfo &queryInfo,
                                    QueryResult &result) const {
  result.Clear ();

  bool needFragFreqs = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // read in the tag if needed
  FragRangeArray fragLimits;
  FragRangeArray *fragLimitsPtr = NULL;
  if (tagNodePtr != NULL) {
    (*tagNodePtr).Calculate (indexData, fragLimits);
    fragLimitsPtr = &fragLimits;
  }

  UCArray tagOrLevel = indexData.curLevel;
  if (tagNodePtr != NULL) tagOrLevel = (*tagNodePtr).tagName;

  // read in the first term
  FragData termData;
  TermNodeArray::const_iterator termHere = terms.begin(), termEnd = terms.end();
  if (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs, fragLimitsPtr, termData);

    // convert initial fragment information
    FragsToQueryResult (indexData, queryInfo, termData, tagOrLevel,
                        (*termHere).term, (*termHere).stemMethod,
                        (*termHere).termWeight, result);
    termHere++;

    if (termHere == termEnd) return; // nothing more to do
  }

  // read and combine the rest of the terms
  FragData comTermData;
  while (termHere != termEnd) {
    (*termHere).Calculate (indexData, needFragFreqs,
                           fragLimitsPtr, comTermData);

    AndFragsToQueryResult (indexData, queryInfo, comTermData, tagOrLevel,
                           (*termHere).term, (*termHere).stemMethod,
                           (*termHere).termWeight, result);

    AndCombineFragData (needFragFreqs, termData, comTermData,
                        (*termHere).startRange, (*termHere).endRange,
                        fragLimitsPtr);
    termHere++;
  }

  // remove unwanted document numbers
  RemoveUnwantedResults (indexData, queryInfo, termData, result);
}

void ProxMatchQueryNode::Free () {
  if (tagNodePtr != NULL) {
    delete tagNodePtr;
    tagNodePtr = NULL;
  }
  terms.erase (terms.begin(), terms.end());
}

void ProxMatchQueryNode::Print (ostream &s, int indent) const {
  PrintIndentText (s, "PROXMATCH\n", indent);
  if (tagNodePtr != NULL) tagNodePtr->Print (s, indent+2);

  TermNodeArray::const_iterator here = terms.begin();
  TermNodeArray::const_iterator end = terms.end();
  while (here != end) {
    (*here).Print (s, indent+2);
    here++;
  }
}
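// Sketch (not part of the original source): restricting a term search to
// a tag, the way ProxMatchQueryNode::Calculate combines a TagNode's
// fragment limits with its TermNodes.  SetCStr is assumed from UCArray.h,
// TermNodeArray is assumed to be a vector of TermNode, and the stemMethod
// value shown is illustrative only.
#if 0
static QueryNode *ExampleTitleQuery () {
  ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;

  // limit matches to fragments inside a Title tag
  proxNode->tagNodePtr = new TagNode;
  SetCStr (proxNode->tagNodePtr->tagName, "Title");

  TermNode t;
  SetCStr (t.term, "gigabytes");
  t.stemMethod = 3;       // e.g. case-fold and stem; value is illustrative
  proxNode->terms.push_back (t);

  return proxNode;        // caller evaluates with Calculate, frees with delete
}
#endif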
void MGQuery (IndexData &indexData, const QueryInfo &queryInfo,
              const QueryNode *queryTree, QueryResult &result) {
  result.Clear ();

  // make sure level is current
  indexData.LoadLevel (queryInfo.docLevel);

  // do query
  if (queryTree != NULL)
    (*queryTree).Calculate (indexData, queryInfo, result);

  // make weights into ranks if needed
  unsigned long i;
  if (queryInfo.sortByRank || queryInfo.needRankInfo) {
    for (i=0; i<result.ranks.size(); i++) {
      result.ranks[i] /=
        indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
    }
  }

  unsigned long resultsSize = queryInfo.maxDocs;
  if (resultsSize == 0 || resultsSize > result.docs.size())
    resultsSize = result.docs.size();

  // sort results by rank if needed
  GTRank gtRank;
  if (queryInfo.sortByRank) {
    // need in ascending order for SelectAddHeap
    MakeHeap (result.docs.begin(), result.ranks.begin(),
              resultsSize, gtRank);
    SelectAddHeap (result.docs.begin(), result.ranks.begin(), resultsSize,
                   result.docs.begin()+resultsSize,
                   result.ranks.begin()+resultsSize,
                   result.ranks.size()-resultsSize, gtRank);

    // sort into descending order
    SortHeap (result.docs.begin(), result.ranks.begin(),
              resultsSize, gtRank);
  }

  // get exact weights if needed
  if (queryInfo.exactWeights &&
      (queryInfo.sortByRank || queryInfo.needRankInfo)) {
    unsigned long exactDiskPtr =
      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;

    for (i=0; i<resultsSize; i++) {
      // undo the approximate weight and apply the exact document weight
      result.ranks[i] = result.ranks[i] *
        indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
        GetExactDocWeight (indexData.exactWeightsFile, exactDiskPtr,
                           result.docs[i]);
    }
  }
}
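// End-to-end sketch of driving MGQuery (illustrative only, not part of
// the original source).  It assumes an IndexData that has been opened
// elsewhere, that SetCStr comes from UCArray.h, and uses the hypothetical
// level name "Document"; the QueryInfo fields are the ones referenced
// throughout this file.
#if 0
static void ExampleSearch (IndexData &indexData) {
  QueryInfo queryInfo;
  SetCStr (queryInfo.docLevel, "Document");
  queryInfo.maxDocs = 10;           // 0 would mean "all matches"
  queryInfo.sortByRank = true;
  queryInfo.exactWeights = false;
  queryInfo.needRankInfo = true;
  queryInfo.needTermFreqs = false;

  ProxMatchQueryNode *termQuery = new ProxMatchQueryNode;
  TermNode t;
  SetCStr (t.term, "computer");
  termQuery->terms.push_back (t);

  QueryResult result;
  MGQuery (indexData, queryInfo, termQuery, result);

  // docs and ranks are parallel arrays, best first when sortByRank is set
  for (unsigned long i = 0; i < result.docs.size(); i++)
    cout << result.docs[i] << " " << result.ranks[i] << "\n";

  delete termQuery;
}
#endif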