/************************************************************************** * * Terms.h -- Query related functions * Copyright (C) 1999 Rodger McNab * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************************/ #ifndef TERMS_H #define TERMS_H #include "IndexData.h" #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_IOS_H) # include #else # include #endif class QueryInfo { public: // general query information UCArray docLevel; unsigned long maxDocs; // 0 = all bool sortByRank; bool exactWeights; // information need to return bool needRankInfo; bool needTermFreqs; void Clear (); QueryInfo () { Clear (); } }; class TermFreqData { public: UCArray tag; // level tag or query tag UCArray term; // unstemmed term int stemMethod; UCArrayVector equivTerms; // the stemmed and casefolded variants of the term unsigned long matchDocs; // tf for level - num levels // containing this term unsigned long termFreq; // overall term freq - num words that // are this term void Clear (); TermFreqData () { Clear (); } }; ostream &operator<< (ostream &s, const TermFreqData &t); bool operator== (const TermFreqData &t1, const TermFreqData &t2); typedef vector TermFreqArray; typedef vector RankArray; typedef vector DocNumArray; class QueryResult { public: DocNumArray docs; RankArray ranks; // used for accumulators during query TermFreqArray termFreqs; unsigned long actualNumDocs; void Clear (); QueryResult (); void printShort(ostream &s); }; ostream &operator<< (ostream &s, const QueryResult &r); bool operator== (const QueryResult &r1, const QueryResult &r2); typedef vector FragNumArray; typedef vector FragFreqArray; class FragData { public: unsigned long matchDocs; // ft for level FragNumArray fragNums; FragFreqArray fragFreqs; void Clear (); FragData () { Clear (); } }; class FragRange { public: unsigned long rangeStart; unsigned long rangeEnd; void Clear () { rangeStart = rangeEnd = 0; } FragRange () { Clear (); } }; typedef vector FragRangeArray; void FindWordNumbers (IndexData &indexData, const UCArray &term, unsigned long stemMethod, vector &equivWords); void ReadTermFragData (IndexData &indexData, bool needFragFreqs, unsigned long termNum, FragData &fragData, FragRangeArray *fragLimits, UCArray &termWord); void CombineFragData (bool needFragFreqs, const FragData &f1, const FragData &f2, FragData &outFragData); // output will be in fragData (as this is an and operation) void AndCombineFragData (bool needFragFreqs, FragData &fragData, const FragData &comFragData, signed long startRange, signed long endRange, const FragRangeArray *fragLimits); void FragsToQueryResult (IndexData &indexData, const QueryInfo &queryInfo, const FragData &termData, const UCArray &tag, const UCArray &term, unsigned long stemMethod, unsigned long termWeight, UCArrayVector &equivTerms, QueryResult &result); void AndFragsToQueryResult (IndexData &indexData, const QueryInfo &queryInfo, const FragData &termData, const UCArray &tag, const UCArray &term, unsigned long stemMethod, unsigned long termWeight, UCArrayVector &equivTerms, QueryResult &result); void RemoveUnwantedResults (IndexData &indexData, const QueryInfo &queryInfo, const FragData &termData, QueryResult &result); //----------------------------------------------------------------- // new QueryResult class to handle retrieval of doc and level nums. // Use this class with extended version of MGQuery class ExtQueryResult : public QueryResult { public: DocNumArray levels; // used for returning a different granularity, eg // search sections but return Document numbers, or search Documents, // return Section numbers. void Clear (); ExtQueryResult (); }; ostream &operator<< (ostream &s, const ExtQueryResult &r); bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2); //------------------------------------------------------------ // new functions to handle full text browse class BrowseQueryResult { public: TermFreqArray termFreqs; void Clear(); BrowseQueryResult (); }; ostream &operator<< (ostream &s, const BrowseQueryResult &r); bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2); void FindNearestWordNumber (IndexData &indexData, const UCArray &term, unsigned long &number); void GetTermList(IndexData &indexData, unsigned long startTerm, unsigned long numTerms, TermFreqArray &terms); void GetTermList (IndexData &indexData, unsigned long startTerm, unsigned long numTerms, UCArrayVector &terms); #endif