Changeset 1836


Ignore:
Timestamp:
2001-01-15T12:56:57+13:00 (23 years ago)
Author:
kjm18
Message:

Added support for equivalent terms for highlighting. The QueryResult.TermFreqData
now has a UCArrayVector equivTerms member.

Location:
trunk/gsdl/src/mgpp/text
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/MGQuery.cpp

    r1688 r1836  
    423423              bool needFragFreqs,
    424424              FragRangeArray *fragLimits,
    425               FragData &fragData) const {
     425              FragData &fragData,
     426              UCArrayVector &equivTerms) const {
    426427  fragData.Clear ();
     428  equivTerms.erase(equivTerms.begin(), equivTerms.end());
    427429
    428430  // get a list of term numbers
    429   vector<unsigned long> equivWords;
    430   FindWordNumbers (indexData, term, stemMethod, equivWords);
     431  vector<unsigned long> equivNums;
     432  FindWordNumbers (indexData, term, stemMethod, equivNums);
    431433
    432434  // get the information for each word and merge it with
     
    434436  FragData tempFragData1;
    435437  FragData tempFragData2;
    436   vector<unsigned long>::iterator here = equivWords.begin();
    437   vector<unsigned long>::iterator end = equivWords.end();
     438  UCArray equivWord;
     439  vector<unsigned long>::iterator here = equivNums.begin();
     440  vector<unsigned long>::iterator end = equivNums.end();
    438441  while (here != end) {
    439442    // get the information for this word
    440443    ReadTermFragData (indexData, needFragFreqs, *here,
    441               tempFragData1, fragLimits);
    442 
     444              tempFragData1, fragLimits, equivWord);
     445    equivTerms.push_back(equivWord);
    443446    // combine with last results
    444447    tempFragData2 = fragData;
     
    503506  // read in the first term
    504507  FragData termData;
     508  UCArrayVector equivTerms;
    505509  TermNodeArray::const_iterator termHere=terms.begin(), termEnd = terms.end();
    506510  if (termHere != termEnd) {
    507     (*termHere).Calculate (indexData, needFragFreqs, fragLimitsPtr, termData);
     511    (*termHere).Calculate (indexData, needFragFreqs, fragLimitsPtr,
     512               termData, equivTerms);
    508513
    509514    // convert initial fragment information
     
    515520            (*termHere).stemMethod,
    516521            (*termHere).termWeight,
     522            equivTerms,
    517523            result);
    518524 
     
    526532  while (termHere != termEnd) {
    527533    (*termHere).Calculate (indexData, needFragFreqs,
    528                fragLimitsPtr, comTermData);
     534               fragLimitsPtr, comTermData, equivTerms);
    529535
    530536    AndFragsToQueryResult (indexData,
     
    535541               (*termHere).stemMethod,
    536542               (*termHere).termWeight,
     543               equivTerms,
    537544               result);
    538545   
  • trunk/gsdl/src/mgpp/text/MGQuery.h

    r1300 r1836  
    139139          bool needFragFreqs,
    140140          FragRangeArray *fragLimits,
    141           FragData &fragData) const;
     141          FragData &fragData,
     142          UCArrayVector &equivTerms) const;
    142143  void Free ();
    143144  void Print (ostream &s, int indent=0) const;
  • trunk/gsdl/src/mgpp/text/Terms.cpp

    r1775 r1836  
    4242  UCArrayClear (tag);
    4343  UCArrayClear (term);
     44  equivTerms.erase(equivTerms.begin(), equivTerms.end());
    4445  stemMethod = 0;
    4546  matchDocs = 0;
     
    4950ostream &operator<< (ostream &s, const TermFreqData &t) {
    5051  s << "<" << t.tag << ">\"" << t.term << "\"stem("
    51     << t.stemMethod << ")docs(" << t.matchDocs << ")"
     52    << t.stemMethod << ")equiv terms(";
     53 
     54  unsigned long i;
     55  for (i=0; i<t.equivTerms.size(); i++) {
     56    s << t.equivTerms[i] << ", ";
     57  }
     58  s <<")docs(" << t.matchDocs << ")"
    5259    << "count("<<t.termFreq<<")";
    5360  return s;
     
    5865      (t1.term == t2.term) &&
    5966      (t1.stemMethod == t2.stemMethod) &&
     67      (t1.equivTerms == t2.equivTerms) &&
    6068      (t1.matchDocs == t2.matchDocs) &&
    6169      (t1.termFreq == t2.termFreq));
     
    279287               unsigned long termNum,
    280288               FragData &fragData,
    281                FragRangeArray *fragLimits) {
     289               FragRangeArray *fragLimits,
     290               UCArray & termWord) {
    282291  fragData.Clear();
    283292
     
    295304
    296305  fragData.matchDocs = wordDictEl.levelFreqs[indexData.curLevelNum];
    297 
     306  termWord = wordDictEl.el;
    298307  // seek to the appropriate place in the inverted file
    299308  fseek (indexData.invfFile, wordDictEl.invf_ptr, SEEK_SET);
     
    462471             unsigned long stemMethod,
    463472             unsigned long termWeight,
     473             UCArrayVector &equivTerms,
    464474             QueryResult &result) {
    465475  bool needRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);
     
    527537    termFreqData.term = term;
    528538    termFreqData.stemMethod = stemMethod;
     539    termFreqData.equivTerms = equivTerms;
    529540    termFreqData.matchDocs = termData.matchDocs;
    530541    termFreqData.termFreq = overallwordfreq; // will be zero if needRankInfo
     
    541552                unsigned long stemMethod,
    542553                unsigned long termWeight,
     554                UCArrayVector &equivTerms,
    543555                QueryResult &result) {
    544556  bool needRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);
     
    636648    termFreqData.term = term;
    637649    termFreqData.stemMethod = stemMethod;
     650    termFreqData.equivTerms = equivTerms;
    638651    termFreqData.matchDocs = termData.matchDocs;
    639652    termFreqData.termFreq = overallwordfreq;
  • trunk/gsdl/src/mgpp/text/Terms.h

    r1688 r1836  
    5858  UCArray term; // unstemmed term
    5959  int stemMethod;
     60  UCArrayVector equivTerms; // the stemmed and casefolded variants of the term
    6061  unsigned long matchDocs; // tf for level - num levels
    6162               // containing this term
     
    126127               unsigned long termNum,
    127128               FragData &fragData,
    128                FragRangeArray *fragLimits);
     129               FragRangeArray *fragLimits,
     130               UCArray &termWord);
    129131
    130132void CombineFragData (bool needFragFreqs,
     
    148150             unsigned long stemMethod,
    149151             unsigned long termWeight,
     152             UCArrayVector &equivTerms,
    150153             QueryResult &result);
    151154
     
    157160                unsigned long stemMethod,
    158161                unsigned long termWeight,
     162                UCArrayVector &equivTerms,
    159163                QueryResult &result);
    160164
Note: See TracChangeset for help on using the changeset viewer.