Changeset 1124 for trunk/gsdl/src


Ignore:
Timestamp:
2000-04-18T16:04:29+12:00 (24 years ago)
Author:
kjm18
Message:

added termFreq - overall word count rather than document count

Location:
trunk/gsdl/src/mgpp/text
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/Terms.cpp

    r927 r1124  
    44 44     stemMethod = 0;
    45 45     matchDocs = 0;
       46 +   termFreq = 0;
    46 47   }
    47 48
    48 49   ostream &operator<< (ostream &s, const TermFreqData &t) {
    49 50     s << "<" << t.tag << ">\"" << t.term << "\"stem("
    50    -     << t.stemMethod << ")docs(" << t.matchDocs << ")";
       51 +     << t.stemMethod << ")docs(" << t.matchDocs << ")"
       52 +     << "count("<<t.termFreq<<")";
    51 53     return s;
    52 54   }
     
    56 58         (t1.term == t2.term) &&
    57 59         (t1.stemMethod == t2.stemMethod) &&
    58    -       (t1.matchDocs == t2.matchDocs));
       60 +       (t1.matchDocs == t2.matchDocs) &&
       61 +       (t1.termFreq == t2.termFreq));
    59 62   }
    60 63
     
    442 445     unsigned long termDocFreq = 0;
    443 446     unsigned long lastLevelDocNum = 0;
    444     -
        447 +   unsigned long overallwordfreq = 0;
    445 448
    446 449     while (termDataI < termDataSize) {
     
    463 466         if (needRanks)
    464 467       termDocFreq += termData.fragFreqs[termDataI];
        468 +       overallwordfreq += termData.fragFreqs[termDataI];
    465 469       }
    466 470
     
    484 488       termFreqData.stemMethod = stemMethod;
    485 489       termFreqData.matchDocs = termData.matchDocs;
        490 +     termFreqData.termFreq = overallwordfreq;
    486 491       result.termFreqs.push_back (termFreqData);
    487 492     }
     
    517 522     unsigned long termDocFreq = 0;
    518 523     unsigned long lastLevelDocNum = 0;
    519     -
        524 +   unsigned long overallwordfreq = 0;
    520 525     unsigned long resultI = 0;
    521 526     unsigned long resultSize = result.docs.size();
     
    552 557         if (needRanks)
    553 558       termDocFreq += termData.fragFreqs[termDataI];
        559 +      overallwordfreq += termData.fragFreqs[termDataI];
    554 560       }
    555 561
    556 562       termDataI++;
    557     -   }
        563 +   } // while
    558 564
    559 565     if (lastLevelDocNum > 0) {
     
    590 596       termFreqData.stemMethod = stemMethod;
    591 597       termFreqData.matchDocs = termData.matchDocs;
        598 +     termFreqData.termFreq = overallwordfreq;
    592 599       result.termFreqs.push_back (termFreqData);
    593 600     }
  • trunk/gsdl/src/mgpp/text/Terms.h

    r927 r1124  
    58 58     UCArray term; // unstemmed term
    59 59     int stemMethod;
    60    -   unsigned long matchDocs; // tf for level
    61    -
       60 +   unsigned long matchDocs; // tf for level - num levels
       61 +               // containing this term
       62 +   unsigned long termFreq;  // overall term freq - num words that
       63 +                // are this term
    62 64     void Clear ();
    63 65     TermFreqData () { Clear (); }
Note: See TracChangeset for help on using the changeset viewer.