Changeset 29629 for main/trunk/greenstone2/commonsrc
 Timestamp:
 15.12.2014 09:57:01 (5 years ago)
 Files:

 1 modified
Legend:
 Unmodified
 Added
 Removed

main/trunk/greenstone2/commonsrc/indexers/mgpp/text/Terms.cpp
r29581 r29629 582 582 583 583 // log (N / ft) 584 float wordLog =585 log((double)indexData.levels.levelInfo[indexData.curLevel].numEntries/586 (double)termData.matchDocs);584 //float wordLog = 585 // log((double)indexData.levels.levelInfo[indexData.curLevel].numEntries/ 586 // (double)termData.matchDocs); 587 587 588 588 // Wqt = fqt * log (N / ft) 589 589 // note: terms are allowed to have a weight of zero so 590 590 // they can be excluded from the ranking 591 float Wqt = termWeight * wordLog;591 //float Wqt = termWeight * wordLog; 592 592 593 593 // Wdt = fdt * log (N / ft) … … 605 605 mg_u_long resultOutI = 0; 606 606 607 mg_u_long actual_num_term_match_docs = 0;608 607 mg_u_long actual_num_match_docs = 0; 608 vector<mg_u_long> docFreqsArray; 609 609 while (termDataI < termDataSize) { 610 610 if (indexData.levelConverter.FragToLevel (termData.fragNums[termDataI], … … 612 612 if (levelDocNum != lastLevelDocNum) { 613 613 if (lastLevelDocNum > 0) { 614 ++actual_num_term_match_docs;615 616 Wdt = termDocFreq * wordLog;614 ++actual_num_match_docs; 615 616 //Wdt = termDocFreq * wordLog; 617 617 618 618 // find this document number … … 624 624 if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) { 625 625 result.docs[resultOutI] = lastLevelDocNum; 626 if (needRanks) 627 result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt; 626 if (needRanks) { 627 // store the doc freq so we can calculate the rank for the new term 628 // once we know the num docs 629 docFreqsArray.push_back(termDocFreq); 630 // just store the old rank for now, and we'll add on the new bit at the end 631 result.ranks[resultOutI] = result.ranks[resultI]; // + Wqt * Wdt; 632 } 628 633 ++resultI; 629 634 ++resultOutI; … … 635 640 } 636 641 637 if (needRanks) 642 if (needRanks) { 638 643 termDocFreq += termData.fragFreqs[termDataI]; 639 overallwordfreq += termData.fragFreqs[termDataI]; 644 } 645 overallwordfreq += termData.fragFreqs[termDataI]; 640 646 } 641 647 … … 644 650 645 651 
if (lastLevelDocNum > 0) { 646 ++actual_num_term_match_docs;652 ++actual_num_match_docs; 647 653 // add the last document information 648 Wdt = termDocFreq * wordLog;654 //Wdt = termDocFreq * wordLog; 649 655 650 656 // find this document number … … 656 662 if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) { 657 663 result.docs[resultOutI] = lastLevelDocNum; 658 if (needRanks) 659 result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt; 664 if (needRanks) { 665 // store the doc freq so we can calculate the rank for the new term 666 // once we know the num docs 667 docFreqsArray.push_back(termDocFreq); 668 // just store the old rank for now, and we'll add on the new bit at the end 669 result.ranks[resultOutI] = result.ranks[resultI]; // + Wqt * Wdt; 670 } 660 671 ++resultI; 661 672 ++resultOutI; … … 669 680 else 670 681 result.ranks.erase (result.ranks.begin(), result.ranks.end()); 682 683 // Calculate correct ranks 684 float wordLog = log((double)indexData.levels.levelInfo[indexData.curLevel].numEntries / (double)actual_num_match_docs); 685 float Wqt = termWeight * wordLog; 686 float factor = wordLog * Wqt; 687 688 mg_u_long docFreqI = 0; 689 mg_u_long docFreqSize = docFreqsArray.size(); 690 691 while (docFreqI < docFreqSize) { 692 result.ranks[docFreqI] = result.ranks[docFreqI] + docFreqsArray[docFreqI]*factor; 693 ++docFreqI; 694 } 671 695 672 696 // add the term frequency information … … 678 702 termFreqData.equivTerms = equivTerms; 679 703 //termFreqData.matchDocs = termData.matchDocs; 680 termFreqData.matchDocs = actual_num_term_match_docs;704 termFreqData.matchDocs = actual_num_match_docs; 681 705 termFreqData.termFreq = overallwordfreq; 682 706 result.termFreqs.push_back (termFreqData);