Changeset 16912 for indexers/trunk


Ignore:
Timestamp:
2008-08-20T15:06:13+12:00 (16 years ago)
Author:
mdewsnip
Message:

Changes made by Richard Managh at DL Consulting Ltd to return document-level term frequency totals, slightly modified to work with the latest Greenstone.

Location:
indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r16583 r16912  
    141141        HashSet terms = new HashSet();
    142142        query.extractTerms(terms);
     143
     144        HashMap doc_term_freq_map = new HashMap();
    143145       
    144146        Iterator iter = terms.iterator();
     
    149151        // Get the term frequency over all the documents
    150152        TermDocs term_docs = reader.termDocs(term);
    151         int term_freq = term_docs.freq();
     153        int term_freq = 0;
    152154        int match_docs = 0;
    153         if (term_freq != 0) match_docs++;
    154         while (term_docs.next()) {
    155             term_freq += term_docs.freq();
    156             if (term_docs.freq()!= 0) {
     155        while (term_docs.next())
     156        {
     157            if (term_docs.freq() != 0)
     158            {
     159            term_freq += term_docs.freq();
    157160            match_docs++;
     161
     162            // Calculate the document-level term frequency as well
     163            Integer lucene_doc_num_obj = new Integer(term_docs.doc());
     164            int doc_term_freq = 0;
     165                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj))
     166            {
     167                doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
     168            }
     169            doc_term_freq += term_docs.freq();
     170
     171            doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
    158172            }
    159173        }
     
    186200
    187201        for (int i = start_results; i <= hits.length(); i++) {
     202            int lucene_doc_num = hits.id(i - 1);
    188203            Document doc = hits.doc(i - 1);
    189             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1));
     204            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
     205            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1), doc_term_freq);
    190206        }
    191207        }
     
    202218        // Output the matching documents
    203219        for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
    204             Document doc = reader.document(hits.scoreDocs[i - 1].doc);
    205             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score);
     220            int lucene_doc_num = hits.scoreDocs[i - 1].doc;
     221            Document doc = reader.document(lucene_doc_num);
     222            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
     223            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
    206224        }
    207225        }
  • indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r16583 r16912  
    110110    }
    111111
    112     public void addDoc(String id, float rank)
     112    public void addDoc(String id, float rank, int termfreq)
    113113    {
    114     docs_.add(new DocInfo(id, rank));
     114    docs_.add(new DocInfo(id, rank, termfreq));
    115115    }
    116116   
     
    225225    public String id_ = "";
    226226    public float rank_ = 0;
    227 
    228     public DocInfo (String id, float rank)
     227    public int termfreq_ = 0;
     228
     229    public DocInfo (String id, float rank, int termfreq)
    229230    {
    230231        id_ = id;
    231232        rank_ = rank;
     233        termfreq_ = termfreq;
    232234    }
    233235
    234236    public String toString()
    235237    {
    236         return "" + id_ + " (" + rank_ + ")";
     238        return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")";
    237239    }
    238240
    239241    public String toXMLString()
    240242    {
    241         return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" />";
     243        return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />";
    242244    }
    243245    }
Note: See TracChangeset for help on using the changeset viewer.