Changeset 16912

Show
Ignore:
Timestamp:
20.08.2008 15:06:13 (11 years ago)
Author:
mdewsnip
Message:

Changes made by Richard Managh at DL Consulting Ltd for returning document-level term frequency totals, slightly modified to work with the latest Greenstone.

Location:
indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r16583 r16912  
    141141        HashSet terms = new HashSet(); 
    142142        query.extractTerms(terms); 
     143 
     144        HashMap doc_term_freq_map = new HashMap(); 
    143145         
    144146        Iterator iter = terms.iterator(); 
     
    149151        // Get the term frequency over all the documents 
    150152        TermDocs term_docs = reader.termDocs(term); 
    151         int term_freq = term_docs.freq(); 
     153        int term_freq = 0; 
    152154        int match_docs = 0; 
    153         if (term_freq != 0) match_docs++; 
    154         while (term_docs.next()) { 
    155             term_freq += term_docs.freq(); 
    156             if (term_docs.freq()!= 0) { 
     155        while (term_docs.next()) 
     156        { 
     157            if (term_docs.freq() != 0) 
     158            { 
     159            term_freq += term_docs.freq(); 
    157160            match_docs++; 
     161 
     162            // Calculate the document-level term frequency as well 
     163            Integer lucene_doc_num_obj = new Integer(term_docs.doc()); 
     164            int doc_term_freq = 0; 
     165                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj)) 
     166            { 
     167                doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue(); 
     168            } 
     169            doc_term_freq += term_docs.freq(); 
     170 
     171            doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq)); 
    158172            } 
    159173        } 
     
    186200 
    187201        for (int i = start_results; i <= hits.length(); i++) { 
     202            int lucene_doc_num = hits.id(i - 1); 
    188203            Document doc = hits.doc(i - 1); 
    189             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1)); 
     204            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue(); 
     205            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1), doc_term_freq); 
    190206        } 
    191207        } 
     
    202218        // Output the matching documents 
    203219        for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) { 
    204             Document doc = reader.document(hits.scoreDocs[i - 1].doc); 
    205             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score); 
     220            int lucene_doc_num = hits.scoreDocs[i - 1].doc; 
     221            Document doc = reader.document(lucene_doc_num); 
     222            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue(); 
     223            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score, doc_term_freq); 
    206224        } 
    207225        } 
  • indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/LuceneQueryResult.java

    r16583 r16912  
    110110    } 
    111111 
    112     public void addDoc(String id, float rank) 
     112    public void addDoc(String id, float rank, int termfreq) 
    113113    { 
    114     docs_.add(new DocInfo(id, rank)); 
     114    docs_.add(new DocInfo(id, rank, termfreq)); 
    115115    } 
    116116     
     
    225225    public String id_ = ""; 
    226226    public float rank_ = 0; 
    227  
    228     public DocInfo (String id, float rank) 
     227    public int termfreq_ = 0; 
     228 
     229    public DocInfo (String id, float rank, int termfreq) 
    229230    { 
    230231        id_ = id; 
    231232        rank_ = rank; 
     233        termfreq_ = termfreq; 
    232234    } 
    233235 
    234236    public String toString() 
    235237    { 
    236         return "" + id_ + " (" + rank_ + ")"; 
     238        return "" + id_ + " (" + rank_ + ") (" + termfreq_ + ")"; 
    237239    } 
    238240 
    239241    public String toXMLString() 
    240242    { 
    241         return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" />"; 
     243        return "<Match id=\"" + id_ + "\" rank=\"" + rank_ + "\" termfreq=\"" + termfreq_ + "\" />"; 
    242244    } 
    243245    }