Ignore:
Timestamp:
2008-08-20T15:06:13+12:00 (16 years ago)
Author:
mdewsnip
Message:

Changes made by Richard Managh at DL Consulting Ltd to return document-level term frequency totals with each search result, slightly modified to work with the latest version of Greenstone.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r16583 r16912  
    141141        HashSet terms = new HashSet();
    142142        query.extractTerms(terms);
     143
     144        HashMap doc_term_freq_map = new HashMap();
    143145       
    144146        Iterator iter = terms.iterator();
     
    149151        // Get the term frequency over all the documents
    150152        TermDocs term_docs = reader.termDocs(term);
    151         int term_freq = term_docs.freq();
     153        int term_freq = 0;
    152154        int match_docs = 0;
    153         if (term_freq != 0) match_docs++;
    154         while (term_docs.next()) {
    155             term_freq += term_docs.freq();
    156             if (term_docs.freq()!= 0) {
     155        while (term_docs.next())
     156        {
     157            if (term_docs.freq() != 0)
     158            {
     159            term_freq += term_docs.freq();
    157160            match_docs++;
     161
     162            // Calculate the document-level term frequency as well
     163            Integer lucene_doc_num_obj = new Integer(term_docs.doc());
     164            int doc_term_freq = 0;
     165                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj))
     166            {
     167                doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
     168            }
     169            doc_term_freq += term_docs.freq();
     170
     171            doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
    158172            }
    159173        }
     
    186200
    187201        for (int i = start_results; i <= hits.length(); i++) {
     202            int lucene_doc_num = hits.id(i - 1);
    188203            Document doc = hits.doc(i - 1);
    189             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1));
     204            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
     205            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.score(i-1), doc_term_freq);
    190206        }
    191207        }
     
    202218        // Output the matching documents
    203219        for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
    204             Document doc = reader.document(hits.scoreDocs[i - 1].doc);
    205             lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score);
     220            int lucene_doc_num = hits.scoreDocs[i - 1].doc;
     221            Document doc = reader.document(lucene_doc_num);
     222            int doc_term_freq = ((Integer) doc_term_freq_map.get(new Integer(lucene_doc_num))).intValue();
     223            lucene_query_result.addDoc(doc.get("nodeID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
    206224        }
    207225        }
Note: See TracChangeset for help on using the changeset viewer.