Changeset 12976 for trunk/indexers


Ignore:
Timestamp:
2006-10-03T14:22:03+13:00 (18 years ago)
Author:
mdewsnip
Message:

Rearranged some code to make it clearer that the term information is now independent of the search results.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r12975 r12976  
    125125            query = query.rewrite(reader);
    126126
     127            // Return the list of expanded query terms and their frequencies
     128            HashSet terms = new HashSet();
     129            query.extractTerms(terms);
     130            Iterator term_iterator = terms.iterator();
     131            System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
     132            while (term_iterator.hasNext()) {
     133            Term term = (Term) term_iterator.next();
     134
     135            // Get the term frequency over all the documents
     136            TermDocs term_docs = reader.termDocs(term);
     137            int term_freq = term_docs.freq();
     138            while (term_docs.next()) {
     139                term_freq += term_docs.freq();
     140            }
     141
     142            // If you wanted to limit this to just text terms add
     143            // something like this:
     144            // if (term.field().equals(TEXTFIELD))
     145            System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
     146            }
     147           
     148            // Return the list of stop words removed from the query
     149            HashSet terms_including_stop_words = new HashSet();
     150            query_including_stop_words.extractTerms(terms_including_stop_words);
     151            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
     152            while (terms_including_stop_words_iter.hasNext()) {
     153            Term term = (Term) terms_including_stop_words_iter.next();
     154            if (!terms.contains(term)) {
     155                System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
     156            }
     157            }
     158
    127159            // Perform the query (filter and sorter may be null)
    128160            Hits hits = searcher.search(query, filter, sorter);
     
    152184
    153185            ++counter;
    154             }
    155 
    156             // Return the list of expanded query terms and their frequencies
    157             HashSet terms = new HashSet();
    158             query.extractTerms(terms);
    159             Iterator term_iterator = terms.iterator();
    160             System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
    161             while (term_iterator.hasNext()) {
    162             Term term = (Term) term_iterator.next();
    163 
    164             // Get the term frequency over all the documents
    165             TermDocs term_docs = reader.termDocs(term);
    166             int term_freq = term_docs.freq();
    167             while (term_docs.next()) {
    168                 term_freq += term_docs.freq();
    169             }
    170 
    171             // If you wanted to limit this to just text terms add
    172             // something like this:
    173             // if (term.field().equals(TEXTFIELD))
    174             System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
    175             }
    176            
    177             // Return the list of stop words removed from the query
    178             HashSet terms_including_stop_words = new HashSet();
    179             query_including_stop_words.extractTerms(terms_including_stop_words);
    180             Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
    181             while (terms_including_stop_words_iter.hasNext()) {
    182             Term term = (Term) terms_including_stop_words_iter.next();
    183             if (!terms.contains(term)) {
    184                 System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
    185             }
    186186            }
    187187        }
Note: See TracChangeset for help on using the changeset viewer.