Changeset 12983


Ignore:
Timestamp:
2006-10-03T16:55:27+13:00 (18 years ago)
Author:
mdewsnip
Message:

Moved the query-execution code into a new runQuery function, in preparation for allowing the query string to be specified as a command-line argument.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

    r12981 r12983  
    111111            break;
    112112        }
    113         System.out.println("<ResultSet>");
    114         System.out.println("  <QueryString>" + xmlSafe(query_string) + "</QueryString>");
    115         if (filter != null) {
    116             System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
    117         }
    118 
    119         try {
    120             Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
    121             query_including_stop_words = query_including_stop_words.rewrite(reader);
    122 
    123             Query query = parseQuery(reader, query_parser, query_string, fuzziness);
    124             query = query.rewrite(reader);
    125 
    126             // Return the list of expanded query terms and their frequencies
    127             HashSet terms = new HashSet();
    128             query.extractTerms(terms);
    129             Iterator term_iterator = terms.iterator();
    130             System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
    131             while (term_iterator.hasNext()) {
    132             Term term = (Term) term_iterator.next();
    133 
    134             // Get the term frequency over all the documents
    135             TermDocs term_docs = reader.termDocs(term);
    136             int term_freq = term_docs.freq();
    137             while (term_docs.next()) {
    138                 term_freq += term_docs.freq();
    139             }
    140 
    141             // If you wanted to limit this to just text terms add
    142             // something like this:
    143             // if (term.field().equals(TEXTFIELD))
    144             System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
    145             }
    146            
    147             // Return the list of stop words removed from the query
    148             HashSet terms_including_stop_words = new HashSet();
    149             query_including_stop_words.extractTerms(terms_including_stop_words);
    150             Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
    151             while (terms_including_stop_words_iter.hasNext()) {
    152             Term term = (Term) terms_including_stop_words_iter.next();
    153             if (!terms.contains(term)) {
    154                 System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
    155             }
    156             }
    157 
    158             // Simple case for getting all the matching documents
    159             if (end_results == Integer.MAX_VALUE) {
    160             // Perform the query (filter and sorter may be null)
    161             Hits hits = searcher.search(query, filter, sorter);
    162             System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
    163 
    164             // Output the matching documents
    165             System.out.println("  <StartResults num=\"" + start_results + "\" />");
    166             System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
    167             for (int i = start_results; i <= hits.length(); i++) {
    168                 Document doc = hits.doc(i - 1);
    169                 System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
    170             }
    171             }
    172 
    173             // Slightly more complicated case for returning a subset of the matching documents
    174             else {
    175             // Perform the query (filter may be null)
    176             TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    177             System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
    178 
    179             // Output the matching documents
    180             System.out.println("  <StartResults num=\"" + start_results + "\" />");
    181             System.out.println("  <EndsResults num=\"" + end_results + "\" />");
    182             for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
    183                 Document doc = reader.document(hits.scoreDocs[i - 1].doc);
    184                 System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
    185             }
    186             }
    187         }
    188         catch (ParseException parse_exception) {
    189             System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
    190         }
    191         catch (TooManyClauses too_many_clauses_exception) {
    192             System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
    193         }
    194 
    195         System.out.println("</ResultSet>");
    196         }
    197        
    198         searcher.close();
     113
     114        runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string);
     115        }
    199116    }
    200117    catch (IOException exception) {
     
    202119    }
    203120    }
     121
     122
     123    private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string)
     124    throws IOException
     125    {
     126    try {
     127        Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
     128        query_including_stop_words = query_including_stop_words.rewrite(reader);
     129
     130        Query query = parseQuery(reader, query_parser, query_string, fuzziness);
     131        query = query.rewrite(reader);
     132
     133        // Return the list of expanded query terms and their frequencies
     134        HashSet terms = new HashSet();
     135        query.extractTerms(terms);
     136        Iterator term_iterator = terms.iterator();
     137        System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
     138        while (term_iterator.hasNext()) {
     139        Term term = (Term) term_iterator.next();
     140
     141        // Get the term frequency over all the documents
     142        TermDocs term_docs = reader.termDocs(term);
     143        int term_freq = term_docs.freq();
     144        while (term_docs.next()) {
     145            term_freq += term_docs.freq();
     146        }
     147
     148        // If you wanted to limit this to just text terms add
     149        // something like this:
     150        // if (term.field().equals(TEXTFIELD))
     151        System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
     152        }
     153
     154        // Return the list of stop words removed from the query
     155        HashSet terms_including_stop_words = new HashSet();
     156        query_including_stop_words.extractTerms(terms_including_stop_words);
     157        Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
     158        while (terms_including_stop_words_iter.hasNext()) {
     159        Term term = (Term) terms_including_stop_words_iter.next();
     160        if (!terms.contains(term)) {
     161            System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
     162        }
     163        }
     164
     165        // Simple case for getting all the matching documents
     166        if (end_results == Integer.MAX_VALUE) {
     167        // Perform the query (filter and sorter may be null)
     168        Hits hits = searcher.search(query, filter, sorter);
     169        System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
     170
     171        // Output the matching documents
     172        System.out.println("  <StartResults num=\"" + start_results + "\" />");
     173        System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
     174        for (int i = start_results; i <= hits.length(); i++) {
     175            Document doc = hits.doc(i - 1);
     176            System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
     177        }
     178        }
     179
     180        // Slightly more complicated case for returning a subset of the matching documents
     181        else {
     182        // Perform the query (filter may be null)
     183        TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
     184        System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
     185
     186        // Output the matching documents
     187        System.out.println("  <StartResults num=\"" + start_results + "\" />");
     188        System.out.println("  <EndsResults num=\"" + end_results + "\" />");
     189        for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
     190            Document doc = reader.document(hits.scoreDocs[i - 1].doc);
     191            System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
     192        }
     193        }
     194    }
     195    catch (ParseException parse_exception) {
     196        System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
     197    }
     198    catch (TooManyClauses too_many_clauses_exception) {
     199        System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
     200    }
     201
     202    System.out.println("</ResultSet>");
     203    }
     204
    204205
    205206    private static String xmlSafe(String text) {
  • trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r12981 r12983  
    111111            break;
    112112        }
    113         System.out.println("<ResultSet>");
    114         System.out.println("  <QueryString>" + xmlSafe(query_string) + "</QueryString>");
    115         if (filter != null) {
    116             System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
    117         }
    118 
    119         try {
    120             Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
    121             query_including_stop_words = query_including_stop_words.rewrite(reader);
    122 
    123             Query query = parseQuery(reader, query_parser, query_string, fuzziness);
    124             query = query.rewrite(reader);
    125 
    126             // Return the list of expanded query terms and their frequencies
    127             HashSet terms = new HashSet();
    128             query.extractTerms(terms);
    129             Iterator term_iterator = terms.iterator();
    130             System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
    131             while (term_iterator.hasNext()) {
    132             Term term = (Term) term_iterator.next();
    133 
    134             // Get the term frequency over all the documents
    135             TermDocs term_docs = reader.termDocs(term);
    136             int term_freq = term_docs.freq();
    137             while (term_docs.next()) {
    138                 term_freq += term_docs.freq();
    139             }
    140 
    141             // If you wanted to limit this to just text terms add
    142             // something like this:
    143             // if (term.field().equals(TEXTFIELD))
    144             System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
    145             }
    146            
    147             // Return the list of stop words removed from the query
    148             HashSet terms_including_stop_words = new HashSet();
    149             query_including_stop_words.extractTerms(terms_including_stop_words);
    150             Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
    151             while (terms_including_stop_words_iter.hasNext()) {
    152             Term term = (Term) terms_including_stop_words_iter.next();
    153             if (!terms.contains(term)) {
    154                 System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
    155             }
    156             }
    157 
    158             // Simple case for getting all the matching documents
    159             if (end_results == Integer.MAX_VALUE) {
    160             // Perform the query (filter and sorter may be null)
    161             Hits hits = searcher.search(query, filter, sorter);
    162             System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
    163 
    164             // Output the matching documents
    165             System.out.println("  <StartResults num=\"" + start_results + "\" />");
    166             System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
    167             for (int i = start_results; i <= hits.length(); i++) {
    168                 Document doc = hits.doc(i - 1);
    169                 System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
    170             }
    171             }
    172 
    173             // Slightly more complicated case for returning a subset of the matching documents
    174             else {
    175             // Perform the query (filter may be null)
    176             TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
    177             System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
    178 
    179             // Output the matching documents
    180             System.out.println("  <StartResults num=\"" + start_results + "\" />");
    181             System.out.println("  <EndsResults num=\"" + end_results + "\" />");
    182             for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
    183                 Document doc = reader.document(hits.scoreDocs[i - 1].doc);
    184                 System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
    185             }
    186             }
    187         }
    188         catch (ParseException parse_exception) {
    189             System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
    190         }
    191         catch (TooManyClauses too_many_clauses_exception) {
    192             System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
    193         }
    194 
    195         System.out.println("</ResultSet>");
    196         }
    197        
    198         searcher.close();
     113
     114        runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string);
     115        }
    199116    }
    200117    catch (IOException exception) {
     
    202119    }
    203120    }
     121
     122
     123    private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string)
     124    throws IOException
     125    {
     126    try {
     127        Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
     128        query_including_stop_words = query_including_stop_words.rewrite(reader);
     129
     130        Query query = parseQuery(reader, query_parser, query_string, fuzziness);
     131        query = query.rewrite(reader);
     132
     133        // Return the list of expanded query terms and their frequencies
     134        HashSet terms = new HashSet();
     135        query.extractTerms(terms);
     136        Iterator term_iterator = terms.iterator();
     137        System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
     138        while (term_iterator.hasNext()) {
     139        Term term = (Term) term_iterator.next();
     140
     141        // Get the term frequency over all the documents
     142        TermDocs term_docs = reader.termDocs(term);
     143        int term_freq = term_docs.freq();
     144        while (term_docs.next()) {
     145            term_freq += term_docs.freq();
     146        }
     147
     148        // If you wanted to limit this to just text terms add
     149        // something like this:
     150        // if (term.field().equals(TEXTFIELD))
     151        System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
     152        }
     153
     154        // Return the list of stop words removed from the query
     155        HashSet terms_including_stop_words = new HashSet();
     156        query_including_stop_words.extractTerms(terms_including_stop_words);
     157        Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
     158        while (terms_including_stop_words_iter.hasNext()) {
     159        Term term = (Term) terms_including_stop_words_iter.next();
     160        if (!terms.contains(term)) {
     161            System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
     162        }
     163        }
     164
     165        // Simple case for getting all the matching documents
     166        if (end_results == Integer.MAX_VALUE) {
     167        // Perform the query (filter and sorter may be null)
     168        Hits hits = searcher.search(query, filter, sorter);
     169        System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
     170
     171        // Output the matching documents
     172        System.out.println("  <StartResults num=\"" + start_results + "\" />");
     173        System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
     174        for (int i = start_results; i <= hits.length(); i++) {
     175            Document doc = hits.doc(i - 1);
     176            System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
     177        }
     178        }
     179
     180        // Slightly more complicated case for returning a subset of the matching documents
     181        else {
     182        // Perform the query (filter may be null)
     183        TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
     184        System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
     185
     186        // Output the matching documents
     187        System.out.println("  <StartResults num=\"" + start_results + "\" />");
     188        System.out.println("  <EndsResults num=\"" + end_results + "\" />");
     189        for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
     190            Document doc = reader.document(hits.scoreDocs[i - 1].doc);
     191            System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
     192        }
     193        }
     194    }
     195    catch (ParseException parse_exception) {
     196        System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
     197    }
     198    catch (TooManyClauses too_many_clauses_exception) {
     199        System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
     200    }
     201
     202    System.out.println("</ResultSet>");
     203    }
     204
    204205
    205206    private static String xmlSafe(String text) {
Note: See TracChangeset for help on using the changeset viewer.