Context Navigation

← Previous Changeset
Next Changeset →

Changeset 12983

Timestamp:

2006-10-03T16:55:27+13:00 (18 years ago)

Author:

mdewsnip

Message:

Moved the stuff for running the query into a new runQuery function, in preparation for allowing the query string to be specified as a command-line argument.

Location:

trunk

Files:

: 3 edited

gsdl/bin/java/LuceneWrap.jar (modified) ( previous)
gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java (modified) (2 diffs)
indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

-              r12981
+              r12983
             break;
+        }
+        System.out.println("<ResultSet>");
+        System.out.println("  <QueryString>" + xmlSafe(query_string) + "</QueryString>");
+        if (filter != null) {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+        }
+        try {
+            Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
+            query_including_stop_words = query_including_stop_words.rewrite(reader);
+            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
+            query = query.rewrite(reader);
+            // Return the list of expanded query terms and their frequencies
+            HashSet terms = new HashSet();
+            query.extractTerms(terms);
+            Iterator term_iterator = terms.iterator();
+            System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
+            while (term_iterator.hasNext()) {
+            Term term = (Term) term_iterator.next();
+            // Get the term frequency over all the documents
+            TermDocs term_docs = reader.termDocs(term);
+            int term_freq = term_docs.freq();
+            while (term_docs.next()) {
+                term_freq += term_docs.freq();
+            }
+            // If you wanted to limit this to just text terms add
+            // something like this:
+            // if (term.field().equals(TEXTFIELD))
+            System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
+            }
+            // Return the list of stop words removed from the query
+            HashSet terms_including_stop_words = new HashSet();
+            query_including_stop_words.extractTerms(terms_including_stop_words);
+            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
+            while (terms_including_stop_words_iter.hasNext()) {
+            Term term = (Term) terms_including_stop_words_iter.next();
+            if (!terms.contains(term)) {
+                System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
+            }
+            }
+            // Simple case for getting all the matching documents
+            if (end_results == Integer.MAX_VALUE) {
+            // Perform the query (filter and sorter may be null)
+            Hits hits = searcher.search(query, filter, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
+            // Output the matching documents
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
+            for (int i = start_results; i <= hits.length(); i++) {
+                Document doc = hits.doc(i - 1);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+            }
+            // Slightly more complicated case for returning a subset of the matching documents
+            else {
+            // Perform the query (filter may be null)
+            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
+            // Output the matching documents
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + end_results + "\" />");
+            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
+                Document doc = reader.document(hits.scoreDocs[i - 1].doc);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+            }
+        }
+        catch (ParseException parse_exception) {
+            System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
+        }
+        catch (TooManyClauses too_many_clauses_exception) {
+            System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
+        }
+        System.out.println("</ResultSet>");
+        }
+        searcher.close();
+        runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string);
+        }
+    }
     catch (IOException exception) {
 …
+    }
+    }
+    /**
+     * Parses and runs one query, printing the body of the XML result set to standard output.
+     *
+     * Output, in order: a QueryTermsInfo element with the number of expanded query terms,
+     * one Term element per expanded term (text, field and summed frequency), one StopWord
+     * element for each term that only the stop-word-preserving parser produced, then
+     * MatchingDocsInfo, StartResults, EndsResults and one Match element per returned document.
+     * A ParseException or TooManyClauses is reported as an Error element instead of being
+     * propagated. The closing ResultSet tag is always printed here; the opening tag is not,
+     * so the caller is presumably expected to have printed it already.
+     *
+     * Besides its parameters this method reads fuzziness, filter, sorter, start_results and
+     * end_results, which are declared elsewhere in this class.
+     *
+     * @param searcher                   searcher used to execute the rewritten query
+     * @param reader                     index reader used for parsing, rewriting, term
+     *                                   frequencies and (in the subset case) document lookup
+     * @param query_parser               parser used to build the query that is actually run
+     * @param query_parser_no_stop_words parser used to build the comparison query from which
+     *                                   the removed stop words are derived
+     * @param query_string               the raw query text
+     * @throws IOException if reading the index fails
+     */
+    private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string)
+    throws IOException
+    {
+    try {
+        Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
+        query_including_stop_words = query_including_stop_words.rewrite(reader);
+        Query query = parseQuery(reader, query_parser, query_string, fuzziness);
+        query = query.rewrite(reader);
+        // Return the list of expanded query terms and their frequencies
+        HashSet terms = new HashSet();
+        query.extractTerms(terms);
+        Iterator term_iterator = terms.iterator();
+        System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
+        while (term_iterator.hasNext()) {
+            Term term = (Term) term_iterator.next();
+            // Get the term frequency over all the documents.
+            // TermDocs.freq() is only valid once next() has positioned the enumeration,
+            // so the running total must start at zero rather than at freq().
+            int term_freq = 0;
+            TermDocs term_docs = reader.termDocs(term);
+            try {
+                while (term_docs.next()) {
+                    term_freq += term_docs.freq();
+                }
+            }
+            finally {
+                // Release the enumeration even if reading the postings fails
+                term_docs.close();
+            }
+            // If you wanted to limit this to just text terms add
+            // something like this:
+            // if (term.field().equals(TEXTFIELD))
+            System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
+        }
+        // Return the list of stop words removed from the query: any term that the
+        // stop-word-preserving parse produced but the normal parse did not
+        HashSet terms_including_stop_words = new HashSet();
+        query_including_stop_words.extractTerms(terms_including_stop_words);
+        Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
+        while (terms_including_stop_words_iter.hasNext()) {
+            Term term = (Term) terms_including_stop_words_iter.next();
+            if (!terms.contains(term)) {
+                System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
+            }
+        }
+        // Simple case for getting all the matching documents
+        if (end_results == Integer.MAX_VALUE) {
+            // Perform the query (filter and sorter may be null)
+            Hits hits = searcher.search(query, filter, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
+            // Output the matching documents (result positions are 1-based, hence i - 1)
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
+            for (int i = start_results; i <= hits.length(); i++) {
+                Document doc = hits.doc(i - 1);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+        }
+        // Slightly more complicated case for returning a subset of the matching documents
+        else {
+            // Perform the query (filter may be null)
+            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
+            // Output the matching documents (result positions are 1-based, hence i - 1)
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + end_results + "\" />");
+            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
+                Document doc = reader.document(hits.scoreDocs[i - 1].doc);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+        }
+    }
+    catch (ParseException parse_exception) {
+        System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
+    }
+    catch (TooManyClauses too_many_clauses_exception) {
+        System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
+    }
+    System.out.println("</ResultSet>");
+    }
     private static String xmlSafe(String text) {

trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

-              r12981
+              r12983
             break;
+        }
+        System.out.println("<ResultSet>");
+        System.out.println("  <QueryString>" + xmlSafe(query_string) + "</QueryString>");
+        if (filter != null) {
+            System.out.println("  <FilterString>" + filter.toString() + "</FilterString>");
+        }
+        try {
+            Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
+            query_including_stop_words = query_including_stop_words.rewrite(reader);
+            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
+            query = query.rewrite(reader);
+            // Return the list of expanded query terms and their frequencies
+            HashSet terms = new HashSet();
+            query.extractTerms(terms);
+            Iterator term_iterator = terms.iterator();
+            System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
+            while (term_iterator.hasNext()) {
+            Term term = (Term) term_iterator.next();
+            // Get the term frequency over all the documents
+            TermDocs term_docs = reader.termDocs(term);
+            int term_freq = term_docs.freq();
+            while (term_docs.next()) {
+                term_freq += term_docs.freq();
+            }
+            // If you wanted to limit this to just text terms add
+            // something like this:
+            // if (term.field().equals(TEXTFIELD))
+            System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
+            }
+            // Return the list of stop words removed from the query
+            HashSet terms_including_stop_words = new HashSet();
+            query_including_stop_words.extractTerms(terms_including_stop_words);
+            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
+            while (terms_including_stop_words_iter.hasNext()) {
+            Term term = (Term) terms_including_stop_words_iter.next();
+            if (!terms.contains(term)) {
+                System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
+            }
+            }
+            // Simple case for getting all the matching documents
+            if (end_results == Integer.MAX_VALUE) {
+            // Perform the query (filter and sorter may be null)
+            Hits hits = searcher.search(query, filter, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
+            // Output the matching documents
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
+            for (int i = start_results; i <= hits.length(); i++) {
+                Document doc = hits.doc(i - 1);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+            }
+            // Slightly more complicated case for returning a subset of the matching documents
+            else {
+            // Perform the query (filter may be null)
+            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
+            // Output the matching documents
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + end_results + "\" />");
+            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
+                Document doc = reader.document(hits.scoreDocs[i - 1].doc);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+            }
+        }
+        catch (ParseException parse_exception) {
+            System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
+        }
+        catch (TooManyClauses too_many_clauses_exception) {
+            System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
+        }
+        System.out.println("</ResultSet>");
+        }
+        searcher.close();
+        runQuery(searcher, reader, query_parser, query_parser_no_stop_words, query_string);
+        }
+    }
     catch (IOException exception) {
 …
+    }
+    }
+    /**
+     * Parses and runs one query, printing the body of the XML result set to standard output.
+     *
+     * Output, in order: a QueryTermsInfo element with the number of expanded query terms,
+     * one Term element per expanded term (text, field and summed frequency), one StopWord
+     * element for each term that only the stop-word-preserving parser produced, then
+     * MatchingDocsInfo, StartResults, EndsResults and one Match element per returned document.
+     * A ParseException or TooManyClauses is reported as an Error element instead of being
+     * propagated. The closing ResultSet tag is always printed here; the opening tag is not,
+     * so the caller is presumably expected to have printed it already.
+     *
+     * Besides its parameters this method reads fuzziness, filter, sorter, start_results and
+     * end_results, which are declared elsewhere in this class.
+     *
+     * @param searcher                   searcher used to execute the rewritten query
+     * @param reader                     index reader used for parsing, rewriting, term
+     *                                   frequencies and (in the subset case) document lookup
+     * @param query_parser               parser used to build the query that is actually run
+     * @param query_parser_no_stop_words parser used to build the comparison query from which
+     *                                   the removed stop words are derived
+     * @param query_string               the raw query text
+     * @throws IOException if reading the index fails
+     */
+    private static void runQuery(Searcher searcher, IndexReader reader, QueryParser query_parser, QueryParser query_parser_no_stop_words, String query_string)
+    throws IOException
+    {
+    try {
+        Query query_including_stop_words = parseQuery(reader, query_parser_no_stop_words, query_string, fuzziness);
+        query_including_stop_words = query_including_stop_words.rewrite(reader);
+        Query query = parseQuery(reader, query_parser, query_string, fuzziness);
+        query = query.rewrite(reader);
+        // Return the list of expanded query terms and their frequencies
+        HashSet terms = new HashSet();
+        query.extractTerms(terms);
+        Iterator term_iterator = terms.iterator();
+        System.out.println("  <QueryTermsInfo num=\"" + terms.size() + "\"/>");
+        while (term_iterator.hasNext()) {
+            Term term = (Term) term_iterator.next();
+            // Get the term frequency over all the documents.
+            // TermDocs.freq() is only valid once next() has positioned the enumeration,
+            // so the running total must start at zero rather than at freq().
+            int term_freq = 0;
+            TermDocs term_docs = reader.termDocs(term);
+            try {
+                while (term_docs.next()) {
+                    term_freq += term_docs.freq();
+                }
+            }
+            finally {
+                // Release the enumeration even if reading the postings fails
+                term_docs.close();
+            }
+            // If you wanted to limit this to just text terms add
+            // something like this:
+            // if (term.field().equals(TEXTFIELD))
+            System.out.println("  <Term value=\"" + term.text() + "\" field=\"" + term.field() + "\" freq=\"" + term_freq + "\" />");
+        }
+        // Return the list of stop words removed from the query: any term that the
+        // stop-word-preserving parse produced but the normal parse did not
+        HashSet terms_including_stop_words = new HashSet();
+        query_including_stop_words.extractTerms(terms_including_stop_words);
+        Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
+        while (terms_including_stop_words_iter.hasNext()) {
+            Term term = (Term) terms_including_stop_words_iter.next();
+            if (!terms.contains(term)) {
+                System.out.println("  <StopWord value=\"" + term.text() + "\"/>");
+            }
+        }
+        // Simple case for getting all the matching documents
+        if (end_results == Integer.MAX_VALUE) {
+            // Perform the query (filter and sorter may be null)
+            Hits hits = searcher.search(query, filter, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
+            // Output the matching documents (result positions are 1-based, hence i - 1)
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + hits.length() + "\" />");
+            for (int i = start_results; i <= hits.length(); i++) {
+                Document doc = hits.doc(i - 1);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+        }
+        // Slightly more complicated case for returning a subset of the matching documents
+        else {
+            // Perform the query (filter may be null)
+            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
+            System.out.println("  <MatchingDocsInfo num=\"" + hits.totalHits + "\"/>");
+            // Output the matching documents (result positions are 1-based, hence i - 1)
+            System.out.println("  <StartResults num=\"" + start_results + "\" />");
+            System.out.println("  <EndsResults num=\"" + end_results + "\" />");
+            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
+                Document doc = reader.document(hits.scoreDocs[i - 1].doc);
+                System.out.println("  <Match id=\"" + doc.get("nodeID") + "\" />");
+            }
+        }
+    }
+    catch (ParseException parse_exception) {
+        System.out.println("  <Error type=\"PARSE_EXCEPTION\"/>");
+    }
+    catch (TooManyClauses too_many_clauses_exception) {
+        System.out.println("  <Error type=\"TOO_MANY_CLAUSES\"/>");
+    }
+    System.out.println("</ResultSet>");
+    }
     private static String xmlSafe(String text) {

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 12983

Legend:

trunk/gsdl/src/java/org/nzdl/gsdl/LuceneWrap/GS2LuceneQuery.java

trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

Download in other formats: