source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java@12372

Last change on this file since 12372 was 12372, checked in by mdewsnip, 18 years ago

Now returns the stop words that have been removed from the query.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
/**
 *
 * @author [email protected]
 * @author [email protected]
 * @version
 */

package org.nzdl.gsdl.LuceneWrap;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;

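/**
 * Command-line wrapper that opens a Lucene index, reads one query per line
 * from standard input (UTF-8), and writes an XML &lt;ResultSet&gt; for each
 * query to standard output: the expanded query terms, any stop words that
 * were removed, and the matching document node IDs.
 */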
public class GS2LuceneQuery
{
    public static void main (String args[])
    {
        if (args.length == 0) {
            System.out.println("Usage: GS2LuceneQuery <index directory> [-sort <field>] [-dco <AND|OR>]");
            return;
        }

        try {
            Searcher searcher = new IndexSearcher(args[0]);
            IndexReader reader = ((IndexSearcher) searcher).getIndexReader();

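            // With no arguments, Sort() orders hits by relevance score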
            Sort sorter = new Sort();

            // Optional arguments: -sort <field> sorts the results on that field,
            // and -dco <AND|OR> sets the default conjunction operator
            String default_conjunction_operator = "OR";
            for (int i = 1; i < args.length; i++)
            {
                if (args[i].equals("-sort"))
                {
                    i++;
                    sorter = new Sort(args[i]);
                }
                if (args[i].equals("-dco"))
                {
                    i++;
                    default_conjunction_operator = args[i];
                }
            }

            // Create one query parser with the standard set of stop words, and one with none
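            // (queries with no explicit field search the "TX" field by default)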
            QueryParser query_parser = new QueryParser("TX", new StandardAnalyzer());
            QueryParser query_parser_no_stop_words = new QueryParser("TX", new StandardAnalyzer(new String[] { }));

            // Lucene does "OR" queries by default; do an "AND" query if specified
            if (default_conjunction_operator.equals("AND")) {
                query_parser.setDefaultOperator(QueryParser.AND_OPERATOR);
                query_parser_no_stop_words.setDefaultOperator(QueryParser.AND_OPERATOR);
            }

            BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            while (true) {
                // Read the query from STDIN; stop at end of input or a blank line
                String query_string = in.readLine();
                if (query_string == null || query_string.length() == 0) {
                    break;
                }
                System.err.println("**** query = " + query_string);

                // Parse the query and rewrite it into individual terms (eg. for wildcard searches)
                Query query = query_parser.parse(query_string);
                query = query.rewrite(reader);
                Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
                query_including_stop_words = query_including_stop_words.rewrite(reader);

                // Perform the query
                Hits hits = searcher.search(query, sorter);
                System.out.println("<ResultSet>");
                System.out.println(" <QueryString>" + query_string + "</QueryString>");

                // Return the list of expanded query terms and their frequencies
                HashSet terms = new HashSet();
                query.extractTerms(terms);
                System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>");
                Iterator terms_iter = terms.iterator();
                while (terms_iter.hasNext()) {
                    Term term = (Term) terms_iter.next();
                    System.out.println(" <Term value=\"" + term.text() + "\" freq=\"" + reader.docFreq(term) + "\" field=\"" + term.field() + "\"/>");
                }

                // Return the list of stop words removed from the query
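                // (a term is a stop word if it appears in the unfiltered parse but not in the filtered one)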
                HashSet terms_including_stop_words = new HashSet();
                query_including_stop_words.extractTerms(terms_including_stop_words);
                System.out.println(" <StopWordsInfo num=\"" + (terms_including_stop_words.size() - terms.size()) + "\"/>");
                Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
                while (terms_including_stop_words_iter.hasNext()) {
                    Term term = (Term) terms_including_stop_words_iter.next();
                    if (!terms.contains(term)) {
                        System.out.println(" <StopWord value=\"" + term.text() + "\"/>");
                    }
                }

                // Return the matching documents
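                // (each hit is identified by its stored "nodeID" field)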
                System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
                for (int i = 0; i < hits.length(); i++) {
                    Document doc = hits.doc(i);
                    String node_id = doc.get("nodeID");
                    System.out.println(" <Match id=\"" + node_id + "\"/>");
                }

                System.out.println("</ResultSet>");
            }

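            // All queries processed; release the index searcher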
            searcher.close();
        }
        catch (Exception exception) {
            exception.printStackTrace();
        }
    }
}