1 | /**
|
---|
2 | *
|
---|
3 | * @author [email protected]
|
---|
4 | * @author [email protected]
|
---|
5 | * @version
|
---|
6 | */
|
---|
7 |
|
---|
8 | package org.nzdl.gsdl.LuceneWrap;
|
---|
9 |
|
---|
10 |
|
---|
11 | import java.io.BufferedReader;
|
---|
12 | import java.io.InputStreamReader;
|
---|
13 | import java.util.HashSet;
|
---|
14 | import java.util.Iterator;
|
---|
15 |
|
---|
16 | import org.apache.lucene.analysis.Analyzer;
|
---|
17 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
---|
18 | import org.apache.lucene.document.Document;
|
---|
19 | import org.apache.lucene.index.IndexReader;
|
---|
20 | import org.apache.lucene.index.Term;
|
---|
21 | import org.apache.lucene.queryParser.QueryParser;
|
---|
22 | import org.apache.lucene.search.Hits;
|
---|
23 | import org.apache.lucene.search.IndexSearcher;
|
---|
24 | import org.apache.lucene.search.Query;
|
---|
25 | import org.apache.lucene.search.Searcher;
|
---|
26 | import org.apache.lucene.search.Sort;
|
---|
27 |
|
---|
28 |
|
---|
29 | public class GS2LuceneQuery
|
---|
30 | {
|
---|
31 | public static void main (String args[])
|
---|
32 | {
|
---|
33 | if (args.length == 0) {
|
---|
34 | System.out.println("Usage: GS2LuceneQuery <index directory> (<sort field>)");
|
---|
35 | return;
|
---|
36 | }
|
---|
37 |
|
---|
38 | try {
|
---|
39 | Searcher searcher = new IndexSearcher(args[0]);
|
---|
40 | IndexReader reader = ((IndexSearcher) searcher).getIndexReader();
|
---|
41 |
|
---|
42 | Sort sorter = new Sort();
|
---|
43 |
|
---|
44 | // New code to allow the default conjunction operator to be
|
---|
45 | // definable
|
---|
46 | String default_conjuction_operator = "OR";
|
---|
47 | for (int i = 1; i < args.length; i++)
|
---|
48 | {
|
---|
49 | if (args[i].equals("-sort"))
|
---|
50 | {
|
---|
51 | i++;
|
---|
52 | sorter = new Sort(args[i]);
|
---|
53 | }
|
---|
54 | if (args[i].equals("-dco"))
|
---|
55 | {
|
---|
56 | i++;
|
---|
57 | default_conjuction_operator = args[i];
|
---|
58 | }
|
---|
59 | }
|
---|
60 |
|
---|
61 | // Create one query parser with the standard set of stop words, and one with none
|
---|
62 | QueryParser query_parser = new QueryParser("TX", new StandardAnalyzer());
|
---|
63 | QueryParser query_parser_no_stop_words = new QueryParser("TX", new StandardAnalyzer(new String[] { }));
|
---|
64 |
|
---|
65 | // Lucene does "OR" queries by default; do an "AND" query if specified
|
---|
66 | if (default_conjuction_operator.equals("AND")) {
|
---|
67 | query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
|
---|
68 | query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
|
---|
69 | }
|
---|
70 |
|
---|
71 | BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
|
---|
72 | while (true) {
|
---|
73 | // Read the query from STDIN
|
---|
74 | String query_string = in.readLine();
|
---|
75 | if (query_string == null || query_string.length() == -1) {
|
---|
76 | break;
|
---|
77 | }
|
---|
78 | System.err.println("**** query = " + query_string);
|
---|
79 |
|
---|
80 | // Parse the query and rewrite it into individual terms (eg. for wildcard searches)
|
---|
81 | Query query = query_parser.parse(query_string);
|
---|
82 | query = query.rewrite(reader);
|
---|
83 | Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
|
---|
84 | query_including_stop_words = query_including_stop_words.rewrite(reader);
|
---|
85 |
|
---|
86 | // Perform the query
|
---|
87 | Hits hits = searcher.search(query, sorter);
|
---|
88 | System.out.println("<ResultSet>");
|
---|
89 | System.out.println(" <QueryString>" + query_string + "</QueryString>");
|
---|
90 |
|
---|
91 | // Return the list of expanded query terms and their frequencies
|
---|
92 | HashSet terms = new HashSet();
|
---|
93 | query.extractTerms(terms);
|
---|
94 | System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>");
|
---|
95 | Iterator terms_iter = terms.iterator();
|
---|
96 | while (terms_iter.hasNext()) {
|
---|
97 | Term term = (Term) terms_iter.next();
|
---|
98 | System.out.println(" <Term value=\"" + term.text() + "\" freq=\"" + reader.docFreq(term) + "\" field=\"" + term.field() + "\"/>");
|
---|
99 | }
|
---|
100 |
|
---|
101 | // Return the list of stop words removed from the query
|
---|
102 | HashSet terms_including_stop_words = new HashSet();
|
---|
103 | query_including_stop_words.extractTerms(terms_including_stop_words);
|
---|
104 | System.out.println(" <StopWordsInfo num=\"" + (terms_including_stop_words.size() - terms.size()) + "\"/>");
|
---|
105 | Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
|
---|
106 | while (terms_including_stop_words_iter.hasNext()) {
|
---|
107 | Term term = (Term) terms_including_stop_words_iter.next();
|
---|
108 | if (!terms.contains(term)) {
|
---|
109 | System.err.println(" <StopWord value=\"" + term.text() + "\"/>");
|
---|
110 | }
|
---|
111 | }
|
---|
112 |
|
---|
113 | // Return the matching documents
|
---|
114 | System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
|
---|
115 | for (int i = 0; i < hits.length(); i++) {
|
---|
116 | Document doc = hits.doc(i);
|
---|
117 | String node_id = doc.get("nodeID");
|
---|
118 | System.out.println(" <Match id=\"" + node_id + "\"/>");
|
---|
119 | }
|
---|
120 |
|
---|
121 | System.out.println("</ResultSet>");
|
---|
122 | }
|
---|
123 |
|
---|
124 | searcher.close();
|
---|
125 | }
|
---|
126 | catch (Exception exception) {
|
---|
127 | exception.printStackTrace();
|
---|
128 | }
|
---|
129 | }
|
---|
130 | }
|
---|