Context Navigation

source: trunk/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java@ 12372

Last change on this file since 12372 was 12372, checked in by mdewsnip, 18 years ago
Now returns the stop words that have been removed from the query.
Property svn:keywords set to `Author Date Id Revision`
File size: 4.5 KB

Line
1	/**
2	*
3	* @author [email protected]
4	* @author [email protected]
5	* @version
6	*/
7
8	package org.nzdl.gsdl.LuceneWrap;
9
10
11	import java.io.BufferedReader;
12	import java.io.InputStreamReader;
13	import java.util.HashSet;
14	import java.util.Iterator;
15
16	import org.apache.lucene.analysis.Analyzer;
17	import org.apache.lucene.analysis.standard.StandardAnalyzer;
18	import org.apache.lucene.document.Document;
19	import org.apache.lucene.index.IndexReader;
20	import org.apache.lucene.index.Term;
21	import org.apache.lucene.queryParser.QueryParser;
22	import org.apache.lucene.search.Hits;
23	import org.apache.lucene.search.IndexSearcher;
24	import org.apache.lucene.search.Query;
25	import org.apache.lucene.search.Searcher;
26	import org.apache.lucene.search.Sort;
27
28
29	public class GS2LuceneQuery
30	{
31	public static void main (String args[])
32	{
33	if (args.length == 0) {
34	System.out.println("Usage: GS2LuceneQuery <index directory> (<sort field>)");
35	return;
36	}
37
38	try {
39	Searcher searcher = new IndexSearcher(args[0]);
40	IndexReader reader = ((IndexSearcher) searcher).getIndexReader();
41
42	Sort sorter = new Sort();
43
44	// New code to allow the default conjunction operator to be
45	// definable
46	String default_conjuction_operator = "OR";
47	for (int i = 1; i < args.length; i++)
48	{
49	if (args[i].equals("-sort"))
50	{
51	i++;
52	sorter = new Sort(args[i]);
53	}
54	if (args[i].equals("-dco"))
55	{
56	i++;
57	default_conjuction_operator = args[i];
58	}
59	}
60
61	// Create one query parser with the standard set of stop words, and one with none
62	QueryParser query_parser = new QueryParser("TX", new StandardAnalyzer());
63	QueryParser query_parser_no_stop_words = new QueryParser("TX", new StandardAnalyzer(new String[] { }));
64
65	// Lucene does "OR" queries by default; do an "AND" query if specified
66	if (default_conjuction_operator.equals("AND")) {
67	query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
68	query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
69	}
70
71	BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
72	while (true) {
73	// Read the query from STDIN
74	String query_string = in.readLine();
75	if (query_string == null \|\| query_string.length() == -1) {
76	break;
77	}
78	System.err.println("**** query = " + query_string);
79
80	// Parse the query and rewrite it into individual terms (eg. for wildcard searches)
81	Query query = query_parser.parse(query_string);
82	query = query.rewrite(reader);
83	Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
84	query_including_stop_words = query_including_stop_words.rewrite(reader);
85
86	// Perform the query
87	Hits hits = searcher.search(query, sorter);
88	System.out.println("<ResultSet>");
89	System.out.println(" <QueryString>" + query_string + "</QueryString>");
90
91	// Return the list of expanded query terms and their frequencies
92	HashSet terms = new HashSet();
93	query.extractTerms(terms);
94	System.out.println(" <QueryTermsInfo num=\"" + terms.size() + "\"/>");
95	Iterator terms_iter = terms.iterator();
96	while (terms_iter.hasNext()) {
97	Term term = (Term) terms_iter.next();
98	System.out.println(" <Term value=\"" + term.text() + "\" freq=\"" + reader.docFreq(term) + "\" field=\"" + term.field() + "\"/>");
99	}
100
101	// Return the list of stop words removed from the query
102	HashSet terms_including_stop_words = new HashSet();
103	query_including_stop_words.extractTerms(terms_including_stop_words);
104	System.out.println(" <StopWordsInfo num=\"" + (terms_including_stop_words.size() - terms.size()) + "\"/>");
105	Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
106	while (terms_including_stop_words_iter.hasNext()) {
107	Term term = (Term) terms_including_stop_words_iter.next();
108	if (!terms.contains(term)) {
109	System.err.println(" <StopWord value=\"" + term.text() + "\"/>");
110	}
111	}
112
113	// Return the matching documents
114	System.out.println(" <MatchingDocsInfo num=\"" + hits.length() + "\"/>");
115	for (int i = 0; i < hits.length(); i++) {
116	Document doc = hits.doc(i);
117	String node_id = doc.get("nodeID");
118	System.out.println(" <Match id=\"" + node_id + "\"/>");
119	}
120
121	System.out.println("</ResultSet>");
122	}
123
124	searcher.close();
125	}
126	catch (Exception exception) {
127	exception.printStackTrace();
128	}
129	}
130	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: