source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java@24739

Last change on this file since 24739 was 24739, checked in by davidb, 13 years ago

Shifted to using LuceneWrapper3

  • Property svn:executable set to *
File size: 10.4 KB
/**********************************************************************
 *
 * SolrQueryWrapper.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.gsdl3.util;


import java.io.*;
import java.util.*;
import java.util.regex.*;

import org.apache.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

import org.greenstone.LuceneWrapper3.SharedSoleneQuery;
import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;

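/**
 * Wraps a SolrJ SolrServer for a Greenstone collection's Solr core: runs a
 * query over the configured result window and repackages the hits
 * (docOID plus score) as a SharedSoleneQueryResult.
 */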
public class SolrQueryWrapper extends SharedSoleneQuery
{

    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName());

    /*
    // Use the standard set of English stop words by default
    static private String[] stop_words = GS2Analyzer.STOP_WORDS;

    private String full_indexdir = "";

    private String default_conjunction_operator = "OR";
    private String fuzziness = null;
    private String sort_field = null;
    private Sort sorter = new Sort();
    private String filter_string = null;
    private Filter filter = null;

    private QueryParser query_parser = null;
    private QueryParser query_parser_no_stop_words = null;
    */

    protected int max_docs = 100;

    SolrServer solr_core = null;


    public SolrQueryWrapper() {
        super();
    }
    /*
    // Create one query parser with the standard set of stop words, and one with none

        query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    }
    */

    public void setMaxDocs(int max_docs)
    {
        this.max_docs = max_docs;
    }

    public void setSolrCore(SolrServer solr_core)
    {
        this.solr_core = solr_core;
    }


    public boolean initialise() {

        if (solr_core == null) {
            utf8out.println("Solr core not loaded");
            utf8out.flush();
            return false;
        }
        return true;
    }

    public SharedSoleneQueryResult runQuery(String query_string) {

        if (query_string == null || query_string.equals("")) {
            utf8out.println("No query string specified");
            utf8out.flush();
            return null;
        }

        SolrQueryResult solr_query_result = new SolrQueryResult();
        solr_query_result.clear();

        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set("q", query_string);
        solrParams.set("start", start_results);
        solrParams.set("rows", (end_results - start_results) + 1);
        solrParams.set("fl", "docOID score");
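        // For reference: these parameters correspond to a Solr request of the
        // form select?q=<query>&start=<start>&rows=<rows>&fl=docOID+score
        // (illustrative URL; here the core is queried through SolrJ directly)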

        /*
        try {
            Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
            query_including_stop_words = query_including_stop_words.rewrite(reader);

            // System.err.println("********* query_string " + query_string + "****");

            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
            query = query.rewrite(reader);

            // Get the list of expanded query terms and their frequencies
            // num docs matching, and total frequency
            HashSet terms = new HashSet();
            query.extractTerms(terms);

            HashMap doc_term_freq_map = new HashMap();

            Iterator iter = terms.iterator();
            while (iter.hasNext()) {

                Term term = (Term) iter.next();

                // Get the term frequency over all the documents
                TermDocs term_docs = reader.termDocs(term);
                int term_freq = 0;
                int match_docs = 0;
                while (term_docs.next())
                {
                    if (term_docs.freq() != 0)
                    {
                        term_freq += term_docs.freq();
                        match_docs++;

                        // Calculate the document-level term frequency as well
                        Integer lucene_doc_num_obj = new Integer(term_docs.doc());
                        int doc_term_freq = 0;
                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj))
                        {
                            doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
                        }
                        doc_term_freq += term_docs.freq();

                        doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
                    }
                }

                // Create a term
                lucene_query_result.addTerm(term.text(), term.field(), match_docs, term_freq);
            }

            // Get the list of stop words removed from the query
            HashSet terms_including_stop_words = new HashSet();
            query_including_stop_words.extractTerms(terms_including_stop_words);
            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
            while (terms_including_stop_words_iter.hasNext()) {
                Term term = (Term) terms_including_stop_words_iter.next();
                if (!terms.contains(term)) {
                    lucene_query_result.addStopWord(term.text());
                }
            }
        */

        try {
            QueryResponse solrResponse = solr_core.query(solrParams);

            SolrDocumentList hits = solrResponse.getResults();

            if (hits != null) {

                logger.info("*** hits size = " + hits.size());
                logger.info("*** num docs found = " + hits.getNumFound());

                logger.info("*** start results = " + start_results);
                logger.info("*** end results = " + end_results);
                logger.info("*** max docs = " + max_docs);

                // getNumFound() is the total number of matching docs in the collection,
                // as opposed to the number of documents returned in the hits list

                solr_query_result.setTotalDocs((int) hits.getNumFound());

                solr_query_result.setStartResults(start_results);
                solr_query_result.setEndResults(start_results + hits.size());

                // Output the matching documents
                for (int i = 0; i < hits.size(); i++) {
                    SolrDocument doc = hits.get(i);

                    // Need to think about how to support document term frequency. Make zero for now
                    int doc_term_freq = 0;
                    String docOID = (String) doc.get("docOID");
                    Float score = (Float) doc.get("score");

                    logger.info("**** docOID = " + docOID);
                    logger.info("**** score = " + score);

                    solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq);
                }
            }
            else {
                solr_query_result.setTotalDocs(0);

                solr_query_result.setStartResults(0);
                solr_query_result.setEndResults(0);
            }
        }
        catch (SolrServerException server_exception) {
            // Log the failure for diagnosis before recording the error code
            logger.error("Solr query failed", server_exception);
            solr_query_result.setError(SolrQueryResult.SERVER_ERROR);
        }

        /*

        // do the query
        // Simple case for getting all the matching documents
        if (end_results == Integer.MAX_VALUE) {
            // Perform the query (filter and sorter may be null)
            Hits hits = searcher.search(query, filter, sorter);
            lucene_query_result.setTotalDocs(hits.length());

            // Output the matching documents
            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(hits.length());

            for (int i = start_results; i <= hits.length(); i++) {
                int lucene_doc_num = hits.id(i - 1);
                Document doc = hits.doc(i - 1);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i - 1), doc_term_freq);
            }
        }

        // Slightly more complicated case for returning a subset of the matching documents
        else {
            // Perform the query (filter may be null)
            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
            lucene_query_result.setTotalDocs(hits.totalHits);

            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(end_results < hits.scoreDocs.length ? end_results : hits.scoreDocs.length);

            // Output the matching documents
            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
                int lucene_doc_num = hits.scoreDocs[i - 1].doc;
                Document doc = reader.document(lucene_doc_num);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i - 1].score, doc_term_freq);
            }
        }
        */

        return solr_query_result;
    }
    /*

    catch (ParseException parse_exception) {
        lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR);
    }
    catch (TooManyClauses too_many_clauses_exception) {
        lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);
    }
    catch (IOException exception) {
        lucene_query_result.setError(LuceneQueryResult.IO_ERROR);
        exception.printStackTrace();
    }
    catch (Exception exception) {
        lucene_query_result.setError(LuceneQueryResult.OTHER_ERROR);
        exception.printStackTrace();
    }
    return lucene_query_result;
    }

    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
        this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
        if (default_conjunction_operator.equals("AND")) {
            query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
        } else { // default is OR
            query_parser.setDefaultOperator(query_parser.OR_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.OR_OPERATOR);
        }
    }
    */

    public void cleanUp() {
        super.cleanUp();
    }

}
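
/* Usage sketch (illustrative only): a caller obtains a SolrJ SolrServer for
 * the target core, hands it to the wrapper, and runs a query. The
 * CommonsHttpSolrServer URL and the TX field name below are assumptions
 * made for the example, not values taken from this file.
 *
 *   SolrServer core = new CommonsHttpSolrServer("http://localhost:8983/solr/mycollection-didx");
 *   SolrQueryWrapper wrapper = new SolrQueryWrapper();
 *   wrapper.setSolrCore(core);
 *   wrapper.setMaxDocs(50);
 *   if (wrapper.initialise()) {
 *       SharedSoleneQueryResult result = wrapper.runQuery("TX:(snail farming)");
 *       // result now carries the total matching docs plus (docOID, score) pairs
 *   }
 *   wrapper.cleanUp();
 */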