/**********************************************************************
 *
 * SolrQueryWrapper.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.gsdl3.util;

import java.io.*;
import java.util.*;
import java.util.regex.*;

import org.apache.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

import org.greenstone.LuceneWrapper.SharedSoleneQuery;
import org.greenstone.LuceneWrapper.SharedSoleneQueryResult;


/**
 * Runs a query against a Solr core (via SolrJ) and adapts the response into
 * the shared Greenstone query-result structure used by the Lucene wrapper.
 *
 * Callers are expected to inject the target core with {@link #setSolrCore}
 * before invoking {@link #runQuery}. Paging is controlled by the inherited
 * {@code start_results}/{@code end_results} fields from
 * {@link SharedSoleneQuery}.
 */
public class SolrQueryWrapper extends SharedSoleneQuery
{
    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName());

    // Upper bound on documents to return; currently informational only
    // (logged in runQuery) — paging is driven by start_results/end_results.
    protected int max_docs = 100;

    // The Solr core to query; must be set via setSolrCore() before use.
    SolrServer solr_core = null;

    public SolrQueryWrapper()
    {
        super();
    }

    /**
     * Sets the maximum number of documents to consider.
     *
     * @param max_docs the new maximum
     */
    public void setMaxDocs(int max_docs)
    {
        this.max_docs = max_docs;
    }

    /**
     * Injects the Solr core this wrapper will query.
     *
     * @param solr_core a live SolrJ server handle for the target core
     */
    public void setSolrCore(SolrServer solr_core)
    {
        this.solr_core = solr_core;
    }

    /**
     * Checks that the wrapper is ready to run queries.
     *
     * @return true if a Solr core has been set, false otherwise
     */
    public boolean initialise()
    {
        if (solr_core == null) {
            utf8out.println("Solr Core not loaded in ");
            utf8out.flush();
            return false;
        }
        return true;
    }

    /**
     * Executes the given query string against the configured Solr core.
     *
     * Requests the window of results [start_results, end_results] (inherited
     * paging fields) and retrieves only the "docOID" and "score" fields of
     * each hit.
     *
     * @param query_string the raw Solr query; must be non-null and non-empty
     * @return a populated SolrQueryResult (with SERVER_ERROR set if the Solr
     *         request failed), or null if query_string was null or empty
     */
    public SharedSoleneQueryResult runQuery(String query_string)
    {
        if (query_string == null || query_string.equals("")) {
            utf8out.println("The query word is not indicated ");
            utf8out.flush();
            return null;
        }

        SolrQueryResult solr_query_result = new SolrQueryResult();
        solr_query_result.clear();

        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set("q", query_string);
        solrParams.set("start", start_results);
        // rows is inclusive of both endpoints, hence the +1
        solrParams.set("rows", (end_results - start_results) + 1);
        solrParams.set("fl", "docOID score");

        try {
            QueryResponse solrResponse = solr_core.query(solrParams);

            SolrDocumentList hits = solrResponse.getResults();

            if (hits != null) {
                logger.info("*** hits size = " + hits.size());
                logger.info("*** num docs found = " + hits.getNumFound());

                logger.info("*** start results = " + start_results);
                logger.info("*** end results = " + end_results);
                logger.info("*** max docs = " + max_docs);

                // numFound is the total number of matching docs in the
                // collection, as opposed to the number of documents returned
                // in the hits list
                solr_query_result.setTotalDocs((int) hits.getNumFound());

                solr_query_result.setStartResults(start_results);
                solr_query_result.setEndResults(start_results + hits.size());

                // Output the matching documents
                for (int i = 0; i < hits.size(); i++) {
                    SolrDocument doc = hits.get(i);

                    // Need to think about how to support document term
                    // frequency. Make zero for now
                    int doc_term_freq = 0;
                    String docOID = (String) doc.get("docOID");
                    Float score = (Float) doc.get("score");

                    logger.info("**** docOID = " + docOID);
                    logger.info("**** score = " + score);

                    // Guard against a hit without a score field rather than
                    // risking a NullPointerException on unboxing
                    float score_value = (score != null) ? score.floatValue() : 0.0f;
                    solr_query_result.addDoc(docOID, score_value, doc_term_freq);
                }
            }
            else {
                solr_query_result.setTotalDocs(0);

                solr_query_result.setStartResults(0);
                solr_query_result.setEndResults(0);
            }
        }
        catch (SolrServerException server_exception) {
            // Record the failure in the result, but also log the cause so it
            // is not silently swallowed
            logger.error("Solr query failed for: " + query_string, server_exception);
            solr_query_result.setError(SolrQueryResult.SERVER_ERROR);
        }

        return solr_query_result;
    }

    public void cleanUp()
    {
        super.cleanUp();
    }
}