source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java@24739

Last change on this file since 24739 was 24739, checked in by davidb, 13 years ago

Shifted to using LuceneWrapper3

  • Property svn:executable set to *
File size: 10.4 KB
/**********************************************************************
 *
 * SolrQueryWrapper.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.gsdl3.util;


import java.io.*;
import java.util.*;
import java.util.regex.*;

import org.apache.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

import org.greenstone.LuceneWrapper3.SharedSoleneQuery;
import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;

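/**
 * Wraps a SolrJ SolrServer for a Greenstone collection's Solr core: runs a
 * query over the configured result window and repackages the hits
 * (docOID plus score) as a SharedSoleneQueryResult.
 */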
public class SolrQueryWrapper extends SharedSoleneQuery
{

    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName());

    /*
    // Use the standard set of English stop words by default
    static private String[] stop_words = GS2Analyzer.STOP_WORDS;

    private String full_indexdir = "";

    private String default_conjunction_operator = "OR";
    private String fuzziness = null;
    private String sort_field = null;
    private Sort sorter = new Sort();
    private String filter_string = null;
    private Filter filter = null;

    private QueryParser query_parser = null;
    private QueryParser query_parser_no_stop_words = null;
    */

    protected int max_docs = 100;

    SolrServer solr_core = null;


    public SolrQueryWrapper() {
        super();
    }
    /*
    // Create one query parser with the standard set of stop words, and one with none

        query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    }
    */

    public void setMaxDocs(int max_docs)
    {
        this.max_docs = max_docs;
    }

    public void setSolrCore(SolrServer solr_core)
    {
        this.solr_core = solr_core;
    }


    public boolean initialise() {

        if (solr_core == null) {
            utf8out.println("Solr core not loaded");
            utf8out.flush();
            return false;
        }
        return true;
    }

    public SharedSoleneQueryResult runQuery(String query_string) {

        if (query_string == null || query_string.equals("")) {
            utf8out.println("No query string specified");
            utf8out.flush();
            return null;
        }

        SolrQueryResult solr_query_result = new SolrQueryResult();
        solr_query_result.clear();

        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set("q", query_string);
        solrParams.set("start", start_results);
        solrParams.set("rows", (end_results - start_results) + 1);
        solrParams.set("fl", "docOID score");
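        // For reference: these parameters correspond to a Solr request of the
        // form select?q=<query>&start=<start>&rows=<rows>&fl=docOID+score
        // (illustrative URL; here the core is queried through SolrJ directly)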

        /*
        try {
            Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
            query_including_stop_words = query_including_stop_words.rewrite(reader);

            // System.err.println("********* query_string " + query_string + "****");

            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
            query = query.rewrite(reader);

            // Get the list of expanded query terms and their frequencies
            // num docs matching, and total frequency
            HashSet terms = new HashSet();
            query.extractTerms(terms);

            HashMap doc_term_freq_map = new HashMap();

            Iterator iter = terms.iterator();
            while (iter.hasNext()) {

                Term term = (Term) iter.next();

                // Get the term frequency over all the documents
                TermDocs term_docs = reader.termDocs(term);
                int term_freq = 0;
                int match_docs = 0;
                while (term_docs.next())
                {
                    if (term_docs.freq() != 0)
                    {
                        term_freq += term_docs.freq();
                        match_docs++;

                        // Calculate the document-level term frequency as well
                        Integer lucene_doc_num_obj = new Integer(term_docs.doc());
                        int doc_term_freq = 0;
                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj))
                        {
                            doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
                        }
                        doc_term_freq += term_docs.freq();

                        doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
                    }
                }

                // Create a term
                lucene_query_result.addTerm(term.text(), term.field(), match_docs, term_freq);
            }

            // Get the list of stop words removed from the query
            HashSet terms_including_stop_words = new HashSet();
            query_including_stop_words.extractTerms(terms_including_stop_words);
            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
            while (terms_including_stop_words_iter.hasNext()) {
                Term term = (Term) terms_including_stop_words_iter.next();
                if (!terms.contains(term)) {
                    lucene_query_result.addStopWord(term.text());
                }
            }
        */

        try {
            QueryResponse solrResponse = solr_core.query(solrParams);

            SolrDocumentList hits = solrResponse.getResults();

            if (hits != null) {

                logger.info("*** hits size = " + hits.size());
                logger.info("*** num docs found = " + hits.getNumFound());

                logger.info("*** start results = " + start_results);
                logger.info("*** end results = " + end_results);
                logger.info("*** max docs = " + max_docs);

                // getNumFound() is the total number of matching docs in the collection,
                // as opposed to the number of documents returned in the hits list

                solr_query_result.setTotalDocs((int) hits.getNumFound());

                solr_query_result.setStartResults(start_results);
                solr_query_result.setEndResults(start_results + hits.size());

                // Output the matching documents
                for (int i = 0; i < hits.size(); i++) {
                    SolrDocument doc = hits.get(i);

                    // Need to think about how to support document term frequency. Make zero for now
                    int doc_term_freq = 0;
                    String docOID = (String) doc.get("docOID");
                    Float score = (Float) doc.get("score");

                    logger.info("**** docOID = " + docOID);
                    logger.info("**** score = " + score);

                    solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq);
                }
            }
            else {
                solr_query_result.setTotalDocs(0);

                solr_query_result.setStartResults(0);
                solr_query_result.setEndResults(0);
            }
        }
        catch (SolrServerException server_exception) {
            // Log the failure for diagnosis before recording the error code
            logger.error("Solr query failed", server_exception);
            solr_query_result.setError(SolrQueryResult.SERVER_ERROR);
        }

        /*

        // do the query
        // Simple case for getting all the matching documents
        if (end_results == Integer.MAX_VALUE) {
            // Perform the query (filter and sorter may be null)
            Hits hits = searcher.search(query, filter, sorter);
            lucene_query_result.setTotalDocs(hits.length());

            // Output the matching documents
            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(hits.length());

            for (int i = start_results; i <= hits.length(); i++) {
                int lucene_doc_num = hits.id(i - 1);
                Document doc = hits.doc(i - 1);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i - 1), doc_term_freq);
            }
        }

        // Slightly more complicated case for returning a subset of the matching documents
        else {
            // Perform the query (filter may be null)
            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
            lucene_query_result.setTotalDocs(hits.totalHits);

            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(end_results < hits.scoreDocs.length ? end_results : hits.scoreDocs.length);

            // Output the matching documents
            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
                int lucene_doc_num = hits.scoreDocs[i - 1].doc;
                Document doc = reader.document(lucene_doc_num);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i - 1].score, doc_term_freq);
            }
        }
        */

        return solr_query_result;
    }
    /*

    catch (ParseException parse_exception) {
        lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR);
    }
    catch (TooManyClauses too_many_clauses_exception) {
        lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);
    }
    catch (IOException exception) {
        lucene_query_result.setError(LuceneQueryResult.IO_ERROR);
        exception.printStackTrace();
    }
    catch (Exception exception) {
        lucene_query_result.setError(LuceneQueryResult.OTHER_ERROR);
        exception.printStackTrace();
    }
    return lucene_query_result;
    }

    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
        this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
        if (default_conjunction_operator.equals("AND")) {
            query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
        } else { // default is OR
            query_parser.setDefaultOperator(query_parser.OR_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.OR_OPERATOR);
        }
    }
    */

    public void cleanUp() {
        super.cleanUp();
    }

}
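
/* Usage sketch (illustrative only): a caller obtains a SolrJ SolrServer for
 * the target core, hands it to the wrapper, and runs a query. The
 * CommonsHttpSolrServer URL and the TX field name below are assumptions
 * made for the example, not values taken from this file.
 *
 *   SolrServer core = new CommonsHttpSolrServer("http://localhost:8983/solr/mycollection-didx");
 *   SolrQueryWrapper wrapper = new SolrQueryWrapper();
 *   wrapper.setSolrCore(core);
 *   wrapper.setMaxDocs(50);
 *   if (wrapper.initialise()) {
 *       SharedSoleneQueryResult result = wrapper.runQuery("TX:(snail farming)");
 *       // result now carries the total matching docs plus (docOID, score) pairs
 *   }
 *   wrapper.cleanUp();
 */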