source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java@ 24641

Last change on this file since 24641 was 24641, checked in by davidb, 13 years ago

Initial cut at Greenstone3 runtime code to support Solr. The Solr code is based on version 3.3, so this also includes an upgraded version of the LuceneWrapper code (gs2build/common-src/indexers/lucene-gs) that works with this version of the support jar files.

  • Property svn:executable set to *
File size: 10.4 KB
/**********************************************************************
 *
 * SolrQueryWrapper.java
 *
 * Copyright 2004 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
package org.greenstone.gsdl3.util;

import java.io.*;
import java.util.*;
import java.util.regex.*;

import org.apache.log4j.Logger;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

import org.greenstone.LuceneWrapper.SharedSoleneQuery;
import org.greenstone.LuceneWrapper.SharedSoleneQueryResult;

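/**
 * Runs a query against a Solr core through the SolrJ client API and adapts
 * the response to the SharedSoleneQueryResult shape that the rest of the
 * Greenstone3 runtime already consumes from the Lucene wrapper.
 */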
public class SolrQueryWrapper extends SharedSoleneQuery
{
    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName());

    /*
    // Use the standard set of English stop words by default
    static private String[] stop_words = GS2Analyzer.STOP_WORDS;

    private String full_indexdir = "";

    private String default_conjunction_operator = "OR";
    private String fuzziness = null;
    private String sort_field = null;
    private Sort sorter = new Sort();
    private String filter_string = null;
    private Filter filter = null;

    private QueryParser query_parser = null;
    private QueryParser query_parser_no_stop_words = null;
    */

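    // Upper bound on the number of documents to return. Note that the active
    // query path below only logs this value; the row count actually sent to
    // Solr is derived from start_results and end_results.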
    protected int max_docs = 100;

    SolrServer solr_core = null;


    public SolrQueryWrapper() {
        super();
    }
    /*
    // Create one query parser with the standard set of stop words, and one with none

        query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
    }
    */

    public void setMaxDocs(int max_docs)
    {
        this.max_docs = max_docs;
    }

    public void setSolrCore(SolrServer solr_core)
    {
        this.solr_core = solr_core;
    }


    public boolean initialise() {

        if (solr_core == null) {
            utf8out.println("Solr core not loaded");
            utf8out.flush();
            return false;
        }
        return true;
    }

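    // Runs query_string against the current core and packages the response as
    // a SolrQueryResult (a SharedSoleneQueryResult). Returns null if no query
    // string was supplied; flags SERVER_ERROR on the result if SolrJ throws.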
    public SharedSoleneQueryResult runQuery(String query_string) {

        if (query_string == null || query_string.equals("")) {
            utf8out.println("No query term specified");
            utf8out.flush();
            return null;
        }

        SolrQueryResult solr_query_result = new SolrQueryResult();
        solr_query_result.clear();

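        // Set up the SolrJ query parameters:
        //   q     -- the raw query string, parsed by the core's configured parser
        //   start -- zero-based offset of the first hit to return (paging)
        //   rows  -- how many hits to return from that offset
        //   fl    -- limit the fields fetched to docOID plus the score pseudo-field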
        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set("q", query_string);
        solrParams.set("start", start_results);
        solrParams.set("rows", (end_results - start_results) + 1);
        solrParams.set("fl", "docOID score");

        /*
        try {
            Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
            query_including_stop_words = query_including_stop_words.rewrite(reader);

            // System.err.println("********* query_string " + query_string + "****");

            Query query = parseQuery(reader, query_parser, query_string, fuzziness);
            query = query.rewrite(reader);

            // Get the list of expanded query terms and their frequencies
            // num docs matching, and total frequency
            HashSet terms = new HashSet();
            query.extractTerms(terms);

            HashMap doc_term_freq_map = new HashMap();

            Iterator iter = terms.iterator();
            while (iter.hasNext()) {

                Term term = (Term) iter.next();

                // Get the term frequency over all the documents
                TermDocs term_docs = reader.termDocs(term);
                int term_freq = 0;
                int match_docs = 0;
                while (term_docs.next())
                {
                    if (term_docs.freq() != 0)
                    {
                        term_freq += term_docs.freq();
                        match_docs++;

                        // Calculate the document-level term frequency as well
                        Integer lucene_doc_num_obj = new Integer(term_docs.doc());
                        int doc_term_freq = 0;
                        if (doc_term_freq_map.containsKey(lucene_doc_num_obj))
                        {
                            doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue();
                        }
                        doc_term_freq += term_docs.freq();

                        doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq));
                    }
                }

                // Create a term
                lucene_query_result.addTerm(term.text(), term.field(), match_docs, term_freq);
            }

            // Get the list of stop words removed from the query
            HashSet terms_including_stop_words = new HashSet();
            query_including_stop_words.extractTerms(terms_including_stop_words);
            Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator();
            while (terms_including_stop_words_iter.hasNext()) {
                Term term = (Term) terms_including_stop_words_iter.next();
                if (!terms.contains(term)) {
                    lucene_query_result.addStopWord(term.text());
                }
            }
        */

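        // Live SolrJ path: issue the query and translate the response into
        // the shared result object.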
        try {
            QueryResponse solrResponse = solr_core.query(solrParams);

            SolrDocumentList hits = solrResponse.getResults();

            if (hits != null) {

                logger.info("*** hits size = " + hits.size());
                logger.info("*** num docs found = " + hits.getNumFound());

                logger.info("*** start results = " + start_results);
                logger.info("*** end results = " + end_results);
                logger.info("*** max docs = " + max_docs);

                // numFound is the total number of matching docs in the collection,
                // as opposed to the number of documents returned in the hits list

                solr_query_result.setTotalDocs((int) hits.getNumFound());

                solr_query_result.setStartResults(start_results);
                solr_query_result.setEndResults(start_results + hits.size());

                // Output the matching documents
                for (int i = 0; i < hits.size(); i++) {
                    SolrDocument doc = hits.get(i);

                    // Need to think about how to support document term frequency. Make zero for now
                    int doc_term_freq = 0;
                    String docOID = (String) doc.get("docOID");
                    Float score = (Float) doc.get("score");

                    logger.info("**** docOID = " + docOID);
                    logger.info("**** score = " + score);

                    solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq);
                }
            }
            else {
                solr_query_result.setTotalDocs(0);

                solr_query_result.setStartResults(0);
                solr_query_result.setEndResults(0);
            }
        }

        catch (SolrServerException server_exception) {
            solr_query_result.setError(SolrQueryResult.SERVER_ERROR);
        }

        /*

        // do the query
        // Simple case for getting all the matching documents
        if (end_results == Integer.MAX_VALUE) {
            // Perform the query (filter and sorter may be null)
            Hits hits = searcher.search(query, filter, sorter);
            lucene_query_result.setTotalDocs(hits.length());

            // Output the matching documents
            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(hits.length());

            for (int i = start_results; i <= hits.length(); i++) {
                int lucene_doc_num = hits.id(i - 1);
                Document doc = hits.doc(i - 1);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i - 1), doc_term_freq);
            }
        }

        // Slightly more complicated case for returning a subset of the matching documents
        else {
            // Perform the query (filter may be null)
            TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
            lucene_query_result.setTotalDocs(hits.totalHits);

            lucene_query_result.setStartResults(start_results);
            lucene_query_result.setEndResults(end_results < hits.scoreDocs.length ? end_results : hits.scoreDocs.length);

            // Output the matching documents
            for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {
                int lucene_doc_num = hits.scoreDocs[i - 1].doc;
                Document doc = reader.document(lucene_doc_num);
                int doc_term_freq = 0;
                Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
                if (doc_term_freq_object != null)
                {
                    doc_term_freq = doc_term_freq_object.intValue();
                }
                lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i - 1].score, doc_term_freq);
            }
        }
        */

        return solr_query_result;
    }
    /*

    catch (ParseException parse_exception) {
        lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR);
    }
    catch (TooManyClauses too_many_clauses_exception) {
        lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);
    }
    catch (IOException exception) {
        lucene_query_result.setError(LuceneQueryResult.IO_ERROR);
        exception.printStackTrace();
    }
    catch (Exception exception) {
        lucene_query_result.setError(LuceneQueryResult.OTHER_ERROR);
        exception.printStackTrace();
    }
    return lucene_query_result;
    }

    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
        this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
        if (default_conjunction_operator.equals("AND")) {
            query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);
        } else { // default is OR
            query_parser.setDefaultOperator(query_parser.OR_OPERATOR);
            query_parser_no_stop_words.setDefaultOperator(query_parser.OR_OPERATOR);
        }
    }
    */

    public void cleanUp() {
        super.cleanUp();
    }

}
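For context, here is a minimal usage sketch of the class above. It is hypothetical, not part of the repository: the Solr URL, core name, and query field are placeholders, and it assumes SharedSoleneQueryResult exposes a getTotalDocs() accessor matching the setTotalDocs() setter used in runQuery(). CommonsHttpSolrServer is the SolrJ 3.x HTTP client.

import java.net.MalformedURLException;

import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.greenstone.LuceneWrapper.SharedSoleneQueryResult;
import org.greenstone.gsdl3.util.SolrQueryWrapper;

public class SolrQueryWrapperDemo {
    public static void main(String[] args) throws MalformedURLException {
        // Placeholder URL: point this at the collection's Solr core
        CommonsHttpSolrServer core = new CommonsHttpSolrServer("http://localhost:8983/solr/demo-collection");

        SolrQueryWrapper wrapper = new SolrQueryWrapper();
        wrapper.setSolrCore(core);
        wrapper.setMaxDocs(50);

        if (wrapper.initialise()) {
            // runQuery() returns null when no query string is supplied
            SharedSoleneQueryResult result = wrapper.runQuery("text:waikato");
            if (result != null) {
                // Assumes a getTotalDocs() accessor on the shared result class
                System.out.println("Total matching docs: " + result.getTotalDocs());
            }
        }
        wrapper.cleanUp();
    }
}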