source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java@ 25866

Last change on this file since 25866 was 25866, checked in by sjm84, 12 years ago

Some major upgrades to SolrQueryWrapper to enable faceting

  • Property svn:executable set to *
File size: 7.3 KB
Line 
1/**********************************************************************
2 *
3 * SolrQueryWrapper.java
4 *
5 * Copyright 2004 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26package org.greenstone.gsdl3.util;
27
28import java.lang.reflect.Type;
29import java.net.URLDecoder;
30import java.util.ArrayList;
31import java.util.HashMap;
32import java.util.List;
33
34import org.apache.log4j.Logger;
35import org.apache.solr.client.solrj.SolrServer;
36import org.apache.solr.client.solrj.SolrServerException;
37import org.apache.solr.client.solrj.response.QueryResponse;
38import org.apache.solr.common.SolrDocument;
39import org.apache.solr.common.SolrDocumentList;
40import org.apache.solr.common.params.ModifiableSolrParams;
41import org.greenstone.LuceneWrapper3.SharedSoleneQuery;
42import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
43
44import com.google.gson.Gson;
45import com.google.gson.reflect.TypeToken;
46
47public class SolrQueryWrapper extends SharedSoleneQuery
48{
49 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName());
50 protected int max_docs = 100;
51 protected ArrayList<String> _facets = new ArrayList<String>();
52 protected ArrayList<String> _facetQueries = new ArrayList<String>();
53 SolrServer solr_core = null;
54
55 public SolrQueryWrapper()
56 {
57 super();
58 start_results = 0;
59 }
60
61 public void setMaxDocs(int max_docs)
62 {
63 this.max_docs = max_docs;
64 }
65
66 public void setSolrCore(SolrServer solr_core)
67 {
68 this.solr_core = solr_core;
69 }
70
71 public void addFacet(String facet)
72 {
73 if (!_facets.contains(facet))
74 {
75 _facets.add(facet);
76 }
77 }
78
79 public void clearFacets()
80 {
81 _facets.clear();
82 }
83
84 public void addFacetQuery(String facetQuery)
85 {
86 if (!_facetQueries.contains(facetQuery))
87 {
88 _facetQueries.add(facetQuery);
89 }
90 }
91
92 public void clearFacetQueries()
93 {
94 _facetQueries.clear();
95 }
96
97 public boolean initialise()
98 {
99 if (solr_core == null)
100 {
101 utf8out.println("Solr Core not loaded in ");
102 utf8out.flush();
103 return false;
104 }
105 return true;
106 }
107
108 public SharedSoleneQueryResult runQuery(String query_string)
109 {
110 if (query_string == null || query_string.equals(""))
111 {
112 utf8out.println("The query word is not indicated ");
113 utf8out.flush();
114 return null;
115 }
116
117 SolrQueryResult solr_query_result = new SolrQueryResult();
118 solr_query_result.clear();
119
120 if (_facetQueries.size() > 0)
121 {
122 HashMap<String, ArrayList<String>> grouping = new HashMap<String, ArrayList<String>>();
123 for (String currentQuery : _facetQueries)
124 {
125 //Facet queries are stored in JSON, so we have to decode it
126 Gson gson = new Gson();
127 Type type = new TypeToken<List<String>>()
128 {
129 }.getType();
130 List<String> queryElems = gson.fromJson(currentQuery, type);
131
132 //Group each query segment by the index it uses
133 for (String currentQueryElement : queryElems)
134 {
135 String decodedQueryElement = null;
136 try
137 {
138 decodedQueryElement = URLDecoder.decode(currentQueryElement, "UTF-8");
139 }
140 catch (Exception ex)
141 {
142 continue;
143 }
144
145 int colonIndex = currentQueryElement.indexOf(":");
146 String indexShortName = currentQueryElement.substring(0, colonIndex);
147
148 if(grouping.get(indexShortName) == null)
149 {
150 grouping.put(indexShortName, new ArrayList<String>());
151 }
152 grouping.get(indexShortName).add(decodedQueryElement);
153 }
154 }
155
156 //Construct the facet query string to add to the regular query string
157 StringBuilder facetQueryString = new StringBuilder();
158 int keysetCounter = 0;
159 for (String key : grouping.keySet())
160 {
161 StringBuilder currentFacetString = new StringBuilder("(");
162 int groupCounter = 0;
163 for(String queryElem : grouping.get(key))
164 {
165 currentFacetString.append(queryElem);
166
167 groupCounter++;
168 if(groupCounter < grouping.get(key).size())
169 {
170 currentFacetString.append(" OR ");
171 }
172 }
173 currentFacetString.append(")");
174
175 facetQueryString.append(currentFacetString);
176
177 keysetCounter++;
178 if(keysetCounter < grouping.keySet().size())
179 {
180 facetQueryString.append(" AND ");
181 }
182 }
183
184 if(facetQueryString.length() > 0)
185 {
186 query_string += " AND " + facetQueryString;
187 }
188 }
189
190 ModifiableSolrParams solrParams = new ModifiableSolrParams();
191 solrParams.set("q", query_string);
192 solrParams.set("start", start_results);
193 solrParams.set("rows", (end_results - start_results) + 1);
194 solrParams.set("fl", "docOID score");
195
196 if (_facets.size() > 0)
197 {
198 solrParams.set("facet", "true");
199 for (int i = 0; i < _facets.size(); i++)
200 {
201 solrParams.add("facet.field", _facets.get(i));
202 }
203 }
204
205 try
206 {
207 QueryResponse solrResponse = solr_core.query(solrParams);
208 SolrDocumentList hits = solrResponse.getResults();
209
210 if (hits != null)
211 {
212 logger.info("*** hits size = " + hits.size());
213 logger.info("*** num docs found = " + hits.getNumFound());
214
215 logger.info("*** start results = " + start_results);
216 logger.info("*** end results = " + end_results);
217 logger.info("*** max docs = " + max_docs);
218
219 // numDocsFound is the total number of matching docs in the collection
220 // as opposed to the number of documents returned in the hits list
221
222 solr_query_result.setTotalDocs((int) hits.getNumFound());
223
224 solr_query_result.setStartResults(start_results);
225 solr_query_result.setEndResults(start_results + hits.size());
226
227 // Output the matching documents
228 for (int i = 0; i < hits.size(); i++)
229 {
230 SolrDocument doc = hits.get(i);
231
232 // Need to think about how to support document term frequency. Make zero for now
233 int doc_term_freq = 0;
234 String docOID = (String) doc.get("docOID");
235 Float score = (Float) doc.get("score");
236
237 logger.info("**** docOID = " + docOID);
238 logger.info("**** score = " + score);
239
240 solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq);
241 }
242 }
243 else
244 {
245 solr_query_result.setTotalDocs(0);
246
247 solr_query_result.setStartResults(0);
248 solr_query_result.setEndResults(0);
249 }
250
251 solr_query_result.setFacetResults(solrResponse.getFacetFields());
252 }
253 catch (SolrServerException server_exception)
254 {
255 server_exception.printStackTrace();
256 solr_query_result.setError(SolrQueryResult.SERVER_ERROR);
257 }
258
259 return solr_query_result;
260 }
261
262 //Greenstone universe operates with a base of 1 for "start_results"
263 //But Solr operates from 0
264 public void setStartResults(int start_results)
265 {
266 if (start_results < 0)
267 {
268 start_results = 0;
269 }
270 this.start_results = start_results - 1;
271 }
272
273 public void cleanUp()
274 {
275 super.cleanUp();
276 }
277}
Note: See TracBrowser for help on using the repository browser.