source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 25863

Last change on this file since 25863 was 25863, checked in by sjm84, 12 years ago

Some changes to enable facet searching

  • Property svn:executable set to *
File size: 9.6 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.Map;
27import java.util.Set;
28import java.util.Vector;
29
30import org.apache.log4j.Logger;
31import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
32import org.apache.solr.core.CoreContainer;
33import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
34import org.greenstone.gsdl3.util.GSFile;
35import org.greenstone.gsdl3.util.GSXML;
36import org.greenstone.gsdl3.util.SolrQueryWrapper;
37import org.greenstone.util.GlobalProperties;
38import org.w3c.dom.Element;
39import org.w3c.dom.NodeList;
40
41public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
42{
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
44
45 static protected CoreContainer all_solr_cores = null;
46
47 protected HashMap solr_core_cache;
48 protected SolrQueryWrapper solr_src = null;
49
50 protected ArrayList<String> _facets = new ArrayList<String>();
51
52 public GS2SolrSearch()
53 {
54 // Used to store the solr cores that match the required 'level'
55 // of search (e.g. either document-level=>didx, or
56 // section-level=>sidx. The hashmap is filled out on demand
57 // based on 'level' parameter passed in to 'setUpQueryer()'
58
59 solr_core_cache = new HashMap();
60
61 if (all_solr_cores == null)
62 {
63 // Share one CoreContainer across all sites/collections
64 try
65 {
66 String gsdl3_home = GlobalProperties.getGSDL3Home();
67 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
68
69 String solr_home_str = GSFile.extHome(gsdl3_home, solr_ext_name);
70 File solr_home = new File(solr_home_str);
71 File solr_xml = new File(solr_home, "solr.xml");
72
73 all_solr_cores = new CoreContainer(solr_home_str, solr_xml);
74 }
75 catch (Exception e)
76 {
77 e.printStackTrace();
78 }
79 }
80
81 this.solr_src = new SolrQueryWrapper();
82 }
83
84 /** configure this service */
85 public boolean configure(Element info, Element extra_info)
86 {
87 if (!super.configure(info, extra_info))
88 {
89 return false;
90 }
91
92 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
93 NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
94
95 ArrayList<String> chosenFacets = new ArrayList<String>();
96 for (int i = 0; i < configIndexElems.getLength(); i++)
97 {
98 Element current = (Element) configIndexElems.item(i);
99 if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
100 {
101 chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
102 }
103 }
104
105 Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
106 NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
107
108 for(int j = 0; j < buildIndexElems.getLength(); j++)
109 {
110 Element current = (Element) buildIndexElems.item(j);
111 for(int i = 0; i < chosenFacets.size(); i++)
112 {
113 if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
114 {
115 _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
116 }
117 }
118 }
119
120 return true;
121 }
122
123 public void cleanUp()
124 {
125 super.cleanUp();
126 this.solr_src.cleanUp();
127 all_solr_cores.shutdown();
128 }
129
130 /** methods to handle actually doing the query */
131
132 /** do any initialisation of the query object */
133 protected boolean setUpQueryer(HashMap params)
134 {
135 this.solr_src.clearFacets();
136 this.solr_src.clearFacetQueries();
137
138 for (int i = 0; i < _facets.size(); i++)
139 {
140 this.solr_src.addFacet(_facets.get(i));
141 }
142
143 String index = "didx";
144 String physical_index_language_name = null;
145 String physical_sub_index_name = null;
146 int maxdocs = 100;
147 int hits_per_page = 20;
148 int start_page = 1;
149 // set up the query params
150 Set entries = params.entrySet();
151 Iterator i = entries.iterator();
152 while (i.hasNext())
153 {
154 Map.Entry m = (Map.Entry) i.next();
155 String name = (String) m.getKey();
156 String value = (String) m.getValue();
157
158 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
159 {
160 maxdocs = Integer.parseInt(value);
161 }
162 else if (name.equals(HITS_PER_PAGE_PARAM))
163 {
164 hits_per_page = Integer.parseInt(value);
165 }
166 else if (name.equals(START_PAGE_PARAM))
167 {
168 start_page = Integer.parseInt(value);
169 }
170 else if (name.equals(MATCH_PARAM))
171 {
172 if (value.equals(MATCH_PARAM_ALL))
173 {
174 this.solr_src.setDefaultConjunctionOperator("AND");
175 }
176 else
177 {
178 this.solr_src.setDefaultConjunctionOperator("OR");
179 }
180 }
181 else if (name.equals(RANK_PARAM))
182 {
183 if (value.equals(RANK_PARAM_RANK_VALUE))
184 {
185 value = null;
186 }
187 this.solr_src.setSortField(value);
188 }
189 else if (name.equals(LEVEL_PARAM))
190 {
191 if (value.toUpperCase().equals("SEC"))
192 {
193 index = "sidx";
194 }
195 else
196 {
197 index = "didx";
198 }
199 }
200 else if (name.equals("facets") && value.length() > 0)
201 {
202 String[] facets = value.split(",");
203
204 for (String facet : facets)
205 {
206 this.solr_src.addFacet(facet);
207 }
208 }
209 else if (name.equals("facetQueries") && value.length() > 0)
210 {
211 this.solr_src.addFacetQuery(value);
212 }
213 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
214 {
215 physical_sub_index_name = value;
216 }
217 else if (name.equals(INDEX_LANGUAGE_PARAM))
218 {
219 physical_index_language_name = value;
220 } // ignore any others
221 }
222 // set up start and end results if necessary
223 int start_results = 1;
224 if (start_page != 1)
225 {
226 start_results = ((start_page - 1) * hits_per_page) + 1;
227 }
228 int end_results = hits_per_page * start_page;
229 this.solr_src.setStartResults(start_results);
230 this.solr_src.setEndResults(end_results);
231 this.solr_src.setMaxDocs(maxdocs);
232
233 if (index.equals("sidx") || index.equals("didx"))
234 {
235 if (physical_sub_index_name != null)
236 {
237 index += physical_sub_index_name;
238 }
239 if (physical_index_language_name != null)
240 {
241 index += physical_index_language_name;
242 }
243 }
244
245 // now we know the index level, we can dig out the required
246 // solr-core, (caching the result in 'solr_core_cache')
247
248 String site_name = this.router.getSiteName();
249 String coll_name = this.cluster_name;
250
251 String core_name = site_name + "-" + coll_name + "-" + index;
252
253 EmbeddedSolrServer solr_core = null;
254
255 if (!solr_core_cache.containsKey(core_name))
256 {
257 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
258
259 solr_core_cache.put(core_name, solr_core);
260 }
261 else
262 {
263 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
264 }
265
266 this.solr_src.setSolrCore(solr_core);
267 this.solr_src.initialise();
268 return true;
269 }
270
271 /** do the query */
272 protected Object runQuery(String query)
273 {
274 try
275 {
276 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
277 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
278
279 return sqr;
280 }
281 catch (Exception e)
282 {
283 logger.error("Exception happened in run query: ", e);
284 }
285
286 return null;
287 }
288
289 /** get the total number of docs that match */
290 protected long numDocsMatched(Object query_result)
291 {
292 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
293
294 }
295
296 /** get the list of doc ids */
297 protected String[] getDocIDs(Object query_result)
298 {
299 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
300 String[] doc_nums = new String[docs.size()];
301 for (int d = 0; d < docs.size(); d++)
302 {
303 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
304 doc_nums[d] = doc_num;
305 }
306 return doc_nums;
307 }
308
309 /** get the list of doc ranks */
310 protected String[] getDocRanks(Object query_result)
311 {
312 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
313 String[] doc_ranks = new String[docs.size()];
314 for (int d = 0; d < docs.size(); d++)
315 {
316 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
317 }
318 return doc_ranks;
319 }
320
321 /** add in term info if available */
322 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
323 {
324 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
325
326 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
327 for (int t = 0; t < terms.size(); t++)
328 {
329 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
330
331 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
332 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
333 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
334 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
335 term_elem.setAttribute(FIELD_ATT, term_info.field_);
336 term_list.appendChild(term_elem);
337 }
338
339 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
340 for (int t = 0; t < stopwords.size(); t++)
341 {
342 String stopword = (String) stopwords.get(t);
343
344 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
345 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
346 term_list.appendChild(stopword_elem);
347 }
348
349 return true;
350 }
351
352}
Note: See TracBrowser for help on using the repository browser.