source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 25892

Last change on this file since 25892 was 25892, checked in by ak19, 12 years ago

First commit for allowing a SOLR collection to be activated without having to run ant restart to get Greenstone to allow searching the updated solr index.

  • Property svn:executable set to *
File size: 11.2 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.List;
27import java.util.Map;
28import java.util.Set;
29import java.util.Vector;
30
31import org.apache.log4j.Logger;
32import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
33import org.apache.solr.client.solrj.response.FacetField;
34import org.apache.solr.core.CoreContainer;
35import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
36import org.greenstone.gsdl3.util.FacetWrapper;
37import org.greenstone.gsdl3.util.GSFile;
38import org.greenstone.gsdl3.util.GSXML;
39import org.greenstone.gsdl3.util.SolrFacetWrapper;
40import org.greenstone.gsdl3.util.SolrQueryResult;
41import org.greenstone.gsdl3.util.SolrQueryWrapper;
42import org.greenstone.util.GlobalProperties;
43import org.w3c.dom.Element;
44import org.w3c.dom.NodeList;
45
46public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
47{
48 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
49
50 static protected CoreContainer all_solr_cores = null;
51
52 protected HashMap solr_core_cache;
53 protected SolrQueryWrapper solr_src = null;
54
55 protected ArrayList<String> _facets = new ArrayList<String>();
56
57 public GS2SolrSearch()
58 {
59 does_faceting = true;
60 // Used to store the solr cores that match the required 'level'
61 // of search (e.g. either document-level=>didx, or
62 // section-level=>sidx. The hashmap is filled out on demand
63 // based on 'level' parameter passed in to 'setUpQueryer()'
64
65 solr_core_cache = new HashMap();
66
67 if (all_solr_cores == null)
68 {
69 // Share one CoreContainer across all sites/collections
70 try
71 {
72 String gsdl3_home = GlobalProperties.getGSDL3Home();
73 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
74
75 String solr_home_str = GSFile.extHome(gsdl3_home, solr_ext_name);
76
77 all_solr_cores = new CoreContainer(solr_home_str);
78 }
79 catch (Exception e)
80 {
81 e.printStackTrace();
82 }
83 }
84
85 this.solr_src = new SolrQueryWrapper();
86 }
87
88 /** configure this service */
89 public boolean configure(Element info, Element extra_info)
90 {
91 boolean success = super.configure(info, extra_info);
92
93 // 1. Make the CoreContainer reload solr.xml
94 // This is particularly needed for when activate.pl is executed during
95 // a running GS3 server. At that point, the solr collection is reactivated and
96 // we need to tell Greenstone that the solr index has changed. This requires
97 // the CoreContainer to reload the solr.xml file, and it all works again.
98
99 solr_core_cache.clear(); // clear the map of existing solr cores
100
101 // Reload the updated solr.xml into the CoreContainer
102 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
103 try {
104 String solr_home_str = all_solr_cores.getSolrHome();
105 File solr_home = new File(solr_home_str);
106 File solr_xml = new File( solr_home,"solr.xml" );
107
108 all_solr_cores.load(solr_home_str,solr_xml);
109 } catch (Exception e) {
110 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
111 e.printStackTrace();
112 return false;
113 }
114
115 if(!success) {
116 return false;
117 }
118
119 // 2. Setting up facets
120 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
121 NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
122
123 ArrayList<String> chosenFacets = new ArrayList<String>();
124 for (int i = 0; i < configIndexElems.getLength(); i++)
125 {
126 Element current = (Element) configIndexElems.item(i);
127 if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
128 {
129 chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
130 }
131 }
132
133 Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
134 NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
135
136 for (int j = 0; j < buildIndexElems.getLength(); j++)
137 {
138 Element current = (Element) buildIndexElems.item(j);
139 for (int i = 0; i < chosenFacets.size(); i++)
140 {
141 if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
142 {
143 _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
144 }
145 }
146 }
147
148 return true;
149 }
150
151 public void cleanUp()
152 {
153 super.cleanUp();
154 this.solr_src.cleanUp();
155 all_solr_cores.shutdown();
156 }
157
158 /** methods to handle actually doing the query */
159
160 /** do any initialisation of the query object */
161 protected boolean setUpQueryer(HashMap params)
162 {
163 this.solr_src.clearFacets();
164 this.solr_src.clearFacetQueries();
165
166 for (int i = 0; i < _facets.size(); i++)
167 {
168 this.solr_src.addFacet(_facets.get(i));
169 }
170
171 String index = "didx";
172 String physical_index_language_name = null;
173 String physical_sub_index_name = null;
174 int maxdocs = 100;
175 int hits_per_page = 20;
176 int start_page = 1;
177 // set up the query params
178 Set entries = params.entrySet();
179 Iterator i = entries.iterator();
180 while (i.hasNext())
181 {
182 Map.Entry m = (Map.Entry) i.next();
183 String name = (String) m.getKey();
184 String value = (String) m.getValue();
185
186 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
187 {
188 maxdocs = Integer.parseInt(value);
189 }
190 else if (name.equals(HITS_PER_PAGE_PARAM))
191 {
192 hits_per_page = Integer.parseInt(value);
193 }
194 else if (name.equals(START_PAGE_PARAM))
195 {
196 start_page = Integer.parseInt(value);
197 }
198 else if (name.equals(MATCH_PARAM))
199 {
200 if (value.equals(MATCH_PARAM_ALL))
201 {
202 this.solr_src.setDefaultConjunctionOperator("AND");
203 }
204 else
205 {
206 this.solr_src.setDefaultConjunctionOperator("OR");
207 }
208 }
209 else if (name.equals(RANK_PARAM))
210 {
211 if (value.equals(RANK_PARAM_RANK_VALUE))
212 {
213 value = null;
214 }
215 this.solr_src.setSortField(value);
216 }
217 else if (name.equals(LEVEL_PARAM))
218 {
219 if (value.toUpperCase().equals("SEC"))
220 {
221 index = "sidx";
222 }
223 else
224 {
225 index = "didx";
226 }
227 }
228 else if (name.equals("facets") && value.length() > 0)
229 {
230 String[] facets = value.split(",");
231
232 for (String facet : facets)
233 {
234 this.solr_src.addFacet(facet);
235 }
236 }
237 else if (name.equals("facetQueries") && value.length() > 0)
238 {
239 this.solr_src.addFacetQuery(value);
240 }
241 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
242 {
243 physical_sub_index_name = value;
244 }
245 else if (name.equals(INDEX_LANGUAGE_PARAM))
246 {
247 physical_index_language_name = value;
248 } // ignore any others
249 }
250 // set up start and end results if necessary
251 int start_results = 1;
252 if (start_page != 1)
253 {
254 start_results = ((start_page - 1) * hits_per_page) + 1;
255 }
256 int end_results = hits_per_page * start_page;
257 this.solr_src.setStartResults(start_results);
258 this.solr_src.setEndResults(end_results);
259 this.solr_src.setMaxDocs(maxdocs);
260
261 if (index.equals("sidx") || index.equals("didx"))
262 {
263 if (physical_sub_index_name != null)
264 {
265 index += physical_sub_index_name;
266 }
267 if (physical_index_language_name != null)
268 {
269 index += physical_index_language_name;
270 }
271 }
272
273 // now we know the index level, we can dig out the required
274 // solr-core, (caching the result in 'solr_core_cache')
275
276 String site_name = this.router.getSiteName();
277 String coll_name = this.cluster_name;
278
279 String core_name = site_name + "-" + coll_name + "-" + index;
280
281 EmbeddedSolrServer solr_core = null;
282
283 if (!solr_core_cache.containsKey(core_name))
284 {
285 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
286
287 solr_core_cache.put(core_name, solr_core);
288 }
289 else
290 {
291 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
292 }
293
294 this.solr_src.setSolrCore(solr_core);
295 this.solr_src.initialise();
296 return true;
297 }
298
299 /** do the query */
300 protected Object runQuery(String query)
301 {
302 try
303 {
304 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
305 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
306
307 return sqr;
308 }
309 catch (Exception e)
310 {
311 logger.error("Exception happened in run query: ", e);
312 }
313
314 return null;
315 }
316
317 /** get the total number of docs that match */
318 protected long numDocsMatched(Object query_result)
319 {
320 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
321
322 }
323
324 /** get the list of doc ids */
325 protected String[] getDocIDs(Object query_result)
326 {
327 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
328 String[] doc_nums = new String[docs.size()];
329 for (int d = 0; d < docs.size(); d++)
330 {
331 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
332 doc_nums[d] = doc_num;
333 }
334 return doc_nums;
335 }
336
337 /** get the list of doc ranks */
338 protected String[] getDocRanks(Object query_result)
339 {
340 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
341 String[] doc_ranks = new String[docs.size()];
342 for (int d = 0; d < docs.size(); d++)
343 {
344 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
345 }
346 return doc_ranks;
347 }
348
349 /** add in term info if available */
350 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
351 {
352 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
353
354 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
355 for (int t = 0; t < terms.size(); t++)
356 {
357 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
358
359 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
360 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
361 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
362 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
363 term_elem.setAttribute(FIELD_ATT, term_info.field_);
364 term_list.appendChild(term_elem);
365 }
366
367 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
368 for (int t = 0; t < stopwords.size(); t++)
369 {
370 String stopword = (String) stopwords.get(t);
371
372 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
373 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
374 term_list.appendChild(stopword_elem);
375 }
376
377 return true;
378 }
379
380 protected ArrayList<FacetWrapper> getFacets(Object query_result)
381 {
382 if (!(query_result instanceof SolrQueryResult))
383 {
384 return null;
385 }
386
387 SolrQueryResult result = (SolrQueryResult) query_result;
388 List<FacetField> facets = result.getFacetResults();
389
390 if (facets == null)
391 {
392 return null;
393 }
394
395 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
396
397 for (FacetField facet : facets)
398 {
399 newFacetList.add(new SolrFacetWrapper(facet));
400 }
401
402 return newFacetList;
403 }
404}
Note: See TracBrowser for help on using the repository browser.