source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 25898

Last change on this file since 25898 was 25898, checked in by ak19, 12 years ago

GS2SolrSearch.cleanUp(), which is called when a solr collection is deactivated, no longer calls shutdown on the CoreContainer all_solr_cores (which would also shut down the cores of every other solr collection in GS3); instead it removes just the deactivated collection's solr cores from the CoreContainer. This still needs to be tested on Windows, to make sure it doesn't hang when ant stops the GS3 server after a solr collection has been searched.
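
In effect, the fix walks the core names registered in the shared CoreContainer and removes and closes only those whose names start with the deactivated collection's prefix, shutting the container itself down only once no cores remain. A minimal sketch of that idea, assuming the SolrJ 3.x CoreContainer/SolrCore API used in the file below; the SolrCoreCleanupSketch class, its removeCollectionCores helper and the coreNamePrefix parameter are illustrative and not part of GS2SolrSearch:

import java.util.ArrayList;

import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;

class SolrCoreCleanupSketch
{
    // Sketch: drop one collection's cores from the shared CoreContainer,
    // leaving the cores of all other collections registered and usable.
    static void removeCollectionCores(CoreContainer container, String coreNamePrefix)
    {
        // Copy the names first so that removing cores during iteration is safe.
        for (String coreName : new ArrayList<String>(container.getCoreNames()))
        {
            if (coreName.startsWith(coreNamePrefix))
            {
                // remove() unregisters the core without decrementing its reference count,
                // so close() is still needed to actually release it.
                SolrCore core = container.remove(coreName);
                while (core != null && !core.isClosed())
                {
                    core.close();
                }
            }
        }
        // Only when no cores remain at all is the container itself shut down.
        if (container.getCoreNames().isEmpty())
        {
            container.shutdown();
        }
    }
}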

  • Property svn:executable set to *
File size: 14.0 KB
/*
 * GS2SolrSearch.java
 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package org.greenstone.gsdl3.service;

// Java, Solr and Greenstone classes
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
import org.greenstone.gsdl3.util.FacetWrapper;
import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.SolrFacetWrapper;
import org.greenstone.gsdl3.util.SolrQueryResult;
import org.greenstone.gsdl3.util.SolrQueryWrapper;
import org.greenstone.util.GlobalProperties;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
{
    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());

    static protected CoreContainer all_solr_cores = null;

    protected HashMap solr_core_cache;
    protected SolrQueryWrapper solr_src = null;

    protected ArrayList<String> _facets = new ArrayList<String>();

    public GS2SolrSearch()
    {
        does_faceting = true;
        // Used to store the solr cores that match the required 'level'
        // of search (e.g. either document-level=>didx or
        // section-level=>sidx). The hashmap is filled out on demand,
        // based on the 'level' parameter passed in to 'setUpQueryer()'.

        solr_core_cache = new HashMap();

        if (all_solr_cores == null)
        {
            // Share one CoreContainer across all sites/collections
            try
            {
                String gsdl3_home = GlobalProperties.getGSDL3Home();
                String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");

                String solr_home_str = GSFile.extHome(gsdl3_home, solr_ext_name);

                all_solr_cores = new CoreContainer(solr_home_str);
            }
            catch (Exception e)
            {
                e.printStackTrace();
            }
        }

        this.solr_src = new SolrQueryWrapper();
    }

    /** configure this service */
    public boolean configure(Element info, Element extra_info)
    {
        boolean success = super.configure(info, extra_info);

        // 1. Make the CoreContainer reload solr.xml
        // This is particularly needed when activate.pl is run against a
        // running GS3 server. At that point the solr collection is reactivated and
        // we need to tell Greenstone that the solr index has changed. This requires
        // the CoreContainer to reload the solr.xml file, after which it all works again.

        solr_core_cache.clear(); // clear this collection's solr cores, which get added to the map upon querying

        // Reload the updated solr.xml into the CoreContainer
        // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
        try {
            String solr_home_str = all_solr_cores.getSolrHome();
            File solr_home = new File(solr_home_str);
            File solr_xml = new File(solr_home, "solr.xml");

            all_solr_cores.load(solr_home_str, solr_xml);

        } catch (Exception e) {
            logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
            e.printStackTrace();
            return false;
        }

        if (!success) {
            return false;
        }

        // 2. Set up facets
        Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
        NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);

        ArrayList<String> chosenFacets = new ArrayList<String>();
        for (int i = 0; i < configIndexElems.getLength(); i++)
        {
            Element current = (Element) configIndexElems.item(i);
            if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
            {
                chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
            }
        }

        Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
        NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);

        for (int j = 0; j < buildIndexElems.getLength(); j++)
        {
            Element current = (Element) buildIndexElems.item(j);
            for (int i = 0; i < chosenFacets.size(); i++)
            {
                if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
                {
                    _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
                }
            }
        }

        return true;
    }

    public void cleanUp()
    {
        super.cleanUp();
        this.solr_src.cleanUp();

        // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
        // references to this collection's solr cores in the CoreContainer object. That can be more SolrCores than
        // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
        // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
        // for this collection, even if EmbeddedSolrServers for these were never added to the solr_core_cache map.

        // 1. Clear the map keeping track of this collection's EmbeddedSolrServers
        solr_core_cache.clear();

        // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
        String collection_core_name_prefix = getCollectionCoreNamePrefix();

        Collection<String> coreNames = all_solr_cores.getCoreNames();
        if (!coreNames.isEmpty()) {
            Iterator<String> coreIterator = coreNames.iterator();
            while (coreIterator.hasNext()) {

                String solrCoreName = coreIterator.next();
                if (solrCoreName.startsWith(collection_core_name_prefix)) {

                    logger.error("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");

                    // CoreContainer.remove(String name): removes and returns the registered core without decrementing its reference count
                    // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
                    SolrCore solr_core = all_solr_cores.remove(solrCoreName);
                    while (!solr_core.isClosed()) {
                        logger.error("@@@@@@ " + solrCoreName + " was not closed. Closing....");
                        solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
                    }
                    if (solr_core.isClosed()) {
                        logger.error("@@@@@@ " + solrCoreName + " is closed.");
                    }
                    solr_core = null;
                }
            }
        }

        // 3. If there are no more solr cores in Greenstone, all_solr_cores will be empty: shut down and null the CoreContainer.
        // All going well, this happens when we're ant stopping the Greenstone server and the last solr collection
        // is being deactivated.
        Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
        if (coreNamesRemaining.isEmpty()) {
            logger.error("**** CoreContainer contains 0 solrCores. Shutting down...");

            all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores, presumably
            all_solr_cores = null;
        }
        else { // else part is just for debugging
            Iterator coreIterator = coreNamesRemaining.iterator();
            while (coreIterator.hasNext()) {
                logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
            }
        }
    }

    /** methods to handle actually doing the query */

    /** do any initialisation of the query object */
    protected boolean setUpQueryer(HashMap params)
    {
        this.solr_src.clearFacets();
        this.solr_src.clearFacetQueries();

        for (int i = 0; i < _facets.size(); i++)
        {
            this.solr_src.addFacet(_facets.get(i));
        }

        String index = "didx";
        String physical_index_language_name = null;
        String physical_sub_index_name = null;
        int maxdocs = 100;
        int hits_per_page = 20;
        int start_page = 1;
        // set up the query params
        Set entries = params.entrySet();
        Iterator i = entries.iterator();
        while (i.hasNext())
        {
            Map.Entry m = (Map.Entry) i.next();
            String name = (String) m.getKey();
            String value = (String) m.getValue();

            if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
            {
                maxdocs = Integer.parseInt(value);
            }
            else if (name.equals(HITS_PER_PAGE_PARAM))
            {
                hits_per_page = Integer.parseInt(value);
            }
            else if (name.equals(START_PAGE_PARAM))
            {
                start_page = Integer.parseInt(value);
            }
            else if (name.equals(MATCH_PARAM))
            {
                if (value.equals(MATCH_PARAM_ALL))
                {
                    this.solr_src.setDefaultConjunctionOperator("AND");
                }
                else
                {
                    this.solr_src.setDefaultConjunctionOperator("OR");
                }
            }
            else if (name.equals(RANK_PARAM))
            {
                if (value.equals(RANK_PARAM_RANK_VALUE))
                {
                    value = null;
                }
                this.solr_src.setSortField(value);
            }
            else if (name.equals(LEVEL_PARAM))
            {
                if (value.toUpperCase().equals("SEC"))
                {
                    index = "sidx";
                }
                else
                {
                    index = "didx";
                }
            }
            else if (name.equals("facets") && value.length() > 0)
            {
                String[] facets = value.split(",");

                for (String facet : facets)
                {
                    this.solr_src.addFacet(facet);
                }
            }
            else if (name.equals("facetQueries") && value.length() > 0)
            {
                this.solr_src.addFacetQuery(value);
            }
            else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
            {
                physical_sub_index_name = value;
            }
            else if (name.equals(INDEX_LANGUAGE_PARAM))
            {
                physical_index_language_name = value;
            } // ignore any others
        }
        // set up start and end results if necessary
        int start_results = 1;
        if (start_page != 1)
        {
            start_results = ((start_page - 1) * hits_per_page) + 1;
        }
        int end_results = hits_per_page * start_page;
        this.solr_src.setStartResults(start_results);
        this.solr_src.setEndResults(end_results);
        this.solr_src.setMaxDocs(maxdocs);

        if (index.equals("sidx") || index.equals("didx"))
        {
            if (physical_sub_index_name != null)
            {
                index += physical_sub_index_name;
            }
            if (physical_index_language_name != null)
            {
                index += physical_index_language_name;
            }
        }

        // Now that we know the index level, we can dig out the required
        // solr core (caching the result in 'solr_core_cache')
        String core_name = getCollectionCoreNamePrefix() + "-" + index;

        EmbeddedSolrServer solr_core = null;

        if (!solr_core_cache.containsKey(core_name))
        {
            solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);

            solr_core_cache.put(core_name, solr_core);
        }
        else
        {
            solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
        }

        this.solr_src.setSolrCore(solr_core);
        this.solr_src.initialise();
        return true;
    }

    /** do the query */
    protected Object runQuery(String query)
    {
        try
        {
            SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);

            return sqr;
        }
        catch (Exception e)
        {
            logger.error("Exception happened in run query: ", e);
        }

        return null;
    }

    /** get the total number of docs that match */
    protected long numDocsMatched(Object query_result)
    {
        return ((SharedSoleneQueryResult) query_result).getTotalDocs();
    }

    /** get the list of doc ids */
    protected String[] getDocIDs(Object query_result)
    {
        Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
        String[] doc_nums = new String[docs.size()];
        for (int d = 0; d < docs.size(); d++)
        {
            String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
            doc_nums[d] = doc_num;
        }
        return doc_nums;
    }

    /** get the list of doc ranks */
    protected String[] getDocRanks(Object query_result)
    {
        Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
        String[] doc_ranks = new String[docs.size()];
        for (int d = 0; d < docs.size(); d++)
        {
            doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
        }
        return doc_ranks;
    }

    /** add in term info if available */
    protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
    {
        String query_level = (String) params.get(LEVEL_PARAM); // the current query level

        Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
        for (int t = 0; t < terms.size(); t++)
        {
            SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);

            Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
            term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
            term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
            term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
            term_elem.setAttribute(FIELD_ATT, term_info.field_);
            term_list.appendChild(term_elem);
        }

        Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
        for (int t = 0; t < stopwords.size(); t++)
        {
            String stopword = (String) stopwords.get(t);

            Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
            stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
            term_list.appendChild(stopword_elem);
        }

        return true;
    }

    protected ArrayList<FacetWrapper> getFacets(Object query_result)
    {
        if (!(query_result instanceof SolrQueryResult))
        {
            return null;
        }

        SolrQueryResult result = (SolrQueryResult) query_result;
        List<FacetField> facets = result.getFacetResults();

        if (facets == null)
        {
            return null;
        }

        ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();

        for (FacetField facet : facets)
        {
            newFacetList.add(new SolrFacetWrapper(facet));
        }

        return newFacetList;
    }

    protected String getCollectionCoreNamePrefix() {
        String site_name = this.router.getSiteName();
        String coll_name = this.cluster_name;
        String collection_core_name_prefix = site_name + "-" + coll_name;
        return collection_core_name_prefix;
    }
}