source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 29355

Last change on this file since 29355 was 29355, checked in by ak19, 10 years ago

Overlooked a very important commit, now applied with corrections: solr cores should not be removed for solr/lucene 4.7.2 as they were for lucene/solr 3.3.0. However, all the solr cores do need to be shut down on cleanUp(), so that the running GS3 server does not hold a lock on any of the cores; such a lock prevents the GS3 tomcat from stopping fully, with part of tomcat still running in the background even though tomcat appears to have stopped.
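
The gist of the change, condensed from the new cleanUp() method in the listing below (an abridged excerpt of this file, not a complete implementation):

    public void cleanUp()
    {
        super.cleanUp();
        this.solr_src.cleanUp();

        // clear this collection's cache of EmbeddedSolrServers
        solr_core_cache.clear();

        // For solr 4.7.2: do NOT remove the cores one by one (the old 3.3.0 behaviour,
        // now commented out in the method body); just shut the shared CoreContainer down
        // so that tomcat releases its locks, while the core descriptions in web/ext/solr.xml
        // are preserved for the next GS3 server startup.
        all_solr_cores.shutdown();
        all_solr_cores = null;
    }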

  • Property svn:executable set to *
File size: 16.4 KB
/*
 * GS2SolrSearch.java
 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package org.greenstone.gsdl3.service;

// Greenstone classes
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
import org.greenstone.gsdl3.util.FacetWrapper;
import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.SolrFacetWrapper;
import org.greenstone.gsdl3.util.SolrQueryResult;
import org.greenstone.gsdl3.util.SolrQueryWrapper;
import org.greenstone.util.GlobalProperties;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
{
    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());

    static protected CoreContainer all_solr_cores = null;

    protected HashMap solr_core_cache;
    protected SolrQueryWrapper solr_src = null;

    protected ArrayList<String> _facets = new ArrayList<String>();

    public GS2SolrSearch()
    {
        does_faceting = true;
        // Used to store the solr cores that match the required 'level'
        // of search (e.g. either document-level=>didx, or
        // section-level=>sidx). The hashmap is filled out on demand,
        // based on the 'level' parameter passed in to 'setUpQueryer()'

        solr_core_cache = new HashMap();

        if (all_solr_cores == null)
        {
            // Share one CoreContainer across all sites/collections
            try
            {
                String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
                String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");

                String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);

                all_solr_cores = new CoreContainer(solr_home_str);
            }
            catch (Exception e)
            {
                e.printStackTrace();
            }
        }

        this.solr_src = new SolrQueryWrapper();
    }

    /** configure this service */
    public boolean configure(Element info, Element extra_info)
    {
        boolean success = super.configure(info, extra_info);

        // 1. Make the CoreContainer reload solr.xml
        // This is particularly needed for when activate.pl is executed during
        // a running GS3 server. At that point, the solr collection is reactivated and
        // we need to tell Greenstone that the solr index has changed. This requires
        // the CoreContainer to reload the solr.xml file, after which it all works again.

        solr_core_cache.clear(); // clear the map of this collection's solr cores that were added to the map upon querying

        // Reload the updated solr.xml into the CoreContainer
        // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
        try {
            String solr_home_str = all_solr_cores.getSolrHome();
            File solr_home = new File(solr_home_str);
            File solr_xml = new File(solr_home, "solr.xml");

            //all_solr_cores.load(solr_home_str, solr_xml);
            all_solr_cores.load();

        } catch (Exception e) {
            logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
            e.printStackTrace();
            return false;
        }

        if (!success) {
            return false;
        }

        // 2. Setting up facets
        // TODO - get these from build config, in case some haven't been built
        Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
        NodeList facet_list = info.getElementsByTagName("facet");
        for (int i = 0; i < facet_list.getLength(); i++) {
            _facets.add(((Element) facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
        }
        // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);

        // ArrayList<String> chosenFacets = new ArrayList<String>();
        // for (int i = 0; i < configIndexElems.getLength(); i++)
        // {
        //     Element current = (Element) configIndexElems.item(i);
        //     if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
        //     {
        //         chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
        //     }
        // }

        // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
        // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);

        // for (int j = 0; j < buildIndexElems.getLength(); j++)
        // {
        //     Element current = (Element) buildIndexElems.item(j);
        //     for (int i = 0; i < chosenFacets.size(); i++)
        //     {
        //         if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
        //         {
        //             _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
        //         }
        //     }
        // }

        return true;
    }

    public void cleanUp()
    {
        super.cleanUp();
        this.solr_src.cleanUp();

        // 1. Clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
        solr_core_cache.clear();

        // 2. For solr 4.7.2, GLI (and ant stop from the command line) is unable to shut down the tomcat server fully
        // IF any collection has been previewed AND if there are any solr collections in the collect folder.
        // This is because, although the GS3 server seems to have stopped running at this stage, running
        // `ps aux | grep tomcat` reveals that some part of tomcat is still running. It seems to be still
        // holding on to the cores. Doing an all_solr_cores.shutdown() here stops GS3 from hanging on to the cores
        // while still preserving the core descriptions in web/ext/solr.xml as needed when restarting the GS3 server.

        // Need the GS3 server (tomcat) to release the cores, else a part of tomcat is still running in the background
        // on ant stop, holding a lock on the cores. Doing shutdown() preserves the core descriptions in solr.xml
        all_solr_cores.shutdown();
        all_solr_cores = null;

        // For solr 3.3.0's jetty server, but not for solr 4.7.2's jetty server:
        /*

        // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
        // references to this collection's solrcores in the CoreContainer object, which can be more SolrCores than
        // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
        // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
        // for this collection even if EmbeddedSolrServers for these were not added to the solr_core_cache map.

        // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
        solr_core_cache.clear();

        // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
        String collection_core_name_prefix = getCollectionCoreNamePrefix();

        if (all_solr_cores != null) {
            Collection<String> coreNames = all_solr_cores.getCoreNames();
            if (!coreNames.isEmpty()) {
                Iterator<String> coreIterator = coreNames.iterator();
                while (coreIterator.hasNext()) {

                    String solrCoreName = coreIterator.next();
                    if (solrCoreName.startsWith(collection_core_name_prefix)) {

                        logger.info("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");

                        // CoreContainer.remove(String name): removes and returns the registered core w/o decrementing its reference count
                        // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
                        SolrCore solr_core = all_solr_cores.remove(solrCoreName);
                        while (!solr_core.isClosed()) {
                            logger.warn("@@@@@@ " + solrCoreName + " was not closed. Closing....");
                            solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
                        }
                        if (solr_core.isClosed()) {
                            logger.info("@@@@@@ " + solrCoreName + " is closed.");
                        }
                        solr_core = null;
                    }
                }
            }
        }

        // 3. If there are no more solr cores in Greenstone, then all_solr_cores will be empty: null the CoreContainer.
        // All going well, this will happen when we're ant stopping the Greenstone server and the last Solr collection
        // is being deactivated
        if (all_solr_cores != null) {
            Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
            if (coreNamesRemaining.isEmpty()) {
                logger.info("**** CoreContainer contains 0 solrCores. Shutting down...");

                all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores, I think
                all_solr_cores = null;
            }
            else { // else part is just for debugging
                Iterator coreIterator = coreNamesRemaining.iterator();
                while (coreIterator.hasNext()) {
                    logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
                }
            }
        }
        */
    }

    /** methods to handle actually doing the query */

    /** do any initialisation of the query object */
    protected boolean setUpQueryer(HashMap params)
    {
        this.solr_src.clearFacets();
        this.solr_src.clearFacetQueries();

        for (int i = 0; i < _facets.size(); i++)
        {
            this.solr_src.addFacet(_facets.get(i));
        }

        String index = "didx";
        String physical_index_language_name = null;
        String physical_sub_index_name = null;
        int maxdocs = 100;
        int hits_per_page = 20;
        int start_page = 1;
        // set up the query params
        Set entries = params.entrySet();
        Iterator i = entries.iterator();
        while (i.hasNext())
        {
            Map.Entry m = (Map.Entry) i.next();
            String name = (String) m.getKey();
            String value = (String) m.getValue();

            ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);

            if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
            {
                maxdocs = Integer.parseInt(value);
            }
            else if (name.equals(HITS_PER_PAGE_PARAM))
            {
                hits_per_page = Integer.parseInt(value);
            }
            else if (name.equals(START_PAGE_PARAM))
            {
                start_page = Integer.parseInt(value);
            }
            else if (name.equals(MATCH_PARAM))
            {
                if (value.equals(MATCH_PARAM_ALL))
                {
                    this.solr_src.setDefaultConjunctionOperator("AND");
                }
                else
                {
                    this.solr_src.setDefaultConjunctionOperator("OR");
                }
            }
            else if (name.equals(RANK_PARAM))
            {
                if (value.equals(RANK_PARAM_RANK))
                {
                    value = SolrQueryWrapper.SORT_BY_RANK;
                } else if (value.equals(RANK_PARAM_NONE)) {
                    value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
                }

                this.solr_src.setSortField(value);
            }
            else if (name.equals(SORT_ORDER_PARAM)) {
                if (value.equals(SORT_ORDER_DESCENDING)) {
                    this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
                } else {
                    this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
                }
            }
            else if (name.equals(LEVEL_PARAM))
            {
                if (value.toUpperCase().equals("SEC"))
                {
                    index = "sidx";
                }
                else
                {
                    index = "didx";
                }
            }
            // Would facets ever come in through params???
            else if (name.equals("facets") && value.length() > 0)
            {
                String[] facets = value.split(",");

                for (String facet : facets)
                {
                    this.solr_src.addFacet(facet);
                }
            }
            else if (name.equals("facetQueries") && value.length() > 0)
            {
                this.solr_src.addFacetQuery(value);
            }
            else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
            {
                physical_sub_index_name = value;
            }
            else if (name.equals(INDEX_LANGUAGE_PARAM))
            {
                physical_index_language_name = value;
            } // ignore any others
        }
        // set up start and end results if necessary
        int start_results = 1;
        if (start_page != 1)
        {
            start_results = ((start_page - 1) * hits_per_page) + 1;
        }
        int end_results = hits_per_page * start_page;
        this.solr_src.setStartResults(start_results);
        this.solr_src.setEndResults(end_results);
        this.solr_src.setMaxDocs(maxdocs);

        if (index.equals("sidx") || index.equals("didx"))
        {
            if (physical_sub_index_name != null)
            {
                index += physical_sub_index_name;
            }
            if (physical_index_language_name != null)
            {
                index += physical_index_language_name;
            }
        }

        // now that we know the index level, we can dig out the required
        // solr-core (caching the result in 'solr_core_cache')
        String core_name = getCollectionCoreNamePrefix() + "-" + index;

        EmbeddedSolrServer solr_core = null;

        if (!solr_core_cache.containsKey(core_name))
        {
            solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);

            solr_core_cache.put(core_name, solr_core);
        }
        else
        {
            solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
        }

        this.solr_src.setSolrCore(solr_core);
        this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
        this.solr_src.initialise();
        return true;
    }

    /** do the query */
    protected Object runQuery(String query)
    {
        try
        {
            //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
            SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);

            return sqr;
        }
        catch (Exception e)
        {
            logger.error("Exception happened in run query: ", e);
        }

        return null;
    }

    /** get the total number of docs that match */
    protected long numDocsMatched(Object query_result)
    {
        return ((SharedSoleneQueryResult) query_result).getTotalDocs();
    }

    /** get the list of doc ids */
    protected String[] getDocIDs(Object query_result)
    {
        Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
        String[] doc_nums = new String[docs.size()];
        for (int d = 0; d < docs.size(); d++)
        {
            String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
            doc_nums[d] = doc_num;
        }
        return doc_nums;
    }

    /** get the list of doc ranks */
    protected String[] getDocRanks(Object query_result)
    {
        Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
        String[] doc_ranks = new String[docs.size()];
        for (int d = 0; d < docs.size(); d++)
        {
            doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
        }
        return doc_ranks;
    }

    /** add in term info if available */
    protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
    {
        Document doc = term_list.getOwnerDocument();
        String query_level = (String) params.get(LEVEL_PARAM); // the current query level

        Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
        for (int t = 0; t < terms.size(); t++)
        {
            SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);

            Element term_elem = doc.createElement(GSXML.TERM_ELEM);
            term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
            term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
            term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
            term_elem.setAttribute(FIELD_ATT, term_info.field_);
            term_list.appendChild(term_elem);
        }

        Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
        for (int t = 0; t < stopwords.size(); t++)
        {
            String stopword = (String) stopwords.get(t);

            Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
            stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
            term_list.appendChild(stopword_elem);
        }

        return true;
    }

    protected ArrayList<FacetWrapper> getFacets(Object query_result)
    {
        if (!(query_result instanceof SolrQueryResult))
        {
            return null;
        }

        SolrQueryResult result = (SolrQueryResult) query_result;
        List<FacetField> facets = result.getFacetResults();

        if (facets == null)
        {
            return null;
        }

        ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();

        for (FacetField facet : facets)
        {
            SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
            // String name = wrap.getName();
            // String display_name = "Poo";
            // wrap.setDisplayName(display_name);

            newFacetList.add(wrap);
        }

        return newFacetList;
    }


    protected String getCollectionCoreNamePrefix() {
        String site_name = this.router.getSiteName();
        String coll_name = this.cluster_name;
        String collection_core_name_prefix = site_name + "-" + coll_name;
        return collection_core_name_prefix;
    }
}