source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 27872

Last change on this file since 27872 was 27872, checked in by davidb, 11 years ago

Null pointer check needed for when none of the solr cores have been loaded in (because no one has entered a Solr collection)

  • Property svn:executable set to *
File size: 14.6 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.Collection;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30import java.util.Vector;
31
32import org.apache.log4j.Logger;
33import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.apache.solr.core.CoreContainer;
36import org.apache.solr.core.SolrCore;
37import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
38import org.greenstone.gsdl3.util.FacetWrapper;
39import org.greenstone.gsdl3.util.GSFile;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.SolrFacetWrapper;
42import org.greenstone.gsdl3.util.SolrQueryResult;
43import org.greenstone.gsdl3.util.SolrQueryWrapper;
44import org.greenstone.util.GlobalProperties;
45import org.w3c.dom.Element;
46import org.w3c.dom.NodeList;
47
48public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
49{
50 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
51
52 static protected CoreContainer all_solr_cores = null;
53
54 protected HashMap solr_core_cache;
55 protected SolrQueryWrapper solr_src = null;
56
57 protected ArrayList<String> _facets = new ArrayList<String>();
58
59 public GS2SolrSearch()
60 {
61 does_faceting = true;
62 // Used to store the solr cores that match the required 'level'
63 // of search (e.g. either document-level=>didx, or
64 // section-level=>sidx. The hashmap is filled out on demand
65 // based on 'level' parameter passed in to 'setUpQueryer()'
66
67 solr_core_cache = new HashMap();
68
69 if (all_solr_cores == null)
70 {
71 // Share one CoreContainer across all sites/collections
72 try
73 {
74 String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
75 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
76
77 String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);
78
79 all_solr_cores = new CoreContainer(solr_home_str);
80 }
81 catch (Exception e)
82 {
83 e.printStackTrace();
84 }
85 }
86
87 this.solr_src = new SolrQueryWrapper();
88 }
89
90 /** configure this service */
91 public boolean configure(Element info, Element extra_info)
92 {
93 boolean success = super.configure(info, extra_info);
94
95 // 1. Make the CoreContainer reload solr.xml
96 // This is particularly needed for when activate.pl is executed during
97 // a running GS3 server. At that point, the solr collection is reactivated and
98 // we need to tell Greenstone that the solr index has changed. This requires
99 // the CoreContainer to reload the solr.xml file, and it all works again.
100
101 solr_core_cache.clear(); // clear the map of solr cores for this collection added to the map upon querying
102
103 // Reload the updated solr.xml into the CoreContainer
104 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
105 try {
106 String solr_home_str = all_solr_cores.getSolrHome();
107 File solr_home = new File(solr_home_str);
108 File solr_xml = new File( solr_home,"solr.xml" );
109
110 all_solr_cores.load(solr_home_str,solr_xml);
111
112 } catch (Exception e) {
113 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
114 e.printStackTrace();
115 return false;
116 }
117
118 if(!success) {
119 return false;
120 }
121
122 // 2. Setting up facets
123 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
124 NodeList facet_list = info.getElementsByTagName("facet");
125 for (int i=0; i<facet_list.getLength(); i++) {
126 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
127 }
128 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
129
130 // ArrayList<String> chosenFacets = new ArrayList<String>();
131 // for (int i = 0; i < configIndexElems.getLength(); i++)
132 // {
133 // Element current = (Element) configIndexElems.item(i);
134 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
135 // {
136 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
137 // }
138 // }
139
140 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
141 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
142
143 // for (int j = 0; j < buildIndexElems.getLength(); j++)
144 // {
145 // Element current = (Element) buildIndexElems.item(j);
146 // for (int i = 0; i < chosenFacets.size(); i++)
147 // {
148 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
149 // {
150 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
151 // }
152 // }
153 // }
154
155 return true;
156 }
157
158 public void cleanUp()
159 {
160 super.cleanUp();
161 this.solr_src.cleanUp();
162
163 // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
164 // references to this collection's sorlcores in the CoreContainer object, which can be more SolrCores than
165 // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
166 // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
167 // for this collection even if EmbeddedSolrServers for these were not added to the solr_core_cache_map.
168
169 // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
170 solr_core_cache.clear();
171
172 // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
173 String collection_core_name_prefix = getCollectionCoreNamePrefix();
174
175 if (all_solr_cores!=null) {
176 Collection<String> coreNames = all_solr_cores.getCoreNames();
177 if(!coreNames.isEmpty()) {
178 Iterator<String> coreIterator = coreNames.iterator();
179 while(coreIterator.hasNext()) {
180
181 String solrCoreName = coreIterator.next();
182 if(solrCoreName.startsWith(collection_core_name_prefix)) {
183
184 logger.error("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");
185
186 // CoreContainer.remove(String name): removes and returns registered core w/o decrementing it's reference count
187 // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
188 SolrCore solr_core = all_solr_cores.remove(solrCoreName);
189 while(!solr_core.isClosed()) {
190 logger.error("@@@@@@ " + solrCoreName + " was not closed. Closing....");
191 solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
192 }
193 if(solr_core.isClosed()) {
194 logger.error("@@@@@@ " + solrCoreName + " is closed.");
195 }
196 solr_core = null;
197 }
198 }
199 }
200 }
201
202 // 3. if there are no more solr cores in Greenstone, then all_solr_cores will be empty, null the CoreContainer
203 // All going well, this will happen when we're ant stopping the Greenstone server and the last Solr collection
204 // is being deactivated
205
206 if (all_solr_cores!=null) {
207 Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
208 if(coreNamesRemaining.isEmpty()) {
209 logger.error("**** CoreContainer contains 0 solrCores. Shutting down...");
210
211 all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores I think
212 all_solr_cores = null;
213 }
214 else { // else part is just for debugging
215 Iterator coreIterator = coreNamesRemaining.iterator();
216 while(coreIterator.hasNext()) {
217 logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
218 }
219 }
220 }
221 }
222
223 /** methods to handle actually doing the query */
224
225 /** do any initialisation of the query object */
226 protected boolean setUpQueryer(HashMap params)
227 {
228 this.solr_src.clearFacets();
229 this.solr_src.clearFacetQueries();
230
231 for (int i = 0; i < _facets.size(); i++)
232 {
233 this.solr_src.addFacet(_facets.get(i));
234 }
235
236 String index = "didx";
237 String physical_index_language_name = null;
238 String physical_sub_index_name = null;
239 int maxdocs = 100;
240 int hits_per_page = 20;
241 int start_page = 1;
242 // set up the query params
243 Set entries = params.entrySet();
244 Iterator i = entries.iterator();
245 while (i.hasNext())
246 {
247 Map.Entry m = (Map.Entry) i.next();
248 String name = (String) m.getKey();
249 String value = (String) m.getValue();
250
251 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
252 {
253 maxdocs = Integer.parseInt(value);
254 }
255 else if (name.equals(HITS_PER_PAGE_PARAM))
256 {
257 hits_per_page = Integer.parseInt(value);
258 }
259 else if (name.equals(START_PAGE_PARAM))
260 {
261 start_page = Integer.parseInt(value);
262 }
263 else if (name.equals(MATCH_PARAM))
264 {
265 if (value.equals(MATCH_PARAM_ALL))
266 {
267 this.solr_src.setDefaultConjunctionOperator("AND");
268 }
269 else
270 {
271 this.solr_src.setDefaultConjunctionOperator("OR");
272 }
273 }
274 else if (name.equals(RANK_PARAM))
275 {
276 if (value.equals(RANK_PARAM_RANK_VALUE))
277 {
278 value = null;
279 }
280 this.solr_src.setSortField(value);
281 }
282 else if (name.equals(LEVEL_PARAM))
283 {
284 if (value.toUpperCase().equals("SEC"))
285 {
286 index = "sidx";
287 }
288 else
289 {
290 index = "didx";
291 }
292 }
293 else if (name.equals("facets") && value.length() > 0)
294 {
295 String[] facets = value.split(",");
296
297 for (String facet : facets)
298 {
299 this.solr_src.addFacet(facet);
300 }
301 }
302 else if (name.equals("facetQueries") && value.length() > 0)
303 {
304 this.solr_src.addFacetQuery(value);
305 }
306 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
307 {
308 physical_sub_index_name = value;
309 }
310 else if (name.equals(INDEX_LANGUAGE_PARAM))
311 {
312 physical_index_language_name = value;
313 } // ignore any others
314 }
315 // set up start and end results if necessary
316 int start_results = 1;
317 if (start_page != 1)
318 {
319 start_results = ((start_page - 1) * hits_per_page) + 1;
320 }
321 int end_results = hits_per_page * start_page;
322 this.solr_src.setStartResults(start_results);
323 this.solr_src.setEndResults(end_results);
324 this.solr_src.setMaxDocs(maxdocs);
325
326 if (index.equals("sidx") || index.equals("didx"))
327 {
328 if (physical_sub_index_name != null)
329 {
330 index += physical_sub_index_name;
331 }
332 if (physical_index_language_name != null)
333 {
334 index += physical_index_language_name;
335 }
336 }
337
338 // now we know the index level, we can dig out the required
339 // solr-core, (caching the result in 'solr_core_cache')
340 String core_name = getCollectionCoreNamePrefix() + "-" + index;
341
342 EmbeddedSolrServer solr_core = null;
343
344 if (!solr_core_cache.containsKey(core_name))
345 {
346 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
347
348 solr_core_cache.put(core_name, solr_core);
349 }
350 else
351 {
352 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
353 }
354
355 this.solr_src.setSolrCore(solr_core);
356 this.solr_src.initialise();
357 return true;
358 }
359
360 /** do the query */
361 protected Object runQuery(String query)
362 {
363 try
364 {
365 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
366 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
367
368 return sqr;
369 }
370 catch (Exception e)
371 {
372 logger.error("Exception happened in run query: ", e);
373 }
374
375 return null;
376 }
377
378 /** get the total number of docs that match */
379 protected long numDocsMatched(Object query_result)
380 {
381 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
382
383 }
384
385 /** get the list of doc ids */
386 protected String[] getDocIDs(Object query_result)
387 {
388 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
389 String[] doc_nums = new String[docs.size()];
390 for (int d = 0; d < docs.size(); d++)
391 {
392 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
393 doc_nums[d] = doc_num;
394 }
395 return doc_nums;
396 }
397
398 /** get the list of doc ranks */
399 protected String[] getDocRanks(Object query_result)
400 {
401 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
402 String[] doc_ranks = new String[docs.size()];
403 for (int d = 0; d < docs.size(); d++)
404 {
405 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
406 }
407 return doc_ranks;
408 }
409
410 /** add in term info if available */
411 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
412 {
413 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
414
415 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
416 for (int t = 0; t < terms.size(); t++)
417 {
418 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
419
420 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
421 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
422 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
423 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
424 term_elem.setAttribute(FIELD_ATT, term_info.field_);
425 term_list.appendChild(term_elem);
426 }
427
428 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
429 for (int t = 0; t < stopwords.size(); t++)
430 {
431 String stopword = (String) stopwords.get(t);
432
433 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
434 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
435 term_list.appendChild(stopword_elem);
436 }
437
438 return true;
439 }
440
441 protected ArrayList<FacetWrapper> getFacets(Object query_result)
442 {
443 if (!(query_result instanceof SolrQueryResult))
444 {
445 return null;
446 }
447
448 SolrQueryResult result = (SolrQueryResult) query_result;
449 List<FacetField> facets = result.getFacetResults();
450
451 if (facets == null)
452 {
453 return null;
454 }
455
456 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
457
458 for (FacetField facet : facets)
459 {
460 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
461 String name = wrap.getName();
462 String display_name = "Poo";
463 //wrap.setDisplayName(display_name);
464
465 newFacetList.add(wrap);
466 }
467
468 return newFacetList;
469 }
470
471
472 protected String getCollectionCoreNamePrefix() {
473 String site_name = this.router.getSiteName();
474 String coll_name = this.cluster_name;
475 String collection_core_name_prefix = site_name + "-" + coll_name;
476 return collection_core_name_prefix;
477 }
478}
Note: See TracBrowser for help on using the repository browser.