source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 27867

Last change on this file since 27867 was 27867, checked in by davidb, 11 years ago

Change to use gsdl3_writablehome to help Solr run off CDROM/DVD

  • Property svn:executable set to *
File size: 14.5 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.Collection;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30import java.util.Vector;
31
32import org.apache.log4j.Logger;
33import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.apache.solr.core.CoreContainer;
36import org.apache.solr.core.SolrCore;
37import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
38import org.greenstone.gsdl3.util.FacetWrapper;
39import org.greenstone.gsdl3.util.GSFile;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.SolrFacetWrapper;
42import org.greenstone.gsdl3.util.SolrQueryResult;
43import org.greenstone.gsdl3.util.SolrQueryWrapper;
44import org.greenstone.util.GlobalProperties;
45import org.w3c.dom.Element;
46import org.w3c.dom.NodeList;
47
48public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
49{
50 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
51
52 static protected CoreContainer all_solr_cores = null;
53
54 protected HashMap solr_core_cache;
55 protected SolrQueryWrapper solr_src = null;
56
57 protected ArrayList<String> _facets = new ArrayList<String>();
58
59 public GS2SolrSearch()
60 {
61 does_faceting = true;
62 // Used to store the solr cores that match the required 'level'
63 // of search (e.g. either document-level=>didx, or
64 // section-level=>sidx. The hashmap is filled out on demand
65 // based on 'level' parameter passed in to 'setUpQueryer()'
66
67 solr_core_cache = new HashMap();
68
69 if (all_solr_cores == null)
70 {
71 // Share one CoreContainer across all sites/collections
72 try
73 {
74 String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
75 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
76
77 String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);
78
79 all_solr_cores = new CoreContainer(solr_home_str);
80 }
81 catch (Exception e)
82 {
83 e.printStackTrace();
84 }
85 }
86
87 this.solr_src = new SolrQueryWrapper();
88 }
89
90 /** configure this service */
91 public boolean configure(Element info, Element extra_info)
92 {
93 boolean success = super.configure(info, extra_info);
94
95 // 1. Make the CoreContainer reload solr.xml
96 // This is particularly needed for when activate.pl is executed during
97 // a running GS3 server. At that point, the solr collection is reactivated and
98 // we need to tell Greenstone that the solr index has changed. This requires
99 // the CoreContainer to reload the solr.xml file, and it all works again.
100
101 solr_core_cache.clear(); // clear the map of solr cores for this collection added to the map upon querying
102
103 // Reload the updated solr.xml into the CoreContainer
104 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
105 try {
106 String solr_home_str = all_solr_cores.getSolrHome();
107 File solr_home = new File(solr_home_str);
108 File solr_xml = new File( solr_home,"solr.xml" );
109
110 all_solr_cores.load(solr_home_str,solr_xml);
111
112 } catch (Exception e) {
113 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
114 e.printStackTrace();
115 return false;
116 }
117
118 if(!success) {
119 return false;
120 }
121
122 // 2. Setting up facets
123 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
124 NodeList facet_list = info.getElementsByTagName("facet");
125 for (int i=0; i<facet_list.getLength(); i++) {
126 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
127 }
128 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
129
130 // ArrayList<String> chosenFacets = new ArrayList<String>();
131 // for (int i = 0; i < configIndexElems.getLength(); i++)
132 // {
133 // Element current = (Element) configIndexElems.item(i);
134 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
135 // {
136 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
137 // }
138 // }
139
140 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
141 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
142
143 // for (int j = 0; j < buildIndexElems.getLength(); j++)
144 // {
145 // Element current = (Element) buildIndexElems.item(j);
146 // for (int i = 0; i < chosenFacets.size(); i++)
147 // {
148 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
149 // {
150 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
151 // }
152 // }
153 // }
154
155 return true;
156 }
157
158 public void cleanUp()
159 {
160 super.cleanUp();
161 this.solr_src.cleanUp();
162
163 // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
164 // references to this collection's sorlcores in the CoreContainer object, which can be more SolrCores than
165 // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
166 // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
167 // for this collection even if EmbeddedSolrServers for these were not added to the solr_core_cache_map.
168
169 // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
170 solr_core_cache.clear();
171
172 // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
173 String collection_core_name_prefix = getCollectionCoreNamePrefix();
174
175 Collection<String> coreNames = all_solr_cores.getCoreNames();
176 if(!coreNames.isEmpty()) {
177 Iterator<String> coreIterator = coreNames.iterator();
178 while(coreIterator.hasNext()) {
179
180 String solrCoreName = coreIterator.next();
181 if(solrCoreName.startsWith(collection_core_name_prefix)) {
182
183 logger.error("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");
184
185 // CoreContainer.remove(String name): removes and returns registered core w/o decrementing it's reference count
186 // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
187 SolrCore solr_core = all_solr_cores.remove(solrCoreName);
188 while(!solr_core.isClosed()) {
189 logger.error("@@@@@@ " + solrCoreName + " was not closed. Closing....");
190 solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
191 }
192 if(solr_core.isClosed()) {
193 logger.error("@@@@@@ " + solrCoreName + " is closed.");
194 }
195 solr_core = null;
196 }
197 }
198 }
199
200 // 3. if there are no more solr cores in Greenstone, then all_solr_cores will be empty, null the CoreContainer
201 // All going well, this will happen when we're ant stopping the Greenstone server and the last Solr collection
202 // is being deactivated
203 Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
204 if(coreNamesRemaining.isEmpty()) {
205 logger.error("**** CoreContainer contains 0 solrCores. Shutting down...");
206
207 all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores I think
208 all_solr_cores = null;
209 }
210 else { // else part is just for debugging
211 Iterator coreIterator = coreNamesRemaining.iterator();
212 while(coreIterator.hasNext()) {
213 logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
214 }
215 }
216 }
217
218 /** methods to handle actually doing the query */
219
220 /** do any initialisation of the query object */
221 protected boolean setUpQueryer(HashMap params)
222 {
223 this.solr_src.clearFacets();
224 this.solr_src.clearFacetQueries();
225
226 for (int i = 0; i < _facets.size(); i++)
227 {
228 this.solr_src.addFacet(_facets.get(i));
229 }
230
231 String index = "didx";
232 String physical_index_language_name = null;
233 String physical_sub_index_name = null;
234 int maxdocs = 100;
235 int hits_per_page = 20;
236 int start_page = 1;
237 // set up the query params
238 Set entries = params.entrySet();
239 Iterator i = entries.iterator();
240 while (i.hasNext())
241 {
242 Map.Entry m = (Map.Entry) i.next();
243 String name = (String) m.getKey();
244 String value = (String) m.getValue();
245
246 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
247 {
248 maxdocs = Integer.parseInt(value);
249 }
250 else if (name.equals(HITS_PER_PAGE_PARAM))
251 {
252 hits_per_page = Integer.parseInt(value);
253 }
254 else if (name.equals(START_PAGE_PARAM))
255 {
256 start_page = Integer.parseInt(value);
257 }
258 else if (name.equals(MATCH_PARAM))
259 {
260 if (value.equals(MATCH_PARAM_ALL))
261 {
262 this.solr_src.setDefaultConjunctionOperator("AND");
263 }
264 else
265 {
266 this.solr_src.setDefaultConjunctionOperator("OR");
267 }
268 }
269 else if (name.equals(RANK_PARAM))
270 {
271 if (value.equals(RANK_PARAM_RANK_VALUE))
272 {
273 value = null;
274 }
275 this.solr_src.setSortField(value);
276 }
277 else if (name.equals(LEVEL_PARAM))
278 {
279 if (value.toUpperCase().equals("SEC"))
280 {
281 index = "sidx";
282 }
283 else
284 {
285 index = "didx";
286 }
287 }
288 else if (name.equals("facets") && value.length() > 0)
289 {
290 String[] facets = value.split(",");
291
292 for (String facet : facets)
293 {
294 this.solr_src.addFacet(facet);
295 }
296 }
297 else if (name.equals("facetQueries") && value.length() > 0)
298 {
299 this.solr_src.addFacetQuery(value);
300 }
301 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
302 {
303 physical_sub_index_name = value;
304 }
305 else if (name.equals(INDEX_LANGUAGE_PARAM))
306 {
307 physical_index_language_name = value;
308 } // ignore any others
309 }
310 // set up start and end results if necessary
311 int start_results = 1;
312 if (start_page != 1)
313 {
314 start_results = ((start_page - 1) * hits_per_page) + 1;
315 }
316 int end_results = hits_per_page * start_page;
317 this.solr_src.setStartResults(start_results);
318 this.solr_src.setEndResults(end_results);
319 this.solr_src.setMaxDocs(maxdocs);
320
321 if (index.equals("sidx") || index.equals("didx"))
322 {
323 if (physical_sub_index_name != null)
324 {
325 index += physical_sub_index_name;
326 }
327 if (physical_index_language_name != null)
328 {
329 index += physical_index_language_name;
330 }
331 }
332
333 // now we know the index level, we can dig out the required
334 // solr-core, (caching the result in 'solr_core_cache')
335 String core_name = getCollectionCoreNamePrefix() + "-" + index;
336
337 EmbeddedSolrServer solr_core = null;
338
339 if (!solr_core_cache.containsKey(core_name))
340 {
341 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
342
343 solr_core_cache.put(core_name, solr_core);
344 }
345 else
346 {
347 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
348 }
349
350 this.solr_src.setSolrCore(solr_core);
351 this.solr_src.initialise();
352 return true;
353 }
354
355 /** do the query */
356 protected Object runQuery(String query)
357 {
358 try
359 {
360 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
361 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
362
363 return sqr;
364 }
365 catch (Exception e)
366 {
367 logger.error("Exception happened in run query: ", e);
368 }
369
370 return null;
371 }
372
373 /** get the total number of docs that match */
374 protected long numDocsMatched(Object query_result)
375 {
376 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
377
378 }
379
380 /** get the list of doc ids */
381 protected String[] getDocIDs(Object query_result)
382 {
383 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
384 String[] doc_nums = new String[docs.size()];
385 for (int d = 0; d < docs.size(); d++)
386 {
387 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
388 doc_nums[d] = doc_num;
389 }
390 return doc_nums;
391 }
392
393 /** get the list of doc ranks */
394 protected String[] getDocRanks(Object query_result)
395 {
396 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
397 String[] doc_ranks = new String[docs.size()];
398 for (int d = 0; d < docs.size(); d++)
399 {
400 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
401 }
402 return doc_ranks;
403 }
404
405 /** add in term info if available */
406 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
407 {
408 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
409
410 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
411 for (int t = 0; t < terms.size(); t++)
412 {
413 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
414
415 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
416 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
417 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
418 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
419 term_elem.setAttribute(FIELD_ATT, term_info.field_);
420 term_list.appendChild(term_elem);
421 }
422
423 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
424 for (int t = 0; t < stopwords.size(); t++)
425 {
426 String stopword = (String) stopwords.get(t);
427
428 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
429 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
430 term_list.appendChild(stopword_elem);
431 }
432
433 return true;
434 }
435
436 protected ArrayList<FacetWrapper> getFacets(Object query_result)
437 {
438 if (!(query_result instanceof SolrQueryResult))
439 {
440 return null;
441 }
442
443 SolrQueryResult result = (SolrQueryResult) query_result;
444 List<FacetField> facets = result.getFacetResults();
445
446 if (facets == null)
447 {
448 return null;
449 }
450
451 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
452
453 for (FacetField facet : facets)
454 {
455 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
456 String name = wrap.getName();
457 String display_name = "Poo";
458 //wrap.setDisplayName(display_name);
459
460 newFacetList.add(wrap);
461 }
462
463 return newFacetList;
464 }
465
466
467 protected String getCollectionCoreNamePrefix() {
468 String site_name = this.router.getSiteName();
469 String coll_name = this.cluster_name;
470 String collection_core_name_prefix = site_name + "-" + coll_name;
471 return collection_core_name_prefix;
472 }
473}
Note: See TracBrowser for help on using the repository browser.