source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 29142

Last change on this file since 29142 was 29142, checked in by ak19, 10 years ago

Part of port from lucene3.3.0 to lucene4.7.2. Solr related. 1. Java and perl code changes for solr4.7.2; 2. Dr Bainbridge further fixed a bug in how the variable called running is set in solrserver.pm

  • Property svn:executable set to *
File size: 15.2 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.Collection;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30import java.util.Vector;
31
32import org.apache.log4j.Logger;
33import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.apache.solr.core.CoreContainer;
36import org.apache.solr.core.SolrCore;
37import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
38import org.greenstone.gsdl3.util.FacetWrapper;
39import org.greenstone.gsdl3.util.GSFile;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.SolrFacetWrapper;
42import org.greenstone.gsdl3.util.SolrQueryResult;
43import org.greenstone.gsdl3.util.SolrQueryWrapper;
44import org.greenstone.util.GlobalProperties;
45import org.w3c.dom.Document;
46import org.w3c.dom.Element;
47import org.w3c.dom.NodeList;
48
49public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
50{
51 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
52
53 static protected CoreContainer all_solr_cores = null;
54
55 protected HashMap solr_core_cache;
56 protected SolrQueryWrapper solr_src = null;
57
58 protected ArrayList<String> _facets = new ArrayList<String>();
59
60 public GS2SolrSearch()
61 {
62 does_faceting = true;
63 // Used to store the solr cores that match the required 'level'
64 // of search (e.g. either document-level=>didx, or
65 // section-level=>sidx. The hashmap is filled out on demand
66 // based on 'level' parameter passed in to 'setUpQueryer()'
67
68 solr_core_cache = new HashMap();
69
70 if (all_solr_cores == null)
71 {
72 // Share one CoreContainer across all sites/collections
73 try
74 {
75 String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
76 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
77
78 String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);
79
80 all_solr_cores = new CoreContainer(solr_home_str);
81 }
82 catch (Exception e)
83 {
84 e.printStackTrace();
85 }
86 }
87
88 this.solr_src = new SolrQueryWrapper();
89 }
90
91 /** configure this service */
92 public boolean configure(Element info, Element extra_info)
93 {
94 boolean success = super.configure(info, extra_info);
95
96 // 1. Make the CoreContainer reload solr.xml
97 // This is particularly needed for when activate.pl is executed during
98 // a running GS3 server. At that point, the solr collection is reactivated and
99 // we need to tell Greenstone that the solr index has changed. This requires
100 // the CoreContainer to reload the solr.xml file, and it all works again.
101
102 solr_core_cache.clear(); // clear the map of solr cores for this collection added to the map upon querying
103
104 // Reload the updated solr.xml into the CoreContainer
105 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
106 try {
107 String solr_home_str = all_solr_cores.getSolrHome();
108 File solr_home = new File(solr_home_str);
109 File solr_xml = new File( solr_home,"solr.xml" );
110
111 //all_solr_cores.load(solr_home_str,solr_xml);
112 all_solr_cores.load();
113
114 } catch (Exception e) {
115 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
116 e.printStackTrace();
117 return false;
118 }
119
120 if(!success) {
121 return false;
122 }
123
124 // 2. Setting up facets
125 // TODO - get these from build config, in case some haven't built
126 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
127 NodeList facet_list = info.getElementsByTagName("facet");
128 for (int i=0; i<facet_list.getLength(); i++) {
129 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
130 }
131 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
132
133 // ArrayList<String> chosenFacets = new ArrayList<String>();
134 // for (int i = 0; i < configIndexElems.getLength(); i++)
135 // {
136 // Element current = (Element) configIndexElems.item(i);
137 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
138 // {
139 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
140 // }
141 // }
142
143 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
144 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
145
146 // for (int j = 0; j < buildIndexElems.getLength(); j++)
147 // {
148 // Element current = (Element) buildIndexElems.item(j);
149 // for (int i = 0; i < chosenFacets.size(); i++)
150 // {
151 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
152 // {
153 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
154 // }
155 // }
156 // }
157
158 return true;
159 }
160
161 public void cleanUp()
162 {
163 super.cleanUp();
164 this.solr_src.cleanUp();
165
166 // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
167 // references to this collection's sorlcores in the CoreContainer object, which can be more SolrCores than
168 // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
169 // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
170 // for this collection even if EmbeddedSolrServers for these were not added to the solr_core_cache_map.
171
172 // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
173 solr_core_cache.clear();
174
175 // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
176 String collection_core_name_prefix = getCollectionCoreNamePrefix();
177
178 if (all_solr_cores!=null) {
179 Collection<String> coreNames = all_solr_cores.getCoreNames();
180 if(!coreNames.isEmpty()) {
181 Iterator<String> coreIterator = coreNames.iterator();
182 while(coreIterator.hasNext()) {
183
184 String solrCoreName = coreIterator.next();
185 if(solrCoreName.startsWith(collection_core_name_prefix)) {
186
187 logger.info("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");
188
189 // CoreContainer.remove(String name): removes and returns registered core w/o decrementing it's reference count
190 // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
191 SolrCore solr_core = all_solr_cores.remove(solrCoreName);
192 while(!solr_core.isClosed()) {
193 logger.warn("@@@@@@ " + solrCoreName + " was not closed. Closing....");
194 solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
195 }
196 if(solr_core.isClosed()) {
197 logger.info("@@@@@@ " + solrCoreName + " is closed.");
198 }
199 solr_core = null;
200 }
201 }
202 }
203 }
204
205 // 3. if there are no more solr cores in Greenstone, then all_solr_cores will be empty, null the CoreContainer
206 // All going well, this will happen when we're ant stopping the Greenstone server and the last Solr collection
207 // is being deactivated
208 if (all_solr_cores!=null) {
209 Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
210 if(coreNamesRemaining.isEmpty()) {
211 logger.info("**** CoreContainer contains 0 solrCores. Shutting down...");
212
213 all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores I think
214 all_solr_cores = null;
215 }
216 else { // else part is just for debugging
217 Iterator coreIterator = coreNamesRemaining.iterator();
218 while(coreIterator.hasNext()) {
219 logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
220 }
221 }
222 }
223 }
224
225 /** methods to handle actually doing the query */
226
227 /** do any initialisation of the query object */
228 protected boolean setUpQueryer(HashMap params)
229 {
230 this.solr_src.clearFacets();
231 this.solr_src.clearFacetQueries();
232
233 for (int i = 0; i < _facets.size(); i++)
234 {
235 this.solr_src.addFacet(_facets.get(i));
236 }
237
238 String index = "didx";
239 String physical_index_language_name = null;
240 String physical_sub_index_name = null;
241 int maxdocs = 100;
242 int hits_per_page = 20;
243 int start_page = 1;
244 // set up the query params
245 Set entries = params.entrySet();
246 Iterator i = entries.iterator();
247 while (i.hasNext())
248 {
249 Map.Entry m = (Map.Entry) i.next();
250 String name = (String) m.getKey();
251 String value = (String) m.getValue();
252
253 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
254 {
255 maxdocs = Integer.parseInt(value);
256 }
257 else if (name.equals(HITS_PER_PAGE_PARAM))
258 {
259 hits_per_page = Integer.parseInt(value);
260 }
261 else if (name.equals(START_PAGE_PARAM))
262 {
263 start_page = Integer.parseInt(value);
264 }
265 else if (name.equals(MATCH_PARAM))
266 {
267 if (value.equals(MATCH_PARAM_ALL))
268 {
269 this.solr_src.setDefaultConjunctionOperator("AND");
270 }
271 else
272 {
273 this.solr_src.setDefaultConjunctionOperator("OR");
274 }
275 }
276 else if (name.equals(RANK_PARAM))
277 {
278 if (value.equals(RANK_PARAM_RANK))
279 {
280 value = SolrQueryWrapper.SORT_BY_RANK;
281 } else if (value.equals(RANK_PARAM_NONE)) {
282 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
283 }
284
285 this.solr_src.setSortField(value);
286 }
287 else if (name.equals(SORT_ORDER_PARAM)) {
288 if (value.equals(SORT_ORDER_DESCENDING)) {
289 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
290 } else {
291 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
292 }
293 }
294 else if (name.equals(LEVEL_PARAM))
295 {
296 if (value.toUpperCase().equals("SEC"))
297 {
298 index = "sidx";
299 }
300 else
301 {
302 index = "didx";
303 }
304 }
305 // Would facets ever come in through params???
306 else if (name.equals("facets") && value.length() > 0)
307 {
308 String[] facets = value.split(",");
309
310 for (String facet : facets)
311 {
312 this.solr_src.addFacet(facet);
313 }
314 }
315 else if (name.equals("facetQueries") && value.length() > 0)
316 {
317 this.solr_src.addFacetQuery(value);
318 }
319 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
320 {
321 physical_sub_index_name = value;
322 }
323 else if (name.equals(INDEX_LANGUAGE_PARAM))
324 {
325 physical_index_language_name = value;
326 } // ignore any others
327 }
328 // set up start and end results if necessary
329 int start_results = 1;
330 if (start_page != 1)
331 {
332 start_results = ((start_page - 1) * hits_per_page) + 1;
333 }
334 int end_results = hits_per_page * start_page;
335 this.solr_src.setStartResults(start_results);
336 this.solr_src.setEndResults(end_results);
337 this.solr_src.setMaxDocs(maxdocs);
338
339 if (index.equals("sidx") || index.equals("didx"))
340 {
341 if (physical_sub_index_name != null)
342 {
343 index += physical_sub_index_name;
344 }
345 if (physical_index_language_name != null)
346 {
347 index += physical_index_language_name;
348 }
349 }
350
351 // now we know the index level, we can dig out the required
352 // solr-core, (caching the result in 'solr_core_cache')
353 String core_name = getCollectionCoreNamePrefix() + "-" + index;
354
355 EmbeddedSolrServer solr_core = null;
356
357 if (!solr_core_cache.containsKey(core_name))
358 {
359 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
360
361 solr_core_cache.put(core_name, solr_core);
362 }
363 else
364 {
365 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
366 }
367
368 this.solr_src.setSolrCore(solr_core);
369 this.solr_src.initialise();
370 return true;
371 }
372
373 /** do the query */
374 protected Object runQuery(String query)
375 {
376 try
377 {
378 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
379 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
380
381 return sqr;
382 }
383 catch (Exception e)
384 {
385 logger.error("Exception happened in run query: ", e);
386 }
387
388 return null;
389 }
390
391 /** get the total number of docs that match */
392 protected long numDocsMatched(Object query_result)
393 {
394 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
395
396 }
397
398 /** get the list of doc ids */
399 protected String[] getDocIDs(Object query_result)
400 {
401 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
402 String[] doc_nums = new String[docs.size()];
403 for (int d = 0; d < docs.size(); d++)
404 {
405 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
406 doc_nums[d] = doc_num;
407 }
408 return doc_nums;
409 }
410
411 /** get the list of doc ranks */
412 protected String[] getDocRanks(Object query_result)
413 {
414 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
415 String[] doc_ranks = new String[docs.size()];
416 for (int d = 0; d < docs.size(); d++)
417 {
418 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
419 }
420 return doc_ranks;
421 }
422
423 /** add in term info if available */
424 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
425 {
426 Document doc = term_list.getOwnerDocument();
427 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
428
429 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
430 for (int t = 0; t < terms.size(); t++)
431 {
432 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
433
434 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
435 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
436 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
437 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
438 term_elem.setAttribute(FIELD_ATT, term_info.field_);
439 term_list.appendChild(term_elem);
440 }
441
442 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
443 for (int t = 0; t < stopwords.size(); t++)
444 {
445 String stopword = (String) stopwords.get(t);
446
447 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
448 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
449 term_list.appendChild(stopword_elem);
450 }
451
452 return true;
453 }
454
455 protected ArrayList<FacetWrapper> getFacets(Object query_result)
456 {
457 if (!(query_result instanceof SolrQueryResult))
458 {
459 return null;
460 }
461
462 SolrQueryResult result = (SolrQueryResult) query_result;
463 List<FacetField> facets = result.getFacetResults();
464
465 if (facets == null)
466 {
467 return null;
468 }
469
470 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
471
472 for (FacetField facet : facets)
473 {
474 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
475 // String name = wrap.getName();
476 // String display_name = "Poo";
477 // wrap.setDisplayName(display_name);
478
479 newFacetList.add(wrap);
480 }
481
482 return newFacetList;
483 }
484
485
486 protected String getCollectionCoreNamePrefix() {
487 String site_name = this.router.getSiteName();
488 String coll_name = this.cluster_name;
489 String collection_core_name_prefix = site_name + "-" + coll_name;
490 return collection_core_name_prefix;
491 }
492}
Note: See TracBrowser for help on using the repository browser.