source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 29228

Last change on this file since 29228 was 29228, checked in by ak19, 10 years ago

Belongs with commit revision 29217. Forgot to commit then: setting the collection prefix so that solrcores matching the collection are the ones used to obtain terms of a query from.

  • Property svn:executable set to *
File size: 15.3 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.Collection;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30import java.util.Vector;
31
32import org.apache.log4j.Logger;
33import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.apache.solr.core.CoreContainer;
36import org.apache.solr.core.SolrCore;
37import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
38import org.greenstone.gsdl3.util.FacetWrapper;
39import org.greenstone.gsdl3.util.GSFile;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.SolrFacetWrapper;
42import org.greenstone.gsdl3.util.SolrQueryResult;
43import org.greenstone.gsdl3.util.SolrQueryWrapper;
44import org.greenstone.util.GlobalProperties;
45import org.w3c.dom.Document;
46import org.w3c.dom.Element;
47import org.w3c.dom.NodeList;
48
49public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
50{
51 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
52
53 static protected CoreContainer all_solr_cores = null;
54
55 protected HashMap solr_core_cache;
56 protected SolrQueryWrapper solr_src = null;
57
58 protected ArrayList<String> _facets = new ArrayList<String>();
59
60 public GS2SolrSearch()
61 {
62 does_faceting = true;
63 // Used to store the solr cores that match the required 'level'
64 // of search (e.g. either document-level=>didx, or
65 // section-level=>sidx. The hashmap is filled out on demand
66 // based on 'level' parameter passed in to 'setUpQueryer()'
67
68 solr_core_cache = new HashMap();
69
70 if (all_solr_cores == null)
71 {
72 // Share one CoreContainer across all sites/collections
73 try
74 {
75 String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
76 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
77
78 String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);
79
80 all_solr_cores = new CoreContainer(solr_home_str);
81 }
82 catch (Exception e)
83 {
84 e.printStackTrace();
85 }
86 }
87
88 this.solr_src = new SolrQueryWrapper();
89 }
90
91 /** configure this service */
92 public boolean configure(Element info, Element extra_info)
93 {
94 boolean success = super.configure(info, extra_info);
95
96 // 1. Make the CoreContainer reload solr.xml
97 // This is particularly needed for when activate.pl is executed during
98 // a running GS3 server. At that point, the solr collection is reactivated and
99 // we need to tell Greenstone that the solr index has changed. This requires
100 // the CoreContainer to reload the solr.xml file, and it all works again.
101
102 solr_core_cache.clear(); // clear the map of solr cores for this collection added to the map upon querying
103
104 // Reload the updated solr.xml into the CoreContainer
105 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
106 try {
107 String solr_home_str = all_solr_cores.getSolrHome();
108 File solr_home = new File(solr_home_str);
109 File solr_xml = new File( solr_home,"solr.xml" );
110
111 //all_solr_cores.load(solr_home_str,solr_xml);
112 all_solr_cores.load();
113
114 } catch (Exception e) {
115 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
116 e.printStackTrace();
117 return false;
118 }
119
120 if(!success) {
121 return false;
122 }
123
124 // 2. Setting up facets
125 // TODO - get these from build config, in case some haven't built
126 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
127 NodeList facet_list = info.getElementsByTagName("facet");
128 for (int i=0; i<facet_list.getLength(); i++) {
129 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
130 }
131 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
132
133 // ArrayList<String> chosenFacets = new ArrayList<String>();
134 // for (int i = 0; i < configIndexElems.getLength(); i++)
135 // {
136 // Element current = (Element) configIndexElems.item(i);
137 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
138 // {
139 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
140 // }
141 // }
142
143 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
144 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
145
146 // for (int j = 0; j < buildIndexElems.getLength(); j++)
147 // {
148 // Element current = (Element) buildIndexElems.item(j);
149 // for (int i = 0; i < chosenFacets.size(); i++)
150 // {
151 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
152 // {
153 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
154 // }
155 // }
156 // }
157
158 return true;
159 }
160
161 public void cleanUp()
162 {
163 super.cleanUp();
164 this.solr_src.cleanUp();
165
166 // When cleaning up, not only do we need to empty the solr_core_cache map, but we also need to remove all
167 // references to this collection's sorlcores in the CoreContainer object, which can be more SolrCores than
168 // the EmbeddedSolrServers instantiated and added to the solr_core_cache, since the cache does lazy loading
169 // while the CoreContainer contains all the cores defined in solr.xml, which includes all *possible* cores
170 // for this collection even if EmbeddedSolrServers for these were not added to the solr_core_cache_map.
171
172 // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
173 solr_core_cache.clear();
174
175 // 2. Remove all SolrCores in the CoreContainer (all_solr_cores) that are specific to this collection
176 String collection_core_name_prefix = getCollectionCoreNamePrefix();
177
178 if (all_solr_cores!=null) {
179 Collection<String> coreNames = all_solr_cores.getCoreNames();
180 if(!coreNames.isEmpty()) {
181 Iterator<String> coreIterator = coreNames.iterator();
182 while(coreIterator.hasNext()) {
183
184 String solrCoreName = coreIterator.next();
185 if(solrCoreName.startsWith(collection_core_name_prefix)) {
186
187 logger.info("**** Removing collection-specific core: " + solrCoreName + " from CoreContainer");
188
189 // CoreContainer.remove(String name): removes and returns registered core w/o decrementing it's reference count
190 // http://lucene.apache.org/solr/api/index.html?org/apache/solr/core/CoreContainer.html
191 SolrCore solr_core = all_solr_cores.remove(solrCoreName);
192 while(!solr_core.isClosed()) {
193 logger.warn("@@@@@@ " + solrCoreName + " was not closed. Closing....");
194 solr_core.close(); // http://lucene.apache.org/solr/api/org/apache/solr/core/SolrCore.html
195 }
196 if(solr_core.isClosed()) {
197 logger.info("@@@@@@ " + solrCoreName + " is closed.");
198 }
199 solr_core = null;
200 }
201 }
202 }
203 }
204
205 // 3. if there are no more solr cores in Greenstone, then all_solr_cores will be empty, null the CoreContainer
206 // All going well, this will happen when we're ant stopping the Greenstone server and the last Solr collection
207 // is being deactivated
208 if (all_solr_cores!=null) {
209 Collection<String> coreNamesRemaining = all_solr_cores.getCoreNames();
210 if(coreNamesRemaining.isEmpty()) {
211 logger.info("**** CoreContainer contains 0 solrCores. Shutting down...");
212
213 all_solr_cores.shutdown(); // wouldn't do anything anyway for 0 cores I think
214 all_solr_cores = null;
215 }
216 else { // else part is just for debugging
217 Iterator coreIterator = coreNamesRemaining.iterator();
218 while(coreIterator.hasNext()) {
219 logger.error("**** Core: " + coreIterator.next() + " still exists in CoreContainer");
220 }
221 }
222 }
223 }
224
225 /** methods to handle actually doing the query */
226
227 /** do any initialisation of the query object */
228 protected boolean setUpQueryer(HashMap params)
229 {
230 this.solr_src.clearFacets();
231 this.solr_src.clearFacetQueries();
232
233 for (int i = 0; i < _facets.size(); i++)
234 {
235 this.solr_src.addFacet(_facets.get(i));
236 }
237
238 String index = "didx";
239 String physical_index_language_name = null;
240 String physical_sub_index_name = null;
241 int maxdocs = 100;
242 int hits_per_page = 20;
243 int start_page = 1;
244 // set up the query params
245 Set entries = params.entrySet();
246 Iterator i = entries.iterator();
247 while (i.hasNext())
248 {
249 Map.Entry m = (Map.Entry) i.next();
250 String name = (String) m.getKey();
251 String value = (String) m.getValue();
252
253 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
254
255 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
256 {
257 maxdocs = Integer.parseInt(value);
258 }
259 else if (name.equals(HITS_PER_PAGE_PARAM))
260 {
261 hits_per_page = Integer.parseInt(value);
262 }
263 else if (name.equals(START_PAGE_PARAM))
264 {
265 start_page = Integer.parseInt(value);
266 }
267 else if (name.equals(MATCH_PARAM))
268 {
269 if (value.equals(MATCH_PARAM_ALL))
270 {
271 this.solr_src.setDefaultConjunctionOperator("AND");
272 }
273 else
274 {
275 this.solr_src.setDefaultConjunctionOperator("OR");
276 }
277 }
278 else if (name.equals(RANK_PARAM))
279 {
280 if (value.equals(RANK_PARAM_RANK))
281 {
282 value = SolrQueryWrapper.SORT_BY_RANK;
283 } else if (value.equals(RANK_PARAM_NONE)) {
284 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
285 }
286
287 this.solr_src.setSortField(value);
288 }
289 else if (name.equals(SORT_ORDER_PARAM)) {
290 if (value.equals(SORT_ORDER_DESCENDING)) {
291 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
292 } else {
293 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
294 }
295 }
296 else if (name.equals(LEVEL_PARAM))
297 {
298 if (value.toUpperCase().equals("SEC"))
299 {
300 index = "sidx";
301 }
302 else
303 {
304 index = "didx";
305 }
306 }
307 // Would facets ever come in through params???
308 else if (name.equals("facets") && value.length() > 0)
309 {
310 String[] facets = value.split(",");
311
312 for (String facet : facets)
313 {
314 this.solr_src.addFacet(facet);
315 }
316 }
317 else if (name.equals("facetQueries") && value.length() > 0)
318 {
319 this.solr_src.addFacetQuery(value);
320 }
321 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
322 {
323 physical_sub_index_name = value;
324 }
325 else if (name.equals(INDEX_LANGUAGE_PARAM))
326 {
327 physical_index_language_name = value;
328 } // ignore any others
329 }
330 // set up start and end results if necessary
331 int start_results = 1;
332 if (start_page != 1)
333 {
334 start_results = ((start_page - 1) * hits_per_page) + 1;
335 }
336 int end_results = hits_per_page * start_page;
337 this.solr_src.setStartResults(start_results);
338 this.solr_src.setEndResults(end_results);
339 this.solr_src.setMaxDocs(maxdocs);
340
341 if (index.equals("sidx") || index.equals("didx"))
342 {
343 if (physical_sub_index_name != null)
344 {
345 index += physical_sub_index_name;
346 }
347 if (physical_index_language_name != null)
348 {
349 index += physical_index_language_name;
350 }
351 }
352
353 // now we know the index level, we can dig out the required
354 // solr-core, (caching the result in 'solr_core_cache')
355 String core_name = getCollectionCoreNamePrefix() + "-" + index;
356
357 EmbeddedSolrServer solr_core = null;
358
359 if (!solr_core_cache.containsKey(core_name))
360 {
361 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
362
363 solr_core_cache.put(core_name, solr_core);
364 }
365 else
366 {
367 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
368 }
369
370 this.solr_src.setSolrCore(solr_core);
371 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
372 this.solr_src.initialise();
373 return true;
374 }
375
376 /** do the query */
377 protected Object runQuery(String query)
378 {
379 try
380 {
381 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
382 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
383
384 return sqr;
385 }
386 catch (Exception e)
387 {
388 logger.error("Exception happened in run query: ", e);
389 }
390
391 return null;
392 }
393
394 /** get the total number of docs that match */
395 protected long numDocsMatched(Object query_result)
396 {
397 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
398
399 }
400
401 /** get the list of doc ids */
402 protected String[] getDocIDs(Object query_result)
403 {
404 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
405 String[] doc_nums = new String[docs.size()];
406 for (int d = 0; d < docs.size(); d++)
407 {
408 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
409 doc_nums[d] = doc_num;
410 }
411 return doc_nums;
412 }
413
414 /** get the list of doc ranks */
415 protected String[] getDocRanks(Object query_result)
416 {
417 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
418 String[] doc_ranks = new String[docs.size()];
419 for (int d = 0; d < docs.size(); d++)
420 {
421 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
422 }
423 return doc_ranks;
424 }
425
426 /** add in term info if available */
427 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
428 {
429 Document doc = term_list.getOwnerDocument();
430 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
431
432 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
433 for (int t = 0; t < terms.size(); t++)
434 {
435 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
436
437 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
438 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
439 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
440 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
441 term_elem.setAttribute(FIELD_ATT, term_info.field_);
442 term_list.appendChild(term_elem);
443 }
444
445 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
446 for (int t = 0; t < stopwords.size(); t++)
447 {
448 String stopword = (String) stopwords.get(t);
449
450 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
451 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
452 term_list.appendChild(stopword_elem);
453 }
454
455 return true;
456 }
457
458 protected ArrayList<FacetWrapper> getFacets(Object query_result)
459 {
460 if (!(query_result instanceof SolrQueryResult))
461 {
462 return null;
463 }
464
465 SolrQueryResult result = (SolrQueryResult) query_result;
466 List<FacetField> facets = result.getFacetResults();
467
468 if (facets == null)
469 {
470 return null;
471 }
472
473 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
474
475 for (FacetField facet : facets)
476 {
477 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
478 // String name = wrap.getName();
479 // String display_name = "Poo";
480 // wrap.setDisplayName(display_name);
481
482 newFacetList.add(wrap);
483 }
484
485 return newFacetList;
486 }
487
488
489 protected String getCollectionCoreNamePrefix() {
490 String site_name = this.router.getSiteName();
491 String coll_name = this.cluster_name;
492 String collection_core_name_prefix = site_name + "-" + coll_name;
493 return collection_core_name_prefix;
494 }
495}
Note: See TracBrowser for help on using the repository browser.