source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 30564

Last change on this file since 30564 was 30564, checked in by Georgiy Litvinov, 8 years ago

Load Solr cores on search service start.

  • Property svn:executable set to *
File size: 18.2 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21import java.io.File;
22import java.io.IOException;
23// Greenstone classes
24import java.util.ArrayList;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Properties;
30import java.util.Set;
31import java.util.Vector;
32
33import org.apache.log4j.Logger;
34import org.apache.solr.client.solrj.SolrServer;
35import org.apache.solr.client.solrj.SolrServerException;
36import org.apache.solr.client.solrj.impl.HttpSolrServer;
37import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
38import org.apache.solr.client.solrj.request.CoreAdminRequest;
39import org.apache.solr.client.solrj.response.CoreAdminResponse;
40import org.apache.solr.client.solrj.response.FacetField;
41import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
42import org.apache.solr.common.util.NamedList;
43import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
44import org.greenstone.gsdl3.util.FacetWrapper;
45import org.greenstone.gsdl3.util.GSFile;
46import org.greenstone.gsdl3.util.GSXML;
47import org.greenstone.gsdl3.util.SolrFacetWrapper;
48import org.greenstone.gsdl3.util.SolrQueryResult;
49import org.greenstone.gsdl3.util.SolrQueryWrapper;
50import org.greenstone.util.GlobalProperties;
51import org.w3c.dom.Document;
52import org.w3c.dom.Element;
53import org.w3c.dom.NodeList;
54
55import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
56import org.apache.solr.client.solrj.request.CoreAdminRequest;
57import org.apache.solr.client.solrj.response.CoreAdminResponse;
58import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
59import org.apache.solr.common.util.NamedList;
60
61public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
62{
63
64 public static final String SOLR_SERVLET_SUFFIX = "/solr";
65 protected static final String SORT_ORDER_PARAM = "sortOrder";
66 protected static final String SORT_ORDER_DESCENDING = "1";
67 protected static final String SORT_ORDER_ASCENDING = "0";
68
69 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
70
71 protected String solr_servlet_base_url;
72 protected HashMap<String, SolrServer> solr_core_cache;
73 protected SolrQueryWrapper solr_src = null;
74
75 protected ArrayList<String> _facets = new ArrayList<String>();
76
77 public GS2SolrSearch()
78 {
79 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
80 does_faceting = true;
81 does_highlight_snippets = true;
82 does_full_field_highlighting = true;
83 // Used to store the solr cores that match the required 'level'
84 // of search (e.g. either document-level=>didx, or
85 // section-level=>sidx. The hashmap is filled out on demand
86 // based on 'level' parameter passed in to 'setUpQueryer()'
87
88 solr_core_cache = new HashMap<String, SolrServer>();
89
90 this.solr_src = new SolrQueryWrapper();
91
92 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
93 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
94 try {
95 Properties globalProperties = new Properties();
96 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
97 String host = globalProperties.getProperty("tomcat.server", "localhost");
98 String port = globalProperties.getProperty("tomcat.port", "8383");
99 String protocol = globalProperties.getProperty("tomcat.protocol", "http");
100
101 String portStr = port.equals("80") ? "" : ":"+port;
102 solr_servlet_base_url = protocol+"://"+host+portStr+SOLR_SERVLET_SUFFIX;
103 } catch(Exception e) {
104 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
105 }
106 }
107
108 /** configure this service */
109 public boolean configure(Element info, Element extra_info)
110 {
111 boolean success = super.configure(info, extra_info);
112
113 // clear the map of solr cores for this collection added to the map upon querying
114 solr_core_cache.clear();
115
116 if(!success) {
117 return false;
118 }
119
120 // Setting up facets
121 // TODO - get these from build config, in case some haven't built
122 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
123 NodeList facet_list = info.getElementsByTagName("facet");
124 for (int i=0; i<facet_list.getLength(); i++) {
125 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
126 }
127
128 //If use Solr check if cores loaded
129 if (!loadSolrCores()) {
130 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", "
131 + "Couldn't activate Solr cores");
132 return false;
133 }
134 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
135
136 // ArrayList<String> chosenFacets = new ArrayList<String>();
137 // for (int i = 0; i < configIndexElems.getLength(); i++)
138 // {
139 // Element current = (Element) configIndexElems.item(i);
140 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
141 // {
142 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
143 // }
144 // }
145
146 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
147 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
148
149 // for (int j = 0; j < buildIndexElems.getLength(); j++)
150 // {
151 // Element current = (Element) buildIndexElems.item(j);
152 // for (int i = 0; i < chosenFacets.size(); i++)
153 // {
154 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
155 // {
156 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
157 // }
158 // }
159 // }
160
161 return true;
162 }
163
164 public void cleanUp()
165 {
166 super.cleanUp();
167 this.solr_src.cleanUp();
168
169 // clear the map keeping track of the SolrServers in this collection
170 solr_core_cache.clear();
171 }
172
173 /** add in the SOLR specific params to TextQuery */
174 protected void addCustomQueryParams(Element param_list, String lang)
175 {
176 super.addCustomQueryParams(param_list, lang);
177 /** Add in the sort order asc/desc param */
178 createParameter(SORT_ORDER_PARAM, param_list, lang);
179 }
180 /** add in SOLR specific params for AdvancedFieldQuery */
181 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
182 {
183 super.addCustomQueryParamsAdvField(param_list, lang);
184 createParameter(SORT_ORDER_PARAM, param_list, lang);
185
186 }
187 /** create a param and add to the list */
188 protected void createParameter(String name, Element param_list, String lang)
189 {
190 Document doc = param_list.getOwnerDocument();
191 Element param = null;
192 String param_default = paramDefaults.get(name);
193 if (name.equals(SORT_ORDER_PARAM)) {
194 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
195 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
196
197 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
198 }
199
200 if (param != null)
201 {
202 param_list.appendChild(param);
203 }
204 else
205 {
206 super.createParameter(name, param_list, lang);
207 }
208
209 }
210
211 /** methods to handle actually doing the query */
212
213 /** do any initialisation of the query object */
214 protected boolean setUpQueryer(HashMap params)
215 {
216 this.solr_src.clearFacets();
217 this.solr_src.clearFacetQueries();
218
219 for (int i = 0; i < _facets.size(); i++)
220 {
221 this.solr_src.addFacet(_facets.get(i));
222 }
223
224 String index = "didx";
225 if (this.default_level.toUpperCase().equals("SEC")) {
226 index = "sidx";
227 }
228 String physical_index_language_name = null;
229 String physical_sub_index_name = null;
230 int maxdocs = 100;
231 int hits_per_page = 20;
232 int start_page = 1;
233 // set up the query params
234 Set entries = params.entrySet();
235 Iterator i = entries.iterator();
236 while (i.hasNext())
237 {
238 Map.Entry m = (Map.Entry) i.next();
239 String name = (String) m.getKey();
240 String value = (String) m.getValue();
241
242 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
243
244 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
245 {
246 maxdocs = Integer.parseInt(value);
247 }
248 else if (name.equals(HITS_PER_PAGE_PARAM))
249 {
250 hits_per_page = Integer.parseInt(value);
251 }
252 else if (name.equals(START_PAGE_PARAM))
253 {
254 start_page = Integer.parseInt(value);
255 }
256 else if (name.equals(MATCH_PARAM))
257 {
258 if (value.equals(MATCH_PARAM_ALL))
259 {
260 this.solr_src.setDefaultConjunctionOperator("AND");
261 }
262 else
263 {
264 this.solr_src.setDefaultConjunctionOperator("OR");
265 }
266 }
267 else if (name.equals(RANK_PARAM))
268 {
269 if (value.equals(RANK_PARAM_RANK))
270 {
271 value = SolrQueryWrapper.SORT_BY_RANK;
272 } else if (value.equals(RANK_PARAM_NONE)) {
273 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
274 }
275
276 this.solr_src.setSortField(value);
277 }
278 else if (name.equals(SORT_ORDER_PARAM)) {
279 if (value.equals(SORT_ORDER_DESCENDING)) {
280 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
281 } else {
282 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
283 }
284 }
285 else if (name.equals(LEVEL_PARAM))
286 {
287 if (value.toUpperCase().equals("SEC"))
288 {
289 index = "sidx";
290 }
291 else
292 {
293 index = "didx";
294 }
295 }
296 // Would facets ever come in through params???
297 else if (name.equals("facets") && value.length() > 0)
298 {
299 String[] facets = value.split(",");
300
301 for (String facet : facets)
302 {
303 this.solr_src.addFacet(facet);
304 }
305 }
306 else if (name.equals("facetQueries") && value.length() > 0)
307 {
308 this.solr_src.addFacetQuery(value);
309 }
310 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
311 {
312 physical_sub_index_name = value;
313 }
314 else if (name.equals(INDEX_LANGUAGE_PARAM))
315 {
316 physical_index_language_name = value;
317 } // ignore any others
318 }
319 // set up start and end results if necessary
320 int start_results = 0;
321 if (start_page != 1)
322 {
323 start_results = ((start_page - 1) * hits_per_page) ;
324 }
325 int end_results = hits_per_page * start_page;
326 this.solr_src.setStartResults(start_results);
327 this.solr_src.setEndResults(end_results);
328 this.solr_src.setMaxDocs(maxdocs);
329
330 if (index.equals("sidx") || index.equals("didx"))
331 {
332 if (physical_sub_index_name != null)
333 {
334 index += physical_sub_index_name;
335 }
336 if (physical_index_language_name != null)
337 {
338 index += physical_index_language_name;
339 }
340 }
341
342 // now we know the index level, we can dig out the required
343 // solr-core, (caching the result in 'solr_core_cache')
344 String core_name = getCollectionCoreNamePrefix() + "-" + index;
345
346 SolrServer solr_core = null;
347 //CHECK HERE
348 if (!solr_core_cache.containsKey(core_name))
349 {
350 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
351 solr_core_cache.put(core_name, solr_core);
352 }
353 else
354 {
355 solr_core = solr_core_cache.get(core_name);
356 }
357
358 this.solr_src.setSolrCore(solr_core);
359 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
360 this.solr_src.initialise();
361 return true;
362 }
363
364 /** do the query */
365 protected Object runQuery(String query)
366 {
367 try
368 {
369 //if it is a Highlighting Query - execute it
370 this.solr_src.setHighlightField(indexField);
371 if(hldocOID != null)
372 {
373 String rslt = this.solr_src.runHighlightingQuery(query,hldocOID);
374 // Check result
375 if (rslt != null)
376 {
377 return rslt;
378 }
379 //Highlighting request failed. Do standard request.
380 hldocOID = null;
381 }
382 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
383
384 return sqr;
385 }
386 catch (Exception e)
387 {
388 logger.error("Exception happened in run query: ", e);
389 }
390
391 return null;
392 }
393
394
395 /** get the total number of docs that match */
396 protected long numDocsMatched(Object query_result)
397 {
398 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
399
400 }
401
402 /** get the list of doc ids */
403 protected String[] getDocIDs(Object query_result)
404 {
405 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
406 String[] doc_nums = new String[docs.size()];
407 for (int d = 0; d < docs.size(); d++)
408 {
409 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
410 doc_nums[d] = doc_num;
411 }
412 return doc_nums;
413 }
414
415 /** get the list of doc ranks */
416 protected String[] getDocRanks(Object query_result)
417 {
418 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
419 String[] doc_ranks = new String[docs.size()];
420 for (int d = 0; d < docs.size(); d++)
421 {
422 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
423 }
424 return doc_ranks;
425 }
426
427 /** add in term info if available */
428 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
429 {
430 Document doc = term_list.getOwnerDocument();
431 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
432
433 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
434 for (int t = 0; t < terms.size(); t++)
435 {
436 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
437
438 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
439 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
440 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
441 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
442 term_elem.setAttribute(FIELD_ATT, term_info.field_);
443 term_list.appendChild(term_elem);
444 }
445
446 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
447 for (int t = 0; t < stopwords.size(); t++)
448 {
449 String stopword = (String) stopwords.get(t);
450
451 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
452 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
453 term_list.appendChild(stopword_elem);
454 }
455
456 return true;
457 }
458
459 protected ArrayList<FacetWrapper> getFacets(Object query_result)
460 {
461 if (!(query_result instanceof SolrQueryResult))
462 {
463 return null;
464 }
465
466 SolrQueryResult result = (SolrQueryResult) query_result;
467 List<FacetField> facets = result.getFacetResults();
468
469 if (facets == null)
470 {
471 return null;
472 }
473
474 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
475
476 for (FacetField facet : facets)
477 {
478 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
479 // String name = wrap.getName();
480 // String display_name = "Poo";
481 // wrap.setDisplayName(display_name);
482
483 newFacetList.add(wrap);
484 }
485
486 return newFacetList;
487 }
488 @Override
489 protected Map<String, Map<String, List<String>>> getHighlightSnippets(Object query_result)
490 {
491 if (!(query_result instanceof SolrQueryResult))
492 {
493 return null;
494 }
495
496 SolrQueryResult result = (SolrQueryResult) query_result;
497
498 return result.getHighlightResults();
499 }
500
501
502 protected String getCollectionCoreNamePrefix() {
503 String site_name = this.router.getSiteName();
504 String coll_name = this.cluster_name;
505 String collection_core_name_prefix = site_name + "-" + coll_name;
506 return collection_core_name_prefix;
507 }
508
509 private boolean loadSolrCores() {
510
511 HttpSolrServer solrServer = new HttpSolrServer(solr_servlet_base_url);
512 // Max retries
513 solrServer.setMaxRetries(1);
514 // Connection Timeout
515 solrServer.setConnectionTimeout(3000);
516 //Cores
517 String coreSecName = getCollectionCoreNamePrefix() + "-sidx";
518 String coreDocName = getCollectionCoreNamePrefix() + "-didx";
519
520
521 if (!checkSolrCore(coreSecName, solrServer)){
522 if (!activateSolrCore(coreSecName, solrServer)){
523 logger.error("Couldn't activate Solr core " + coreSecName + " for collection " + cluster_name);
524 return false;
525 }
526 }
527 if (!checkSolrCore(coreDocName, solrServer)){
528 if (!activateSolrCore(coreDocName, solrServer)){
529 logger.error("Couldn't activate Solr core " + coreDocName + " for collection " + cluster_name);
530 return false;
531 }
532 }
533 return true;
534 }
535
536 private boolean checkSolrCore(String coreName, HttpSolrServer solrServer) {
537 CoreAdminRequest adminRequest = new CoreAdminRequest();
538 adminRequest.setAction(CoreAdminAction.STATUS);
539 adminRequest.setCoreName(coreName);
540
541 try {
542 CoreAdminResponse adminResponse = adminRequest.process(solrServer);
543 NamedList<NamedList<Object>> coreStatus = adminResponse.getCoreStatus();
544 NamedList<Object> coreList = coreStatus.getVal(0);
545 if (coreList != null) {
546 if (coreList.get("name") == null) {
547 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " not exists.");
548 return false;
549 }
550 }
551
552 } catch (SolrServerException e) {
553 e.printStackTrace();
554 return false;
555 } catch (IOException e) {
556 e.printStackTrace();
557 return false;
558 } catch (RemoteSolrException e1){
559 logger.error("Check solr core " + coreName + " for collection " + cluster_name + " failed.");
560 e1.printStackTrace();
561 return false;
562 }
563 return true;
564 }
565
566 private boolean activateSolrCore(String coreName, HttpSolrServer solrServer) {
567 String dataDir = GSFile.collectionIndexDir(site_home, cluster_name) + File.separator + coreName.substring(coreName.length() - 4);
568 String instanceDir = GSFile.collectionEtcDir(site_home, cluster_name);
569
570 try {
571 CoreAdminRequest.createCore(coreName, instanceDir, solrServer, "", "", dataDir, "");
572 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " activated.");
573 } catch (SolrServerException e1) {
574 e1.printStackTrace();
575 return false;
576 } catch (IOException e1) {
577 e1.printStackTrace();
578 return false;
579 } catch (RemoteSolrException e1){
580 logger.error("Activation solr core " + coreName + " for collection " + cluster_name + " failed.");
581 e1.printStackTrace();
582 return false;
583 }
584
585 return true;
586 }
587
588}
Note: See TracBrowser for help on using the repository browser.