source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 32419

Last change on this file since 32419 was 32419, checked in by ak19, 6 years ago

ProtocolPortProperties.java constructors can throw an Exception now.

  • Property svn:executable set to *
File size: 19.9 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21import java.io.File;
22import java.io.IOException;
23// Greenstone classes
24import java.util.ArrayList;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Properties;
30import java.util.Set;
31import java.util.Vector;
32
33import org.apache.log4j.Logger;
34import org.apache.solr.client.solrj.SolrServer;
35import org.apache.solr.client.solrj.SolrServerException;
36import org.apache.solr.client.solrj.impl.HttpSolrServer;
37import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
38import org.apache.solr.client.solrj.request.CoreAdminRequest;
39import org.apache.solr.client.solrj.response.CoreAdminResponse;
40import org.apache.solr.client.solrj.response.FacetField;
41import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
42import org.apache.solr.common.util.NamedList;
43import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
44import org.greenstone.gsdl3.util.FacetWrapper;
45import org.greenstone.gsdl3.util.GSFile;
46import org.greenstone.gsdl3.util.GSXML;
47import org.greenstone.gsdl3.util.SolrFacetWrapper;
48import org.greenstone.gsdl3.util.SolrQueryResult;
49import org.greenstone.gsdl3.util.SolrQueryWrapper;
50import org.greenstone.util.GlobalProperties;
51import org.greenstone.util.ProtocolPortProperties;
52import org.w3c.dom.Document;
53import org.w3c.dom.Element;
54import org.w3c.dom.NodeList;
55
56import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
57import org.apache.solr.client.solrj.request.CoreAdminRequest;
58import org.apache.solr.client.solrj.response.CoreAdminResponse;
59import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
60import org.apache.solr.common.util.NamedList;
61
62public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
63{
64
// Name of the sort-order query parameter and the two values it can take.
// The constructor registers SORT_ORDER_DESCENDING as the default value.
65 protected static final String SORT_ORDER_PARAM = "sortOrder";
66 protected static final String SORT_ORDER_DESCENDING = "1";
67 protected static final String SORT_ORDER_ASCENDING = "0";
68
// log4j logger shared by all instances of this service
69 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
70
// Base URL of the solr servlet (protocol://host[:port]/context), built in the
// constructor from global.properties; stays null if that fails.
71 protected String solr_servlet_base_url;
// Solr servers keyed by core name; filled on demand in setUpQueryer() and
// emptied in configure() and cleanUp().
72 protected HashMap<String, SolrServer> solr_core_cache;
// Wrapper that is set up in setUpQueryer() and used to run the queries
73 protected SolrQueryWrapper solr_src = null;
74
// Shortnames of the facets found in configure(); each is added to solr_src before a query
75 protected ArrayList<String> _facets = new ArrayList<String>();
// Facet shortname -> facet element (with its display items appended), used to look up display names
76 protected HashMap<String, Element> _facet_display_names = new HashMap<String, Element>();
77
78 public GS2SolrSearch()
79 {
80 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
81 does_faceting = true;
82 does_highlight_snippets = true;
83 does_full_field_highlighting = true;
84 // Used to store the solr cores that match the required 'level'
85 // of search (e.g. either document-level=>didx, or
86 // section-level=>sidx. The hashmap is filled out on demand
87 // based on 'level' parameter passed in to 'setUpQueryer()'
88
89 solr_core_cache = new HashMap<String, SolrServer>();
90
91 this.solr_src = new SolrQueryWrapper();
92
93 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
94 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
95 try {
96 Properties globalProperties = new Properties();
97 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
98 String host = globalProperties.getProperty("tomcat.server", "localhost");
99 //String port = globalProperties.getProperty("tomcat.port.http", "8383");
100 //String protocol = globalProperties.getProperty("server.protocol", "http");
101 ProtocolPortProperties protocolPortProps = new ProtocolPortProperties(globalProperties); // can throw Exception
102
103 String protocol = protocolPortProps.getProtocol();
104 String port = protocolPortProps.getPort();
105 String solrContext = globalProperties.getProperty("solr.context", "solr");
106
107 String portStr = port.equals("80") ? "" : ":"+port;
108 solr_servlet_base_url = protocol+"://"+host+portStr+"/"+solrContext;
109 } catch(Exception e) {
110 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
111 }
112 }
113
114 /** configure this service */
115 public boolean configure(Element info, Element extra_info)
116 {
117 boolean success = super.configure(info, extra_info);
118
119 // clear the map of solr cores for this collection added to the map upon querying
120 solr_core_cache.clear();
121
122 if(!success) {
123 return false;
124 }
125
126 if(solr_servlet_base_url == null) {
127 logger.error("Unable to configure GS2SolrSearch - solr_servlet_base_url is null because of issues with port/protocol in global.properties");
128 return false;
129 }
130
131 // Setting up facets
132
133 // the search element from collectionConfig
134 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
135
136 Document owner = info.getOwnerDocument();
137 // for each facet in buildConfig
138 NodeList facet_list = info.getElementsByTagName("facet");
139 for (int i=0; i<facet_list.getLength(); i++) {
140 Element facet = (Element)facet_list.item(i);
141 String shortname = facet.getAttribute(GSXML.SHORTNAME_ATT);
142 _facets.add(shortname);
143
144 // now add any displayItems into the facet element
145 // (which is stored as part of info), then we can add to
146 // the result if needed
147 String longname = facet.getAttribute(GSXML.NAME_ATT);
148 Element config_facet = GSXML.getNamedElement(searchElem, "facet", GSXML.NAME_ATT, longname);
149 if (config_facet != null) {
150 NodeList display_items = config_facet.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
151 for (int j=0; j<display_items.getLength(); j++) {
152 Element e = (Element) display_items.item(j);
153 facet.appendChild(owner.importNode(e, true));
154 }
155 _facet_display_names.put(shortname, facet);
156
157 }
158
159 }
160
161 //If use Solr check if cores loaded
162 if (!loadSolrCores()) {
163 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", "
164 + "Couldn't activate Solr cores");
165 return false;
166 }
167 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
168
169 // ArrayList<String> chosenFacets = new ArrayList<String>();
170 // for (int i = 0; i < configIndexElems.getLength(); i++)
171 // {
172 // Element current = (Element) configIndexElems.item(i);
173 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
174 // {
175 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
176 // }
177 // }
178
179 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
180 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
181
182 // for (int j = 0; j < buildIndexElems.getLength(); j++)
183 // {
184 // Element current = (Element) buildIndexElems.item(j);
185 // for (int i = 0; i < chosenFacets.size(); i++)
186 // {
187 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
188 // {
189 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
190 // }
191 // }
192 // }
193
194 return true;
195 }
196
197 public void cleanUp()
198 {
199 super.cleanUp();
200 this.solr_src.cleanUp();
201
202 // clear the map keeping track of the SolrServers in this collection
203 solr_core_cache.clear();
204 }
205
206 /** add in the SOLR specific params to TextQuery */
207 protected void addCustomQueryParams(Element param_list, String lang)
208 {
209 super.addCustomQueryParams(param_list, lang);
210 /** Add in the sort order asc/desc param */
211 createParameter(SORT_ORDER_PARAM, param_list, lang);
212 }
213 /** add in SOLR specific params for AdvancedFieldQuery */
214 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
215 {
216 super.addCustomQueryParamsAdvField(param_list, lang);
217 createParameter(SORT_ORDER_PARAM, param_list, lang);
218
219 }
220 /** create a param and add to the list */
221 protected void createParameter(String name, Element param_list, String lang)
222 {
223 Document doc = param_list.getOwnerDocument();
224 Element param = null;
225 String param_default = paramDefaults.get(name);
226 if (name.equals(SORT_ORDER_PARAM)) {
227 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
228 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
229
230 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
231 }
232
233 if (param != null)
234 {
235 param_list.appendChild(param);
236 }
237 else
238 {
239 super.createParameter(name, param_list, lang);
240 }
241
242 }
243
244 /** methods to handle actually doing the query */
245
246 /** do any initialisation of the query object */
247 protected boolean setUpQueryer(HashMap params)
248 {
249 this.solr_src.clearFacets();
250 this.solr_src.clearFacetQueries();
251
252 for (int i = 0; i < _facets.size(); i++)
253 {
254 this.solr_src.addFacet(_facets.get(i));
255 }
256
257 String index = "didx";
258 if (this.default_level.toUpperCase().equals("SEC")) {
259 index = "sidx";
260 }
261 String physical_index_language_name = null;
262 String physical_sub_index_name = null;
263 String docFilter = null;
264 int maxdocs = 100;
265 int hits_per_page = 20;
266 int start_page = 1;
267 // set up the query params
268 Set entries = params.entrySet();
269 Iterator i = entries.iterator();
270 while (i.hasNext())
271 {
272 Map.Entry m = (Map.Entry) i.next();
273 String name = (String) m.getKey();
274 String value = (String) m.getValue();
275
276 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
277
278 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
279 {
280 maxdocs = Integer.parseInt(value);
281 }
282 else if (name.equals(HITS_PER_PAGE_PARAM))
283 {
284 hits_per_page = Integer.parseInt(value);
285 }
286 else if (name.equals(START_PAGE_PARAM))
287 {
288 start_page = Integer.parseInt(value);
289 }
290 else if (name.equals(MATCH_PARAM))
291 {
292 if (value.equals(MATCH_PARAM_ALL))
293 {
294 this.solr_src.setDefaultConjunctionOperator("AND");
295 }
296 else
297 {
298 this.solr_src.setDefaultConjunctionOperator("OR");
299 }
300 }
301 else if (name.equals(RANK_PARAM))
302 {
303 if (value.equals(RANK_PARAM_RANK))
304 {
305 value = SolrQueryWrapper.SORT_BY_RANK;
306 } else if (value.equals(RANK_PARAM_NONE)) {
307 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
308 }
309
310 this.solr_src.setSortField(value);
311 }
312 else if (name.equals(SORT_ORDER_PARAM)) {
313 if (value.equals(SORT_ORDER_DESCENDING)) {
314 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
315 } else {
316 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
317 }
318 }
319 else if (name.equals(LEVEL_PARAM))
320 {
321 if (value.toUpperCase().equals("SEC"))
322 {
323 index = "sidx";
324 }
325 else
326 {
327 index = "didx";
328 }
329 }
330 // Would facets ever come in through params???
331 else if (name.equals("facets") && value.length() > 0)
332 {
333 String[] facets = value.split(",");
334
335 for (String facet : facets)
336 {
337 this.solr_src.addFacet(facet);
338 }
339 }
340 else if (name.equals("facetQueries") && value.length() > 0)
341 {
342 //logger.info("@@@ SOLR FACET VALUE FOUND: " + value);
343 this.solr_src.addFacetQuery(value);
344 }
345 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
346 {
347 physical_sub_index_name = value;
348 }
349 else if (name.equals(INDEX_LANGUAGE_PARAM))
350 {
351 physical_index_language_name = value;
352 } // ignore any others
353 else if (name.equals("docFilter"))
354 {
355 docFilter = value;
356 docFilter = docFilter.replaceAll("[^A-Za-z0-9.]", "");
357 this.solr_src.setDocFilter(value);
358 }
359 }
360 // set up start and end results if necessary
361 int start_results = 0;
362 if (start_page != 1)
363 {
364 start_results = ((start_page - 1) * hits_per_page) ;
365 }
366 int end_results = hits_per_page * start_page;
367 this.solr_src.setStartResults(start_results);
368 this.solr_src.setEndResults(end_results);
369 this.solr_src.setMaxDocs(maxdocs);
370
371 if (index.equals("sidx") || index.equals("didx"))
372 {
373 if (physical_sub_index_name != null)
374 {
375 index += physical_sub_index_name;
376 }
377 if (physical_index_language_name != null)
378 {
379 index += physical_index_language_name;
380 }
381 }
382
383 // now we know the index level, we can dig out the required
384 // solr-core, (caching the result in 'solr_core_cache')
385 String core_name = getCollectionCoreNamePrefix() + "-" + index;
386
387 SolrServer solr_core = null;
388 //CHECK HERE
389 if (!solr_core_cache.containsKey(core_name))
390 {
391 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
392 solr_core_cache.put(core_name, solr_core);
393 }
394 else
395 {
396 solr_core = solr_core_cache.get(core_name);
397 }
398
399 this.solr_src.setSolrCore(solr_core);
400 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
401 this.solr_src.initialise();
402 return true;
403 }
404
405 /** do the query */
406 protected Object runQuery(String query)
407 {
408 try
409 {
410 //if it is a Highlighting Query - execute it
411 this.solr_src.setHighlightField(indexField);
412 if(hldocOID != null)
413 {
414 String rslt = this.solr_src.runHighlightingQuery(query,hldocOID);
415 // Check result
416 if (rslt != null)
417 {
418 return rslt;
419 }
420 //Highlighting request failed. Do standard request.
421 hldocOID = null;
422 }
423 //logger.info("@@@@ Query is now: " + query);
424 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
425
426 return sqr;
427 }
428 catch (Exception e)
429 {
430 logger.error("Exception happened in run query: ", e);
431 }
432
433 return null;
434 }
435
436
437 /** get the total number of docs that match */
438 protected long numDocsMatched(Object query_result)
439 {
440 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
441
442 }
443
444 /** get the list of doc ids */
445 protected String[] getDocIDs(Object query_result)
446 {
447 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
448 String[] doc_nums = new String[docs.size()];
449 for (int d = 0; d < docs.size(); d++)
450 {
451 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
452 doc_nums[d] = doc_num;
453 }
454 return doc_nums;
455 }
456
457 /** get the list of doc ranks */
458 protected String[] getDocRanks(Object query_result)
459 {
460 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
461 String[] doc_ranks = new String[docs.size()];
462 for (int d = 0; d < docs.size(); d++)
463 {
464 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
465 }
466 return doc_ranks;
467 }
468
469 /** add in term info if available */
470 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
471 {
472 Document doc = term_list.getOwnerDocument();
473 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
474
475 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
476 for (int t = 0; t < terms.size(); t++)
477 {
478 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
479
480 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
481 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
482 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
483 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
484 term_elem.setAttribute(FIELD_ATT, term_info.field_);
485 term_list.appendChild(term_elem);
486 }
487
488 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
489 for (int t = 0; t < stopwords.size(); t++)
490 {
491 String stopword = (String) stopwords.get(t);
492
493 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
494 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
495 term_list.appendChild(stopword_elem);
496 }
497
498 return true;
499 }
500
501 protected ArrayList<FacetWrapper> getFacets(Object query_result, String lang)
502 {
503 if (!(query_result instanceof SolrQueryResult))
504 {
505 return null;
506 }
507
508 SolrQueryResult result = (SolrQueryResult) query_result;
509 List<FacetField> facets = result.getFacetResults();
510
511 if (facets == null)
512 {
513 return null;
514 }
515
516 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
517
518 for (FacetField facet : facets)
519 {
520 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
521 String fname = wrap.getName();
522 String dname = getDisplayText(_facet_display_names.get(fname), GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
523 wrap.setDisplayName(dname);
524 newFacetList.add(wrap);
525 }
526
527 return newFacetList;
528 }
529 @Override
530 protected Map<String, Map<String, List<String>>> getHighlightSnippets(Object query_result)
531 {
532 if (!(query_result instanceof SolrQueryResult))
533 {
534 return null;
535 }
536
537 SolrQueryResult result = (SolrQueryResult) query_result;
538
539 return result.getHighlightResults();
540 }
541
542
543 protected String getCollectionCoreNamePrefix() {
544 String site_name = this.router.getSiteName();
545 String coll_name = this.cluster_name;
546 String collection_core_name_prefix = site_name + "-" + coll_name;
547 return collection_core_name_prefix;
548 }
549
550 private boolean loadSolrCores() {
551
552 HttpSolrServer solrServer = new HttpSolrServer(solr_servlet_base_url);
553 // Max retries
554 solrServer.setMaxRetries(1);
555 // Connection Timeout
556 solrServer.setConnectionTimeout(3000);
557 //Cores
558 String coreSecName = getCollectionCoreNamePrefix() + "-sidx";
559 String coreDocName = getCollectionCoreNamePrefix() + "-didx";
560
561
562 if (!checkSolrCore(coreSecName, solrServer)){
563 if (!activateSolrCore(coreSecName, solrServer)){
564 logger.error("Couldn't activate Solr core " + coreSecName + " for collection " + cluster_name);
565 return false;
566 }
567 }
568 if (!checkSolrCore(coreDocName, solrServer)){
569 if (!activateSolrCore(coreDocName, solrServer)){
570 logger.error("Couldn't activate Solr core " + coreDocName + " for collection " + cluster_name);
571 return false;
572 }
573 }
574 return true;
575 }
576
577 private boolean checkSolrCore(String coreName, HttpSolrServer solrServer) {
578 CoreAdminRequest adminRequest = new CoreAdminRequest();
579 adminRequest.setAction(CoreAdminAction.STATUS);
580 adminRequest.setCoreName(coreName);
581
582 try {
583 CoreAdminResponse adminResponse = adminRequest.process(solrServer);
584 NamedList<NamedList<Object>> coreStatus = adminResponse.getCoreStatus();
585 NamedList<Object> coreList = coreStatus.getVal(0);
586 if (coreList != null) {
587 if (coreList.get("name") == null) {
588 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " not exists.");
589 return false;
590 }
591 }
592
593 } catch (SolrServerException e) {
594 e.printStackTrace();
595 return false;
596 } catch (IOException e) {
597 e.printStackTrace();
598 return false;
599 } catch (RemoteSolrException e1){
600 logger.error("Check solr core " + coreName + " for collection " + cluster_name + " failed.");
601 e1.printStackTrace();
602 return false;
603 }
604 return true;
605 }
606
607 private boolean activateSolrCore(String coreName, HttpSolrServer solrServer) {
608 String dataDir = GSFile.collectionIndexDir(site_home, cluster_name) + File.separator + coreName.substring(coreName.length() - 4);
609 String instanceDir = GSFile.collectionEtcDir(site_home, cluster_name);
610
611 try {
612 CoreAdminRequest.createCore(coreName, instanceDir, solrServer, "", "", dataDir, "");
613 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " activated.");
614 } catch (SolrServerException e1) {
615 e1.printStackTrace();
616 return false;
617 } catch (IOException e1) {
618 e1.printStackTrace();
619 return false;
620 } catch (RemoteSolrException e1){
621 logger.error("Activation solr core " + coreName + " for collection " + cluster_name + " failed.");
622 e1.printStackTrace();
623 return false;
624 }
625
626 return true;
627 }
628
629}
Note: See TracBrowser for help on using the repository browser.