source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 32410

Last change on this file since 32410 was 32410, checked in by ak19, 6 years ago

Incremental fix. In GS3's solr ext java code: global.properties also uses server.protocol not tomcat.protocol. Next fix will use ProtocolPortProperties

  • Property svn:executable set to *
File size: 19.4 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21import java.io.File;
22import java.io.IOException;
23// Greenstone classes
24import java.util.ArrayList;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Properties;
30import java.util.Set;
31import java.util.Vector;
32
33import org.apache.log4j.Logger;
34import org.apache.solr.client.solrj.SolrServer;
35import org.apache.solr.client.solrj.SolrServerException;
36import org.apache.solr.client.solrj.impl.HttpSolrServer;
37import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
38import org.apache.solr.client.solrj.request.CoreAdminRequest;
39import org.apache.solr.client.solrj.response.CoreAdminResponse;
40import org.apache.solr.client.solrj.response.FacetField;
41import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
42import org.apache.solr.common.util.NamedList;
43import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
44import org.greenstone.gsdl3.util.FacetWrapper;
45import org.greenstone.gsdl3.util.GSFile;
46import org.greenstone.gsdl3.util.GSXML;
47import org.greenstone.gsdl3.util.SolrFacetWrapper;
48import org.greenstone.gsdl3.util.SolrQueryResult;
49import org.greenstone.gsdl3.util.SolrQueryWrapper;
50import org.greenstone.util.GlobalProperties;
51import org.w3c.dom.Document;
52import org.w3c.dom.Element;
53import org.w3c.dom.NodeList;
54
55import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
56import org.apache.solr.client.solrj.request.CoreAdminRequest;
57import org.apache.solr.client.solrj.response.CoreAdminResponse;
58import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
59import org.apache.solr.common.util.NamedList;
60
61public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
62{
63
/** Name of the query parameter that selects the sort direction of the results. */
64 protected static final String SORT_ORDER_PARAM = "sortOrder";
/** Value of the sort order parameter that requests descending order. */
65 protected static final String SORT_ORDER_DESCENDING = "1";
/** Value of the sort order parameter that requests ascending order. */
66 protected static final String SORT_ORDER_ASCENDING = "0";
67
/** Logger used by this service. */
68 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
69
/** Base URL of the solr servlet; assembled in the constructor from global.properties. */
70 protected String solr_servlet_base_url;
/** SolrServer instances keyed by core name; filled on demand by setUpQueryer(). */
71 protected HashMap<String, SolrServer> solr_core_cache;
/** Wrapper through which queries are set up and run. */
72 protected SolrQueryWrapper solr_src = null;
73
/** Shortnames of the facets collected by configure(). */
74 protected ArrayList<String> _facets = new ArrayList<String>();
/** Facet elements (with their display items appended) keyed by facet shortname. */
75 protected HashMap<String, Element> _facet_display_names = new HashMap<String, Element>();
76
77 public GS2SolrSearch()
78 {
79 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
80 does_faceting = true;
81 does_highlight_snippets = true;
82 does_full_field_highlighting = true;
83 // Used to store the solr cores that match the required 'level'
84 // of search (e.g. either document-level=>didx, or
85 // section-level=>sidx. The hashmap is filled out on demand
86 // based on 'level' parameter passed in to 'setUpQueryer()'
87
88 solr_core_cache = new HashMap<String, SolrServer>();
89
90 this.solr_src = new SolrQueryWrapper();
91
92 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
93 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
94 try {
95 Properties globalProperties = new Properties();
96 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
97 String host = globalProperties.getProperty("tomcat.server", "localhost");
98 String port = globalProperties.getProperty("tomcat.port.http", "8383");
99 String protocol = globalProperties.getProperty("server.protocol", "http");
100 String solrContext = globalProperties.getProperty("solr.context", "solr");
101
102 String portStr = port.equals("80") ? "" : ":"+port;
103 solr_servlet_base_url = protocol+"://"+host+portStr+"/"+solrContext;
104 } catch(Exception e) {
105 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
106 }
107 }
108
109 /** configure this service */
110 public boolean configure(Element info, Element extra_info)
111 {
112 boolean success = super.configure(info, extra_info);
113
114 // clear the map of solr cores for this collection added to the map upon querying
115 solr_core_cache.clear();
116
117 if(!success) {
118 return false;
119 }
120
121 // Setting up facets
122
123 // the search element from collectionConfig
124 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
125
126 Document owner = info.getOwnerDocument();
127 // for each facet in buildConfig
128 NodeList facet_list = info.getElementsByTagName("facet");
129 for (int i=0; i<facet_list.getLength(); i++) {
130 Element facet = (Element)facet_list.item(i);
131 String shortname = facet.getAttribute(GSXML.SHORTNAME_ATT);
132 _facets.add(shortname);
133
134 // now add any displayItems into the facet element
135 // (which is stored as part of info), then we can add to
136 // the result if needed
137 String longname = facet.getAttribute(GSXML.NAME_ATT);
138 Element config_facet = GSXML.getNamedElement(searchElem, "facet", GSXML.NAME_ATT, longname);
139 if (config_facet != null) {
140 NodeList display_items = config_facet.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
141 for (int j=0; j<display_items.getLength(); j++) {
142 Element e = (Element) display_items.item(j);
143 facet.appendChild(owner.importNode(e, true));
144 }
145 _facet_display_names.put(shortname, facet);
146
147 }
148
149 }
150
151 //If use Solr check if cores loaded
152 if (!loadSolrCores()) {
153 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", "
154 + "Couldn't activate Solr cores");
155 return false;
156 }
157 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
158
159 // ArrayList<String> chosenFacets = new ArrayList<String>();
160 // for (int i = 0; i < configIndexElems.getLength(); i++)
161 // {
162 // Element current = (Element) configIndexElems.item(i);
163 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
164 // {
165 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
166 // }
167 // }
168
169 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
170 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
171
172 // for (int j = 0; j < buildIndexElems.getLength(); j++)
173 // {
174 // Element current = (Element) buildIndexElems.item(j);
175 // for (int i = 0; i < chosenFacets.size(); i++)
176 // {
177 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
178 // {
179 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
180 // }
181 // }
182 // }
183
184 return true;
185 }
186
187 public void cleanUp()
188 {
189 super.cleanUp();
190 this.solr_src.cleanUp();
191
192 // clear the map keeping track of the SolrServers in this collection
193 solr_core_cache.clear();
194 }
195
196 /** add in the SOLR specific params to TextQuery */
197 protected void addCustomQueryParams(Element param_list, String lang)
198 {
199 super.addCustomQueryParams(param_list, lang);
200 /** Add in the sort order asc/desc param */
201 createParameter(SORT_ORDER_PARAM, param_list, lang);
202 }
203 /** add in SOLR specific params for AdvancedFieldQuery */
204 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
205 {
206 super.addCustomQueryParamsAdvField(param_list, lang);
207 createParameter(SORT_ORDER_PARAM, param_list, lang);
208
209 }
210 /** create a param and add to the list */
211 protected void createParameter(String name, Element param_list, String lang)
212 {
213 Document doc = param_list.getOwnerDocument();
214 Element param = null;
215 String param_default = paramDefaults.get(name);
216 if (name.equals(SORT_ORDER_PARAM)) {
217 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
218 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
219
220 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
221 }
222
223 if (param != null)
224 {
225 param_list.appendChild(param);
226 }
227 else
228 {
229 super.createParameter(name, param_list, lang);
230 }
231
232 }
233
234 /** methods to handle actually doing the query */
235
236 /** do any initialisation of the query object */
237 protected boolean setUpQueryer(HashMap params)
238 {
239 this.solr_src.clearFacets();
240 this.solr_src.clearFacetQueries();
241
242 for (int i = 0; i < _facets.size(); i++)
243 {
244 this.solr_src.addFacet(_facets.get(i));
245 }
246
247 String index = "didx";
248 if (this.default_level.toUpperCase().equals("SEC")) {
249 index = "sidx";
250 }
251 String physical_index_language_name = null;
252 String physical_sub_index_name = null;
253 String docFilter = null;
254 int maxdocs = 100;
255 int hits_per_page = 20;
256 int start_page = 1;
257 // set up the query params
258 Set entries = params.entrySet();
259 Iterator i = entries.iterator();
260 while (i.hasNext())
261 {
262 Map.Entry m = (Map.Entry) i.next();
263 String name = (String) m.getKey();
264 String value = (String) m.getValue();
265
266 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
267
268 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
269 {
270 maxdocs = Integer.parseInt(value);
271 }
272 else if (name.equals(HITS_PER_PAGE_PARAM))
273 {
274 hits_per_page = Integer.parseInt(value);
275 }
276 else if (name.equals(START_PAGE_PARAM))
277 {
278 start_page = Integer.parseInt(value);
279 }
280 else if (name.equals(MATCH_PARAM))
281 {
282 if (value.equals(MATCH_PARAM_ALL))
283 {
284 this.solr_src.setDefaultConjunctionOperator("AND");
285 }
286 else
287 {
288 this.solr_src.setDefaultConjunctionOperator("OR");
289 }
290 }
291 else if (name.equals(RANK_PARAM))
292 {
293 if (value.equals(RANK_PARAM_RANK))
294 {
295 value = SolrQueryWrapper.SORT_BY_RANK;
296 } else if (value.equals(RANK_PARAM_NONE)) {
297 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
298 }
299
300 this.solr_src.setSortField(value);
301 }
302 else if (name.equals(SORT_ORDER_PARAM)) {
303 if (value.equals(SORT_ORDER_DESCENDING)) {
304 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
305 } else {
306 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
307 }
308 }
309 else if (name.equals(LEVEL_PARAM))
310 {
311 if (value.toUpperCase().equals("SEC"))
312 {
313 index = "sidx";
314 }
315 else
316 {
317 index = "didx";
318 }
319 }
320 // Would facets ever come in through params???
321 else if (name.equals("facets") && value.length() > 0)
322 {
323 String[] facets = value.split(",");
324
325 for (String facet : facets)
326 {
327 this.solr_src.addFacet(facet);
328 }
329 }
330 else if (name.equals("facetQueries") && value.length() > 0)
331 {
332 //logger.info("@@@ SOLR FACET VALUE FOUND: " + value);
333 this.solr_src.addFacetQuery(value);
334 }
335 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
336 {
337 physical_sub_index_name = value;
338 }
339 else if (name.equals(INDEX_LANGUAGE_PARAM))
340 {
341 physical_index_language_name = value;
342 } // ignore any others
343 else if (name.equals("docFilter"))
344 {
345 docFilter = value;
346 docFilter = docFilter.replaceAll("[^A-Za-z0-9.]", "");
347 this.solr_src.setDocFilter(value);
348 }
349 }
350 // set up start and end results if necessary
351 int start_results = 0;
352 if (start_page != 1)
353 {
354 start_results = ((start_page - 1) * hits_per_page) ;
355 }
356 int end_results = hits_per_page * start_page;
357 this.solr_src.setStartResults(start_results);
358 this.solr_src.setEndResults(end_results);
359 this.solr_src.setMaxDocs(maxdocs);
360
361 if (index.equals("sidx") || index.equals("didx"))
362 {
363 if (physical_sub_index_name != null)
364 {
365 index += physical_sub_index_name;
366 }
367 if (physical_index_language_name != null)
368 {
369 index += physical_index_language_name;
370 }
371 }
372
373 // now we know the index level, we can dig out the required
374 // solr-core, (caching the result in 'solr_core_cache')
375 String core_name = getCollectionCoreNamePrefix() + "-" + index;
376
377 SolrServer solr_core = null;
378 //CHECK HERE
379 if (!solr_core_cache.containsKey(core_name))
380 {
381 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
382 solr_core_cache.put(core_name, solr_core);
383 }
384 else
385 {
386 solr_core = solr_core_cache.get(core_name);
387 }
388
389 this.solr_src.setSolrCore(solr_core);
390 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
391 this.solr_src.initialise();
392 return true;
393 }
394
395 /** do the query */
396 protected Object runQuery(String query)
397 {
398 try
399 {
400 //if it is a Highlighting Query - execute it
401 this.solr_src.setHighlightField(indexField);
402 if(hldocOID != null)
403 {
404 String rslt = this.solr_src.runHighlightingQuery(query,hldocOID);
405 // Check result
406 if (rslt != null)
407 {
408 return rslt;
409 }
410 //Highlighting request failed. Do standard request.
411 hldocOID = null;
412 }
413 //logger.info("@@@@ Query is now: " + query);
414 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
415
416 return sqr;
417 }
418 catch (Exception e)
419 {
420 logger.error("Exception happened in run query: ", e);
421 }
422
423 return null;
424 }
425
426
427 /** get the total number of docs that match */
428 protected long numDocsMatched(Object query_result)
429 {
430 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
431
432 }
433
434 /** get the list of doc ids */
435 protected String[] getDocIDs(Object query_result)
436 {
437 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
438 String[] doc_nums = new String[docs.size()];
439 for (int d = 0; d < docs.size(); d++)
440 {
441 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
442 doc_nums[d] = doc_num;
443 }
444 return doc_nums;
445 }
446
447 /** get the list of doc ranks */
448 protected String[] getDocRanks(Object query_result)
449 {
450 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
451 String[] doc_ranks = new String[docs.size()];
452 for (int d = 0; d < docs.size(); d++)
453 {
454 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
455 }
456 return doc_ranks;
457 }
458
459 /** add in term info if available */
460 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
461 {
462 Document doc = term_list.getOwnerDocument();
463 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
464
465 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
466 for (int t = 0; t < terms.size(); t++)
467 {
468 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
469
470 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
471 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
472 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
473 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
474 term_elem.setAttribute(FIELD_ATT, term_info.field_);
475 term_list.appendChild(term_elem);
476 }
477
478 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
479 for (int t = 0; t < stopwords.size(); t++)
480 {
481 String stopword = (String) stopwords.get(t);
482
483 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
484 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
485 term_list.appendChild(stopword_elem);
486 }
487
488 return true;
489 }
490
491 protected ArrayList<FacetWrapper> getFacets(Object query_result, String lang)
492 {
493 if (!(query_result instanceof SolrQueryResult))
494 {
495 return null;
496 }
497
498 SolrQueryResult result = (SolrQueryResult) query_result;
499 List<FacetField> facets = result.getFacetResults();
500
501 if (facets == null)
502 {
503 return null;
504 }
505
506 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
507
508 for (FacetField facet : facets)
509 {
510 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
511 String fname = wrap.getName();
512 String dname = getDisplayText(_facet_display_names.get(fname), GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
513 wrap.setDisplayName(dname);
514 newFacetList.add(wrap);
515 }
516
517 return newFacetList;
518 }
519 @Override
520 protected Map<String, Map<String, List<String>>> getHighlightSnippets(Object query_result)
521 {
522 if (!(query_result instanceof SolrQueryResult))
523 {
524 return null;
525 }
526
527 SolrQueryResult result = (SolrQueryResult) query_result;
528
529 return result.getHighlightResults();
530 }
531
532
533 protected String getCollectionCoreNamePrefix() {
534 String site_name = this.router.getSiteName();
535 String coll_name = this.cluster_name;
536 String collection_core_name_prefix = site_name + "-" + coll_name;
537 return collection_core_name_prefix;
538 }
539
540 private boolean loadSolrCores() {
541
542 HttpSolrServer solrServer = new HttpSolrServer(solr_servlet_base_url);
543 // Max retries
544 solrServer.setMaxRetries(1);
545 // Connection Timeout
546 solrServer.setConnectionTimeout(3000);
547 //Cores
548 String coreSecName = getCollectionCoreNamePrefix() + "-sidx";
549 String coreDocName = getCollectionCoreNamePrefix() + "-didx";
550
551
552 if (!checkSolrCore(coreSecName, solrServer)){
553 if (!activateSolrCore(coreSecName, solrServer)){
554 logger.error("Couldn't activate Solr core " + coreSecName + " for collection " + cluster_name);
555 return false;
556 }
557 }
558 if (!checkSolrCore(coreDocName, solrServer)){
559 if (!activateSolrCore(coreDocName, solrServer)){
560 logger.error("Couldn't activate Solr core " + coreDocName + " for collection " + cluster_name);
561 return false;
562 }
563 }
564 return true;
565 }
566
567 private boolean checkSolrCore(String coreName, HttpSolrServer solrServer) {
568 CoreAdminRequest adminRequest = new CoreAdminRequest();
569 adminRequest.setAction(CoreAdminAction.STATUS);
570 adminRequest.setCoreName(coreName);
571
572 try {
573 CoreAdminResponse adminResponse = adminRequest.process(solrServer);
574 NamedList<NamedList<Object>> coreStatus = adminResponse.getCoreStatus();
575 NamedList<Object> coreList = coreStatus.getVal(0);
576 if (coreList != null) {
577 if (coreList.get("name") == null) {
578 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " not exists.");
579 return false;
580 }
581 }
582
583 } catch (SolrServerException e) {
584 e.printStackTrace();
585 return false;
586 } catch (IOException e) {
587 e.printStackTrace();
588 return false;
589 } catch (RemoteSolrException e1){
590 logger.error("Check solr core " + coreName + " for collection " + cluster_name + " failed.");
591 e1.printStackTrace();
592 return false;
593 }
594 return true;
595 }
596
597 private boolean activateSolrCore(String coreName, HttpSolrServer solrServer) {
598 String dataDir = GSFile.collectionIndexDir(site_home, cluster_name) + File.separator + coreName.substring(coreName.length() - 4);
599 String instanceDir = GSFile.collectionEtcDir(site_home, cluster_name);
600
601 try {
602 CoreAdminRequest.createCore(coreName, instanceDir, solrServer, "", "", dataDir, "");
603 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " activated.");
604 } catch (SolrServerException e1) {
605 e1.printStackTrace();
606 return false;
607 } catch (IOException e1) {
608 e1.printStackTrace();
609 return false;
610 } catch (RemoteSolrException e1){
611 logger.error("Activation solr core " + coreName + " for collection " + cluster_name + " failed.");
612 e1.printStackTrace();
613 return false;
614 }
615
616 return true;
617 }
618
619}
Note: See TracBrowser for help on using the repository browser.