source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java

Last change on this file was 38155, checked in by kjdon, 7 months ago

made some static strings for param names

  • Property svn:executable set to *
File size: 20.7 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21import java.io.File;
22import java.io.IOException;
23// Greenstone classes
24import java.util.ArrayList;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Properties;
30import java.util.Set;
31import java.util.Vector;
32
33import org.apache.log4j.Logger;
34import org.apache.solr.client.solrj.SolrServer;
35import org.apache.solr.client.solrj.SolrServerException;
36import org.apache.solr.client.solrj.impl.HttpSolrServer;
37import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
38import org.apache.solr.client.solrj.request.CoreAdminRequest;
39import org.apache.solr.client.solrj.response.CoreAdminResponse;
40import org.apache.solr.client.solrj.response.FacetField;
41import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
42import org.apache.solr.common.util.NamedList;
43import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
44import org.greenstone.gsdl3.util.FacetWrapper;
45import org.greenstone.gsdl3.util.GSFile;
46import org.greenstone.gsdl3.util.GSXML;
47import org.greenstone.gsdl3.util.SolrFacetWrapper;
48import org.greenstone.gsdl3.util.SolrQueryResult;
49import org.greenstone.gsdl3.util.SolrQueryWrapper;
50import org.greenstone.util.GlobalProperties;
51import org.greenstone.util.ProtocolPortProperties;
52import org.w3c.dom.Document;
53import org.w3c.dom.Element;
54import org.w3c.dom.NodeList;
55
56import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
57import org.apache.solr.client.solrj.request.CoreAdminRequest;
58import org.apache.solr.client.solrj.response.CoreAdminResponse;
59import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
60import org.apache.solr.common.util.NamedList;
61
62public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
63{
64
65 protected static final String SORT_ORDER_PARAM = "sortOrder";
66 protected static final String SORT_ORDER_DESCENDING = "1";
67 protected static final String SORT_ORDER_ASCENDING = "0";
68
69 protected static final String FACET_QUERIES_PARAM = "facetQueries";
70 protected static final String DOC_FILTER_PARAM = "docFilter";
71
72 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
73
74 protected String solr_servlet_base_url;
75 protected HashMap<String, SolrServer> solr_core_cache;
76 protected SolrQueryWrapper solr_src = null;
77
78 protected ArrayList<String> _facets = new ArrayList<String>();
79
80 protected HashMap<String, Element> _facet_display_names = new HashMap<String, Element>();
81 protected ArrayList<String> _corenames = new ArrayList<String>();
82
83 public GS2SolrSearch()
84 {
85 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
86 does_faceting = true;
87 does_highlight_snippets = true;
88 does_full_field_highlighting = true;
89 // Used to store the solr cores that match the required 'level'
90 // of search (e.g. either document-level=>didx, or
91 // section-level=>sidx. The hashmap is filled out on demand
92 // based on 'level' parameter passed in to 'setUpQueryer()'
93
94 solr_core_cache = new HashMap<String, SolrServer>();
95
96 this.solr_src = new SolrQueryWrapper();
97
98 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
99 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
100 try {
101 Properties globalProperties = new Properties();
102 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
103
104 /*
105 String host = globalProperties.getProperty("tomcat.server", "localhost");
106 //String port = globalProperties.getProperty("tomcat.port.http", "8383");
107 //String protocol = globalProperties.getProperty("server.protocol", "http");
108 ProtocolPortProperties protocolPortProps = new ProtocolPortProperties(globalProperties); // can throw Exception
109
110 String protocol = protocolPortProps.getProtocol();
111 String port = protocolPortProps.getPort();
112 String solrContext = globalProperties.getProperty("solr.context", "solr");
113
114 String portStr = port.equals("80") ? "" : ":"+port;
115 solr_servlet_base_url = protocol+"://"+host+portStr+"/"+solrContext;
116 */
117
118 // The solr servlet is only accessible locally (from "localhost", specifically 127.0.0.1).
119 // for security reasons, as we don't want non librarian users
120 // to go to the solr servlet and delete solr cores or something.
121 // The security Valve element in the tomcat solr.xml context file restricts
122 // access to 127.0.0.1, but here we ensure that the solr URL is the local http one
123 // and not any https with domain name and https port.
124 // Note that we use 127.0.0.1 instead of literally "localhost" since localhost is unsafe
125 ProtocolPortProperties protocolPortProps = new ProtocolPortProperties(globalProperties); // can throw Exception
126 String solrContext = globalProperties.getProperty("solr.context", "solr");
127 solr_servlet_base_url = protocolPortProps.getLocalHttpBaseAddress()+"/"+solrContext;
128
129 } catch(Exception e) {
130 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
131 }
132 }
133
134 /** configure this service */
135 public boolean configure(Element info, Element extra_info)
136 {
137 boolean success = super.configure(info, extra_info);
138
139 // clear the map of solr cores for this collection added to the map upon querying
140 solr_core_cache.clear();
141
142 if(!success) {
143 return false;
144 }
145
146 if(solr_servlet_base_url == null) {
147 logger.error("Unable to configure GS2SolrSearch - solr_servlet_base_url is null because of issues with port/protocol in global.properties");
148 return false;
149 }
150
151 // set up which params to save
152 this.save_params.add(LEVEL_PARAM);
153
154 // Setting up facets
155
156 // the search element from collectionConfig
157 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
158
159 Document owner = info.getOwnerDocument();
160 // for each facet in buildConfig
161 NodeList facet_list = info.getElementsByTagName("facet");
162 for (int i=0; i<facet_list.getLength(); i++) {
163 Element facet = (Element)facet_list.item(i);
164 String shortname = facet.getAttribute(GSXML.SHORTNAME_ATT);
165 _facets.add(shortname);
166
167 // now add any displayItems into the facet element
168 // (which is stored as part of info), then we can add to
169 // the result if needed
170 String longname = facet.getAttribute(GSXML.NAME_ATT);
171 Element config_facet = GSXML.getNamedElement(searchElem, "facet", GSXML.NAME_ATT, longname);
172 if (config_facet != null) {
173 NodeList display_items = config_facet.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
174 for (int j=0; j<display_items.getLength(); j++) {
175 Element e = (Element) display_items.item(j);
176 facet.appendChild(owner.importNode(e, true));
177 }
178 _facet_display_names.put(shortname, facet);
179
180 }
181
182 }
183 NodeList core_list = info.getElementsByTagName("solrcore");
184 if (core_list.getLength()>0) {
185 for (int i=0; i<core_list.getLength(); i++) {
186 Element core = (Element)core_list.item(i);
187 String corename = core.getAttribute(GSXML.NAME_ATT);
188 _corenames.add(corename);
189 }
190 } else {
191 // old style coll, desn't have solrcores listed in buildConfig
192 // check levels, and use didx and sidx depending on which
193 // levels are specified
194 for (int i=0; i<level_ids.size(); i++) {
195 String level = level_ids.get(i);
196 if (level.toUpperCase().equals("SEC")) {
197 _corenames.add("sidx");
198 } else if (level.toUpperCase().equals("DOC")){
199 _corenames.add("didx");
200 }
201 }
202 }
203
204 // check if cores loaded
205 if (!loadSolrCores()) {
206 logger.error("Collection: couldn't configure collection: " + this.cluster_name + ", "
207 + "Couldn't activate Solr cores");
208 return false;
209 }
210 return true;
211 }
212
213 public void cleanUp()
214 {
215 super.cleanUp();
216 this.solr_src.cleanUp();
217
218 // clear the map keeping track of the SolrServers in this collection
219 solr_core_cache.clear();
220 }
221
222 /** add in the SOLR specific params to TextQuery */
223 protected void addCustomQueryParams(Element param_list, String lang)
224 {
225 super.addCustomQueryParams(param_list, lang);
226 /** Add in the sort order asc/desc param */
227 createParameter(SORT_ORDER_PARAM, param_list, lang);
228 }
229 /** add in SOLR specific params for AdvancedFieldQuery */
230 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
231 {
232 super.addCustomQueryParamsAdvField(param_list, lang);
233 createParameter(SORT_ORDER_PARAM, param_list, lang);
234
235 }
236 /** create a param and add to the list */
237 protected void createParameter(String name, Element param_list, String lang)
238 {
239 Document doc = param_list.getOwnerDocument();
240 Element param = null;
241 String param_default = paramDefaults.get(name);
242 if (name.equals(SORT_ORDER_PARAM)) {
243 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
244 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
245
246 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
247 }
248
249 if (param != null)
250 {
251 param_list.appendChild(param);
252 }
253 else
254 {
255 super.createParameter(name, param_list, lang);
256 }
257
258 }
259
260 /** methods to handle actually doing the query */
261
262 /** do any initialisation of the query object */
263 protected Object setUpQueryer(HashMap params)
264 {
265 this.solr_src.clearFacets();
266 this.solr_src.clearFacetQueries();
267
268 for (int i = 0; i < _facets.size(); i++)
269 {
270 this.solr_src.addFacet(_facets.get(i));
271 }
272
273 String index = "didx";
274 if (this.default_level.toUpperCase().equals("SEC")) {
275 index = "sidx";
276 }
277 String physical_index_language_name = null;
278 String physical_sub_index_name = null;
279 String docFilter = null;
280 int maxdocs = 100;
281 int hits_per_page = 20;
282 int start_page = 1;
283 // set up the query params
284 Set entries = params.entrySet();
285 Iterator i = entries.iterator();
286 while (i.hasNext())
287 {
288 Map.Entry m = (Map.Entry) i.next();
289 String name = (String) m.getKey();
290 String value = (String) m.getValue();
291
292 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
293
294 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
295 {
296 maxdocs = Integer.parseInt(value);
297 }
298 else if (name.equals(HITS_PER_PAGE_PARAM))
299 {
300 hits_per_page = Integer.parseInt(value);
301 }
302 else if (name.equals(START_PAGE_PARAM))
303 {
304 start_page = Integer.parseInt(value);
305 }
306 else if (name.equals(MATCH_PARAM))
307 {
308 if (value.equals(MATCH_PARAM_ALL))
309 {
310 this.solr_src.setDefaultConjunctionOperator("AND");
311 }
312 else
313 {
314 this.solr_src.setDefaultConjunctionOperator("OR");
315 }
316 }
317 else if (name.equals(RANK_PARAM))
318 {
319 if (value.equals(RANK_PARAM_RANK))
320 {
321 value = SolrQueryWrapper.SORT_BY_RANK;
322 } else if (value.equals(RANK_PARAM_NONE)) {
323 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
324 }
325
326 this.solr_src.setSortField(value);
327 }
328 else if (name.equals(SORT_ORDER_PARAM)) {
329 if (value.equals(SORT_ORDER_DESCENDING)) {
330 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
331 } else {
332 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
333 }
334 }
335 else if (name.equals(LEVEL_PARAM))
336 {
337 if (value.toUpperCase().equals("SEC"))
338 {
339 index = "sidx";
340 }
341 else
342 {
343 index = "didx";
344 }
345 }
346 // Would facets ever come in through params???
347 else if (name.equals("facets") && value.length() > 0)
348 {
349 String[] facets = value.split(",");
350
351 for (String facet : facets)
352 {
353 this.solr_src.addFacet(facet);
354 }
355 }
356 else if (name.equals(FACET_QUERIES_PARAM) && value.length() > 0)
357 {
358 //logger.info("@@@ SOLR FACET VALUE FOUND: " + value);
359 this.solr_src.addFacetQuery(value);
360 }
361 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
362 {
363 physical_sub_index_name = value;
364 }
365 else if (name.equals(INDEX_LANGUAGE_PARAM))
366 {
367 physical_index_language_name = value;
368 } // ignore any others
369 else if (name.equals(DOC_FILTER_PARAM))
370 {
371 docFilter = value;
372 docFilter = docFilter.replaceAll("[^A-Za-z0-9.]", "");
373 this.solr_src.setDocFilter(value);
374 }
375 }
376 // set up start and end results if necessary
377 int start_results = 0;
378 if (start_page != 1)
379 {
380 start_results = ((start_page - 1) * hits_per_page) ;
381 }
382 int end_results = hits_per_page * start_page;
383 this.solr_src.setStartResults(start_results);
384 this.solr_src.setEndResults(end_results);
385 this.solr_src.setMaxDocs(maxdocs);
386
387 if (index.equals(SECTION_INDEX) || index.equals(DOCUMENT_INDEX))
388 {
389 if (physical_sub_index_name != null)
390 {
391 index += physical_sub_index_name;
392 }
393 if (physical_index_language_name != null)
394 {
395 index += physical_index_language_name;
396 }
397 }
398
399 // now we know the index level, we can dig out the required
400 // solr-core, (caching the result in 'solr_core_cache')
401 String core_name = getCollectionCoreNamePrefix() + "-" + index;
402
403 SolrServer solr_core = null;
404 //CHECK HERE
405 if (!solr_core_cache.containsKey(core_name))
406 {
407 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
408 solr_core_cache.put(core_name, solr_core);
409 }
410 else
411 {
412 solr_core = solr_core_cache.get(core_name);
413 }
414
415 this.solr_src.setSolrCore(solr_core);
416 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
417 this.solr_src.initialise();
418 return this.solr_src; // return true
419 }
420
421 /** do the query */
422 protected Object runQuery(Object queryObject, String query)
423 {
424 try
425 {
426 //if it is a Highlighting Query - execute it
427 this.solr_src.setHighlightField(indexField);
428 if(hldocOID != null)
429 {
430 String rslt = this.solr_src.runHighlightingQuery(query,hldocOID);
431 // Check result
432 if (rslt != null)
433 {
434 return rslt;
435 }
436 //Highlighting request failed. Do standard request.
437 hldocOID = null;
438 }
439 //logger.info("@@@@ Query is now: " + query);
440 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
441
442 return sqr;
443 }
444 catch (Exception e)
445 {
446 logger.error("Exception happened in run query: ", e);
447 }
448
449 return null;
450 }
451
452
453 /** get the total number of docs that match */
454 protected long numDocsMatched(Object query_result)
455 {
456 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
457
458 }
459
460 /** get the list of doc ids */
461 protected String[] getDocIDs(Object query_result)
462 {
463 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
464 String[] doc_nums = new String[docs.size()];
465 for (int d = 0; d < docs.size(); d++)
466 {
467 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
468 doc_nums[d] = doc_num;
469 }
470 return doc_nums;
471 }
472
473 /** get the list of doc ranks */
474 protected String[] getDocRanks(Object query_result)
475 {
476 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
477 String[] doc_ranks = new String[docs.size()];
478 for (int d = 0; d < docs.size(); d++)
479 {
480 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
481 }
482 return doc_ranks;
483 }
484
485 /** add in term info if available */
486 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
487 {
488 Document doc = term_list.getOwnerDocument();
489 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
490
491 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
492 for (int t = 0; t < terms.size(); t++)
493 {
494 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
495
496 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
497 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
498 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
499 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
500 term_elem.setAttribute(FIELD_ATT, term_info.field_);
501 term_list.appendChild(term_elem);
502 }
503
504 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
505 for (int t = 0; t < stopwords.size(); t++)
506 {
507 String stopword = (String) stopwords.get(t);
508
509 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
510 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
511 term_list.appendChild(stopword_elem);
512 }
513
514 return true;
515 }
516
517 protected ArrayList<FacetWrapper> getFacets(Object query_result, String lang)
518 {
519 if (!(query_result instanceof SolrQueryResult))
520 {
521 return null;
522 }
523
524 SolrQueryResult result = (SolrQueryResult) query_result;
525 List<FacetField> facets = result.getFacetResults();
526
527 if (facets == null)
528 {
529 return null;
530 }
531
532 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
533
534 for (FacetField facet : facets)
535 {
536 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
537 String fname = wrap.getName();
538 String dname = getDisplayText(_facet_display_names.get(fname), GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
539 wrap.setDisplayName(dname);
540 newFacetList.add(wrap);
541 }
542
543 return newFacetList;
544 }
545 @Override
546 protected Map<String, Map<String, List<String>>> getHighlightSnippets(Object query_result)
547 {
548 if (!(query_result instanceof SolrQueryResult))
549 {
550 return null;
551 }
552
553 SolrQueryResult result = (SolrQueryResult) query_result;
554
555 return result.getHighlightResults();
556 }
557
558
559 protected String getCollectionCoreNamePrefix() {
560 String site_name = this.router.getSiteName();
561 String coll_name = this.cluster_name;
562 String collection_core_name_prefix = site_name + "-" + coll_name;
563 return collection_core_name_prefix;
564 }
565
566 private boolean loadSolrCores() {
567
568 HttpSolrServer solrServer = new HttpSolrServer(solr_servlet_base_url);
569 // Max retries
570 solrServer.setMaxRetries(1);
571 // Connection Timeout
572 solrServer.setConnectionTimeout(3000);
573 //Cores
574 for (int i=0; i<_corenames.size(); i++) {
575 String this_core = getCollectionCoreNamePrefix()+"-"+_corenames.get(i);
576
577 if (!checkSolrCore(this_core, solrServer)){
578 if (!activateSolrCore(this_core, solrServer)){
579 logger.error("Couldn't activate Solr core " + this_core + " for collection " + cluster_name);
580 return false;
581 }
582 }
583 }
584 return true;
585 }
586
587 private boolean checkSolrCore(String coreName, HttpSolrServer solrServer) {
588 CoreAdminRequest adminRequest = new CoreAdminRequest();
589 adminRequest.setAction(CoreAdminAction.STATUS);
590 adminRequest.setCoreName(coreName);
591
592 try {
593 CoreAdminResponse adminResponse = adminRequest.process(solrServer);
594 NamedList<NamedList<Object>> coreStatus = adminResponse.getCoreStatus();
595 NamedList<Object> coreList = coreStatus.getVal(0);
596 if (coreList != null) {
597 if (coreList.get("name") == null) {
598 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " not exists.");
599 return false;
600 }
601 }
602
603 } catch (SolrServerException e) {
604 e.printStackTrace();
605 return false;
606 } catch (IOException e) {
607 e.printStackTrace();
608 return false;
609 } catch (RemoteSolrException e1){
610 logger.error("Check solr core " + coreName + " for collection " + cluster_name + " failed.");
611 e1.printStackTrace();
612 return false;
613 }
614 return true;
615 }
616
617 private boolean activateSolrCore(String coreName, HttpSolrServer solrServer) {
618 String dataDir = GSFile.collectionIndexDir(site_home, cluster_name) + File.separator + coreName.substring(coreName.length() - 4);
619 String instanceDir = GSFile.collectionEtcDir(site_home, cluster_name);
620
621 try {
622 CoreAdminRequest.createCore(coreName, instanceDir, solrServer, "", "", dataDir, "");
623 logger.warn("Solr core " + coreName + " for collection " + cluster_name + " activated.");
624 } catch (SolrServerException e1) {
625 e1.printStackTrace();
626 return false;
627 } catch (IOException e1) {
628 e1.printStackTrace();
629 return false;
630 } catch (RemoteSolrException e1){
631 logger.error("Activation solr core " + coreName + " for collection " + cluster_name + " failed.");
632 e1.printStackTrace();
633 return false;
634 }
635
636 return true;
637 }
638
639}
Note: See TracBrowser for help on using the repository browser.