source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 30146

Last change on this file since 30146 was 30146, checked in by kjdon, 9 years ago

set the index to match the default level, just in case we are doing cross collection search and don't have a level parameter

  • Property svn:executable set to *
File size: 14.7 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.util.ArrayList;
23import java.util.HashMap;
24import java.util.Iterator;
25import java.util.List;
26import java.util.Map;
27import java.util.Properties;
28import java.util.Set;
29import java.util.Vector;
30
31import org.apache.log4j.Logger;
32import org.apache.solr.client.solrj.SolrServer;
33import org.apache.solr.client.solrj.impl.HttpSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
36import org.greenstone.gsdl3.util.FacetWrapper;
37import org.greenstone.gsdl3.util.GSFile;
38import org.greenstone.gsdl3.util.GSXML;
39import org.greenstone.gsdl3.util.SolrFacetWrapper;
40import org.greenstone.gsdl3.util.SolrQueryResult;
41import org.greenstone.gsdl3.util.SolrQueryWrapper;
42import org.greenstone.util.GlobalProperties;
43import org.w3c.dom.Document;
44import org.w3c.dom.Element;
45import org.w3c.dom.NodeList;
46
47public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
48{
49
50 public static final String SOLR_SERVLET_SUFFIX = "/solr";
51 protected static final String SORT_ORDER_PARAM = "sortOrder";
52 protected static final String SORT_ORDER_DESCENDING = "1";
53 protected static final String SORT_ORDER_ASCENDING = "0";
54
55 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
56
57 protected String solr_servlet_base_url;
58 protected HashMap<String, SolrServer> solr_core_cache;
59 protected SolrQueryWrapper solr_src = null;
60
61 protected ArrayList<String> _facets = new ArrayList<String>();
62
63 public GS2SolrSearch()
64 {
65 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
66 does_faceting = true;
67 does_highlight_snippets = true;
68 does_full_field_highlighting = true;
69 // Used to store the solr cores that match the required 'level'
70 // of search (e.g. either document-level=>didx, or
71 // section-level=>sidx. The hashmap is filled out on demand
72 // based on 'level' parameter passed in to 'setUpQueryer()'
73
74 solr_core_cache = new HashMap<String, SolrServer>();
75
76 this.solr_src = new SolrQueryWrapper();
77
78 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
79 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
80 try {
81 Properties globalProperties = new Properties();
82 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
83 String host = globalProperties.getProperty("tomcat.server", "localhost");
84 String port = globalProperties.getProperty("tomcat.port", "8383");
85 String protocol = globalProperties.getProperty("tomcat.protocol", "http");
86
87 String portStr = port.equals("80") ? "" : ":"+port;
88 solr_servlet_base_url = protocol+"://"+host+portStr+SOLR_SERVLET_SUFFIX;
89 } catch(Exception e) {
90 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
91 }
92 }
93
94 /** configure this service */
95 public boolean configure(Element info, Element extra_info)
96 {
97 boolean success = super.configure(info, extra_info);
98
99 // clear the map of solr cores for this collection added to the map upon querying
100 solr_core_cache.clear();
101
102 if(!success) {
103 return false;
104 }
105
106 // Setting up facets
107 // TODO - get these from build config, in case some haven't built
108 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
109 NodeList facet_list = info.getElementsByTagName("facet");
110 for (int i=0; i<facet_list.getLength(); i++) {
111 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
112 }
113 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
114
115 // ArrayList<String> chosenFacets = new ArrayList<String>();
116 // for (int i = 0; i < configIndexElems.getLength(); i++)
117 // {
118 // Element current = (Element) configIndexElems.item(i);
119 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
120 // {
121 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
122 // }
123 // }
124
125 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
126 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
127
128 // for (int j = 0; j < buildIndexElems.getLength(); j++)
129 // {
130 // Element current = (Element) buildIndexElems.item(j);
131 // for (int i = 0; i < chosenFacets.size(); i++)
132 // {
133 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
134 // {
135 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
136 // }
137 // }
138 // }
139
140 return true;
141 }
142
143 public void cleanUp()
144 {
145 super.cleanUp();
146 this.solr_src.cleanUp();
147
148 // clear the map keeping track of the SolrServers in this collection
149 solr_core_cache.clear();
150 }
151
152 /** add in the SOLR specific params to TextQuery */
153 protected void addCustomQueryParams(Element param_list, String lang)
154 {
155 super.addCustomQueryParams(param_list, lang);
156 /** Add in the sort order asc/desc param */
157 createParameter(SORT_ORDER_PARAM, param_list, lang);
158 }
159 /** add in SOLR specific params for AdvancedFieldQuery */
160 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
161 {
162 super.addCustomQueryParamsAdvField(param_list, lang);
163 createParameter(SORT_ORDER_PARAM, param_list, lang);
164
165 }
166 /** create a param and add to the list */
167 protected void createParameter(String name, Element param_list, String lang)
168 {
169 Document doc = param_list.getOwnerDocument();
170 Element param = null;
171 String param_default = paramDefaults.get(name);
172 if (name.equals(SORT_ORDER_PARAM)) {
173 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
174 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
175
176 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
177 }
178
179 if (param != null)
180 {
181 param_list.appendChild(param);
182 }
183 else
184 {
185 super.createParameter(name, param_list, lang);
186 }
187
188 }
189
190 /** methods to handle actually doing the query */
191
192 /** do any initialisation of the query object */
193 protected boolean setUpQueryer(HashMap params)
194 {
195 this.solr_src.clearFacets();
196 this.solr_src.clearFacetQueries();
197
198 for (int i = 0; i < _facets.size(); i++)
199 {
200 this.solr_src.addFacet(_facets.get(i));
201 }
202
203 String index = "didx";
204 if (this.default_level.toUpperCase().equals("SEC")) {
205 index = "sidx";
206 }
207 String physical_index_language_name = null;
208 String physical_sub_index_name = null;
209 int maxdocs = 100;
210 int hits_per_page = 20;
211 int start_page = 1;
212 // set up the query params
213 Set entries = params.entrySet();
214 Iterator i = entries.iterator();
215 while (i.hasNext())
216 {
217 Map.Entry m = (Map.Entry) i.next();
218 String name = (String) m.getKey();
219 String value = (String) m.getValue();
220
221 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
222
223 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
224 {
225 maxdocs = Integer.parseInt(value);
226 }
227 else if (name.equals(HITS_PER_PAGE_PARAM))
228 {
229 hits_per_page = Integer.parseInt(value);
230 }
231 else if (name.equals(START_PAGE_PARAM))
232 {
233 start_page = Integer.parseInt(value);
234 }
235 else if (name.equals(MATCH_PARAM))
236 {
237 if (value.equals(MATCH_PARAM_ALL))
238 {
239 this.solr_src.setDefaultConjunctionOperator("AND");
240 }
241 else
242 {
243 this.solr_src.setDefaultConjunctionOperator("OR");
244 }
245 }
246 else if (name.equals(RANK_PARAM))
247 {
248 if (value.equals(RANK_PARAM_RANK))
249 {
250 value = SolrQueryWrapper.SORT_BY_RANK;
251 } else if (value.equals(RANK_PARAM_NONE)) {
252 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
253 }
254
255 this.solr_src.setSortField(value);
256 }
257 else if (name.equals(SORT_ORDER_PARAM)) {
258 if (value.equals(SORT_ORDER_DESCENDING)) {
259 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
260 } else {
261 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
262 }
263 }
264 else if (name.equals(LEVEL_PARAM))
265 {
266 if (value.toUpperCase().equals("SEC"))
267 {
268 index = "sidx";
269 }
270 else
271 {
272 index = "didx";
273 }
274 }
275 // Would facets ever come in through params???
276 else if (name.equals("facets") && value.length() > 0)
277 {
278 String[] facets = value.split(",");
279
280 for (String facet : facets)
281 {
282 this.solr_src.addFacet(facet);
283 }
284 }
285 else if (name.equals("facetQueries") && value.length() > 0)
286 {
287 this.solr_src.addFacetQuery(value);
288 }
289 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
290 {
291 physical_sub_index_name = value;
292 }
293 else if (name.equals(INDEX_LANGUAGE_PARAM))
294 {
295 physical_index_language_name = value;
296 } // ignore any others
297 }
298 // set up start and end results if necessary
299 int start_results = 1;
300 if (start_page != 1)
301 {
302 start_results = ((start_page - 1) * hits_per_page) + 1;
303 }
304 int end_results = hits_per_page * start_page;
305 this.solr_src.setStartResults(start_results);
306 this.solr_src.setEndResults(end_results);
307 this.solr_src.setMaxDocs(maxdocs);
308
309 if (index.equals("sidx") || index.equals("didx"))
310 {
311 if (physical_sub_index_name != null)
312 {
313 index += physical_sub_index_name;
314 }
315 if (physical_index_language_name != null)
316 {
317 index += physical_index_language_name;
318 }
319 }
320
321 // now we know the index level, we can dig out the required
322 // solr-core, (caching the result in 'solr_core_cache')
323 String core_name = getCollectionCoreNamePrefix() + "-" + index;
324
325 SolrServer solr_core = null;
326
327 if (!solr_core_cache.containsKey(core_name))
328 {
329 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
330 solr_core_cache.put(core_name, solr_core);
331 }
332 else
333 {
334 solr_core = solr_core_cache.get(core_name);
335 }
336
337 this.solr_src.setSolrCore(solr_core);
338 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
339 this.solr_src.initialise();
340 return true;
341 }
342
343 /** do the query */
344 protected Object runQuery(String query)
345 {
346 try
347 {
348 //if it is a Highlighting Query - execute it
349 this.solr_src.setHighlightField(indexField);
350 if(hldocOID != null)
351 {
352 String rslt = this.solr_src.runHighlightingQuery(query,hldocOID);
353 // Check result
354 if (rslt != null)
355 {
356 return rslt;
357 }
358 //Highlighting request failed. Do standard request.
359 hldocOID = null;
360 }
361 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
362
363 return sqr;
364 }
365 catch (Exception e)
366 {
367 logger.error("Exception happened in run query: ", e);
368 }
369
370 return null;
371 }
372
373
374 /** get the total number of docs that match */
375 protected long numDocsMatched(Object query_result)
376 {
377 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
378
379 }
380
381 /** get the list of doc ids */
382 protected String[] getDocIDs(Object query_result)
383 {
384 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
385 String[] doc_nums = new String[docs.size()];
386 for (int d = 0; d < docs.size(); d++)
387 {
388 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
389 doc_nums[d] = doc_num;
390 }
391 return doc_nums;
392 }
393
394 /** get the list of doc ranks */
395 protected String[] getDocRanks(Object query_result)
396 {
397 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
398 String[] doc_ranks = new String[docs.size()];
399 for (int d = 0; d < docs.size(); d++)
400 {
401 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
402 }
403 return doc_ranks;
404 }
405
406 /** add in term info if available */
407 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
408 {
409 Document doc = term_list.getOwnerDocument();
410 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
411
412 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
413 for (int t = 0; t < terms.size(); t++)
414 {
415 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
416
417 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
418 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
419 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
420 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
421 term_elem.setAttribute(FIELD_ATT, term_info.field_);
422 term_list.appendChild(term_elem);
423 }
424
425 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
426 for (int t = 0; t < stopwords.size(); t++)
427 {
428 String stopword = (String) stopwords.get(t);
429
430 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
431 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
432 term_list.appendChild(stopword_elem);
433 }
434
435 return true;
436 }
437
438 protected ArrayList<FacetWrapper> getFacets(Object query_result)
439 {
440 if (!(query_result instanceof SolrQueryResult))
441 {
442 return null;
443 }
444
445 SolrQueryResult result = (SolrQueryResult) query_result;
446 List<FacetField> facets = result.getFacetResults();
447
448 if (facets == null)
449 {
450 return null;
451 }
452
453 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
454
455 for (FacetField facet : facets)
456 {
457 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
458 // String name = wrap.getName();
459 // String display_name = "Poo";
460 // wrap.setDisplayName(display_name);
461
462 newFacetList.add(wrap);
463 }
464
465 return newFacetList;
466 }
467 @Override
468 protected Map<String, Map<String, List<String>>> getHighlightSnippets(Object query_result)
469 {
470 if (!(query_result instanceof SolrQueryResult))
471 {
472 return null;
473 }
474
475 SolrQueryResult result = (SolrQueryResult) query_result;
476
477 return result.getHighlightResults();
478 }
479
480
481 protected String getCollectionCoreNamePrefix() {
482 String site_name = this.router.getSiteName();
483 String coll_name = this.cluster_name;
484 String collection_core_name_prefix = site_name + "-" + coll_name;
485 return collection_core_name_prefix;
486 }
487}
Note: See TracBrowser for help on using the repository browser.