source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 29710

Last change on this file since 29710 was 29710, checked in by ak19, 9 years ago

Removing large chunks of commented-out code ahead of changes. The commented-out code was specific to Solr 3.3 and does not apply to Solr 4.7.2, which we use now.

  • Property svn:executable set to *
File size: 15.3 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.io.File;
23import java.util.ArrayList;
24import java.util.Collection;
25import java.util.HashMap;
26import java.util.Iterator;
27import java.util.List;
28import java.util.Map;
29import java.util.Set;
30import java.util.Vector;
31
32import org.apache.log4j.Logger;
33import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
34import org.apache.solr.client.solrj.response.FacetField;
35import org.apache.solr.core.CoreContainer;
36import org.apache.solr.core.SolrCore;
37import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
38import org.greenstone.gsdl3.util.FacetWrapper;
39import org.greenstone.gsdl3.util.GSFile;
40import org.greenstone.gsdl3.util.GSXML;
41import org.greenstone.gsdl3.util.SolrFacetWrapper;
42import org.greenstone.gsdl3.util.SolrQueryResult;
43import org.greenstone.gsdl3.util.SolrQueryWrapper;
44import org.greenstone.util.GlobalProperties;
45import org.w3c.dom.Document;
46import org.w3c.dom.Element;
47import org.w3c.dom.NodeList;
48
49public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
50{
51
52 protected static final String SORT_ORDER_PARAM = "sortOrder";
53 protected static final String SORT_ORDER_DESCENDING = "1";
54 protected static final String SORT_ORDER_ASCENDING = "0";
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());
57
58 static protected CoreContainer all_solr_cores = null;
59
60 protected HashMap solr_core_cache;
61 protected SolrQueryWrapper solr_src = null;
62
63 protected ArrayList<String> _facets = new ArrayList<String>();
64
65 public GS2SolrSearch()
66 {
67 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
68 does_faceting = true;
69 // Used to store the solr cores that match the required 'level'
70 // of search (e.g. either document-level=>didx, or
71 // section-level=>sidx. The hashmap is filled out on demand
72 // based on 'level' parameter passed in to 'setUpQueryer()'
73
74 solr_core_cache = new HashMap();
75
76 if (all_solr_cores == null)
77 {
78 // Share one CoreContainer across all sites/collections
79 try
80 {
81 String gsdl3_writablehome = GlobalProperties.getGSDL3WritableHome();
82 String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");
83
84 String solr_home_str = GSFile.extHome(gsdl3_writablehome, solr_ext_name);
85
86 all_solr_cores = new CoreContainer(solr_home_str);
87 }
88 catch (Exception e)
89 {
90 e.printStackTrace();
91 }
92 }
93
94 this.solr_src = new SolrQueryWrapper();
95 }
96
97 /** configure this service */
98 public boolean configure(Element info, Element extra_info)
99 {
100 boolean success = super.configure(info, extra_info);
101
102 // 1. Make the CoreContainer reload solr.xml
103 // This is particularly needed for when activate.pl is executed during
104 // a running GS3 server. At that point, the solr collection is reactivated and
105 // we need to tell Greenstone that the solr index has changed. This requires
106 // the CoreContainer to reload the solr.xml file, and it all works again.
107
108 solr_core_cache.clear(); // clear the map of solr cores for this collection added to the map upon querying
109
110 // Reload the updated solr.xml into the CoreContainer
111 // (Doing an all_solr_cores.shutdown() first doesn't seem to be required)
112 try {
113 String solr_home_str = all_solr_cores.getSolrHome();
114 File solr_home = new File(solr_home_str);
115 File solr_xml = new File( solr_home,"solr.xml" );
116
117 //all_solr_cores.load(solr_home_str,solr_xml);
118 all_solr_cores.load();
119
120 } catch (Exception e) {
121 logger.error("Exception in GS2SolrSearch.configure(): " + e.getMessage());
122 e.printStackTrace();
123 return false;
124 }
125
126 if(!success) {
127 return false;
128 }
129
130 // 2. Setting up facets
131 // TODO - get these from build config, in case some haven't built
132 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
133 NodeList facet_list = info.getElementsByTagName("facet");
134 for (int i=0; i<facet_list.getLength(); i++) {
135 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
136 }
137 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
138
139 // ArrayList<String> chosenFacets = new ArrayList<String>();
140 // for (int i = 0; i < configIndexElems.getLength(); i++)
141 // {
142 // Element current = (Element) configIndexElems.item(i);
143 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
144 // {
145 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
146 // }
147 // }
148
149 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
150 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
151
152 // for (int j = 0; j < buildIndexElems.getLength(); j++)
153 // {
154 // Element current = (Element) buildIndexElems.item(j);
155 // for (int i = 0; i < chosenFacets.size(); i++)
156 // {
157 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
158 // {
159 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
160 // }
161 // }
162 // }
163
164 return true;
165 }
166
167 public void cleanUp()
168 {
169 super.cleanUp();
170 this.solr_src.cleanUp();
171
172
173 // 1. clear the map keeping track of the solrcores' EmbeddedSolrServers in this collection
174 solr_core_cache.clear();
175
176 // 2. For solr 4.7.2., GLI (and ant stop from cmd) is unable to shutdown the tomcat server fully,
177 // IF any collection has been previewed AND if there are any solr collections in the collect folder.
178 // This is because although the GS3 server seems to have stopped running at this stage, running a
179 // `ps aux | grep tomcat` reveals that some part of tomcat is still running. It seems to be still
180 // holding on to the cores. Doing an all_cores.shutdown() here, stops GS3 from hanging on to the cores
181 // while still preserving the core desciptions in web/ext/solr.xml as needed when restarting the GS3 server.
182
183 // Need GS3 server (tomcat) to release the cores, else a part of tomcat is still running in the background
184 // on ant stop, holding a lock on the cores. Doing shutdown() preserves core descriptions in solr.xml
185 all_solr_cores.shutdown();
186 all_solr_cores = null;
187 }
188
189 /** add in the SOLR specific params to TextQuery */
190 protected void addCustomQueryParams(Element param_list, String lang)
191 {
192 super.addCustomQueryParams(param_list, lang);
193 /** Add in the sort order asc/desc param */
194 createParameter(SORT_ORDER_PARAM, param_list, lang);
195 }
196 /** add in SOLR specific params for AdvancedFieldQuery */
197 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
198 {
199 super.addCustomQueryParamsAdvField(param_list, lang);
200 createParameter(SORT_ORDER_PARAM, param_list, lang);
201
202 }
203 /** create a param and add to the list */
204 protected void createParameter(String name, Element param_list, String lang)
205 {
206 Document doc = param_list.getOwnerDocument();
207 Element param = null;
208 String param_default = paramDefaults.get(name);
209 if (name.equals(SORT_ORDER_PARAM)) {
210 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
211 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
212
213 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
214 }
215
216 if (param != null)
217 {
218 param_list.appendChild(param);
219 }
220 else
221 {
222 super.createParameter(name, param_list, lang);
223 }
224
225 }
226
227 /** methods to handle actually doing the query */
228
229 /** do any initialisation of the query object */
230 protected boolean setUpQueryer(HashMap params)
231 {
232 this.solr_src.clearFacets();
233 this.solr_src.clearFacetQueries();
234
235 for (int i = 0; i < _facets.size(); i++)
236 {
237 this.solr_src.addFacet(_facets.get(i));
238 }
239
240 String index = "didx";
241 String physical_index_language_name = null;
242 String physical_sub_index_name = null;
243 int maxdocs = 100;
244 int hits_per_page = 20;
245 int start_page = 1;
246 // set up the query params
247 Set entries = params.entrySet();
248 Iterator i = entries.iterator();
249 while (i.hasNext())
250 {
251 Map.Entry m = (Map.Entry) i.next();
252 String name = (String) m.getKey();
253 String value = (String) m.getValue();
254
255 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
256
257 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
258 {
259 maxdocs = Integer.parseInt(value);
260 }
261 else if (name.equals(HITS_PER_PAGE_PARAM))
262 {
263 hits_per_page = Integer.parseInt(value);
264 }
265 else if (name.equals(START_PAGE_PARAM))
266 {
267 start_page = Integer.parseInt(value);
268 }
269 else if (name.equals(MATCH_PARAM))
270 {
271 if (value.equals(MATCH_PARAM_ALL))
272 {
273 this.solr_src.setDefaultConjunctionOperator("AND");
274 }
275 else
276 {
277 this.solr_src.setDefaultConjunctionOperator("OR");
278 }
279 }
280 else if (name.equals(RANK_PARAM))
281 {
282 if (value.equals(RANK_PARAM_RANK))
283 {
284 value = SolrQueryWrapper.SORT_BY_RANK;
285 } else if (value.equals(RANK_PARAM_NONE)) {
286 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
287 }
288
289 this.solr_src.setSortField(value);
290 }
291 else if (name.equals(SORT_ORDER_PARAM)) {
292 if (value.equals(SORT_ORDER_DESCENDING)) {
293 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
294 } else {
295 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
296 }
297 }
298 else if (name.equals(LEVEL_PARAM))
299 {
300 if (value.toUpperCase().equals("SEC"))
301 {
302 index = "sidx";
303 }
304 else
305 {
306 index = "didx";
307 }
308 }
309 // Would facets ever come in through params???
310 else if (name.equals("facets") && value.length() > 0)
311 {
312 String[] facets = value.split(",");
313
314 for (String facet : facets)
315 {
316 this.solr_src.addFacet(facet);
317 }
318 }
319 else if (name.equals("facetQueries") && value.length() > 0)
320 {
321 this.solr_src.addFacetQuery(value);
322 }
323 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
324 {
325 physical_sub_index_name = value;
326 }
327 else if (name.equals(INDEX_LANGUAGE_PARAM))
328 {
329 physical_index_language_name = value;
330 } // ignore any others
331 }
332 // set up start and end results if necessary
333 int start_results = 1;
334 if (start_page != 1)
335 {
336 start_results = ((start_page - 1) * hits_per_page) + 1;
337 }
338 int end_results = hits_per_page * start_page;
339 this.solr_src.setStartResults(start_results);
340 this.solr_src.setEndResults(end_results);
341 this.solr_src.setMaxDocs(maxdocs);
342
343 if (index.equals("sidx") || index.equals("didx"))
344 {
345 if (physical_sub_index_name != null)
346 {
347 index += physical_sub_index_name;
348 }
349 if (physical_index_language_name != null)
350 {
351 index += physical_index_language_name;
352 }
353 }
354
355 // now we know the index level, we can dig out the required
356 // solr-core, (caching the result in 'solr_core_cache')
357 String core_name = getCollectionCoreNamePrefix() + "-" + index;
358
359 EmbeddedSolrServer solr_core = null;
360
361 if (!solr_core_cache.containsKey(core_name))
362 {
363 solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
364
365 solr_core_cache.put(core_name, solr_core);
366 }
367 else
368 {
369 solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
370 }
371
372 this.solr_src.setSolrCore(solr_core);
373 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
374 this.solr_src.initialise();
375 return true;
376 }
377
378 /** do the query */
379 protected Object runQuery(String query)
380 {
381 try
382 {
383 //SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
384 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
385
386 return sqr;
387 }
388 catch (Exception e)
389 {
390 logger.error("Exception happened in run query: ", e);
391 }
392
393 return null;
394 }
395
396 /** get the total number of docs that match */
397 protected long numDocsMatched(Object query_result)
398 {
399 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
400
401 }
402
403 /** get the list of doc ids */
404 protected String[] getDocIDs(Object query_result)
405 {
406 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
407 String[] doc_nums = new String[docs.size()];
408 for (int d = 0; d < docs.size(); d++)
409 {
410 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
411 doc_nums[d] = doc_num;
412 }
413 return doc_nums;
414 }
415
416 /** get the list of doc ranks */
417 protected String[] getDocRanks(Object query_result)
418 {
419 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
420 String[] doc_ranks = new String[docs.size()];
421 for (int d = 0; d < docs.size(); d++)
422 {
423 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
424 }
425 return doc_ranks;
426 }
427
428 /** add in term info if available */
429 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
430 {
431 Document doc = term_list.getOwnerDocument();
432 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
433
434 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
435 for (int t = 0; t < terms.size(); t++)
436 {
437 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
438
439 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
440 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
441 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
442 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
443 term_elem.setAttribute(FIELD_ATT, term_info.field_);
444 term_list.appendChild(term_elem);
445 }
446
447 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
448 for (int t = 0; t < stopwords.size(); t++)
449 {
450 String stopword = (String) stopwords.get(t);
451
452 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
453 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
454 term_list.appendChild(stopword_elem);
455 }
456
457 return true;
458 }
459
460 protected ArrayList<FacetWrapper> getFacets(Object query_result)
461 {
462 if (!(query_result instanceof SolrQueryResult))
463 {
464 return null;
465 }
466
467 SolrQueryResult result = (SolrQueryResult) query_result;
468 List<FacetField> facets = result.getFacetResults();
469
470 if (facets == null)
471 {
472 return null;
473 }
474
475 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
476
477 for (FacetField facet : facets)
478 {
479 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
480 // String name = wrap.getName();
481 // String display_name = "Poo";
482 // wrap.setDisplayName(display_name);
483
484 newFacetList.add(wrap);
485 }
486
487 return newFacetList;
488 }
489
490
491 protected String getCollectionCoreNamePrefix() {
492 String site_name = this.router.getSiteName();
493 String coll_name = this.cluster_name;
494 String collection_core_name_prefix = site_name + "-" + coll_name;
495 return collection_core_name_prefix;
496 }
497}
Note: See TracBrowser for help on using the repository browser.