source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java@ 29711

Last change on this file since 29711 was 29711, checked in by ak19, 9 years ago

Moving from using the solr jetty server to solr using the GS3 tomcat server. Now localhost:8383/solr hosts the solr server RESTful pages. Changes: 1. Minor changes to GS3 build.xml. 2. GLI no longer does the temporary stopping of the GS3 server, launching jetty server for building a solr collection, stopping jetty, restarting GS3 tomcat server. GLI leaves the GS3 server running. 3. The main changes are to ext/solr. The ext/solr/gs3-setup.sh sets the new SOLR_PORT and SOLR_HOST variables read from the GS3 build.properties, as the jetty port and host variables are no longer used. ext/solr/build.xml now puts the solr war file into tomcat's webapps, as well as helper libraries necessary (xalan related); a solr.xml context file is created from a template file and placed into tomcat's conf/Catalina/localhost; additional solr jar files are copied into tomcat/lib, as well as the slf4j bridge being copied into GS3/web/WEB-INF/lib; the solr perl code has been changed to use the new RESTful URLs and particularly to work with solr running off the GS3 tomcat server, or stop and start it as required, rather than working with (or stopping and starting) the solr jetty server. A new run_solr_server.pl executable script runs the tomcat server rather than the jetty server; major changes to the Java Solr code to no longer work with the EmbeddedSolrServer (which caused a conflict when the index is accessed by solr jetty server upon rebuild of solr collections), our solr Java code now uses HttpSolrServer to contact the solr servlet running off tomcat. 4. Still a bug: when search results go over a page after rebuilding a solr collection in GLI against a running GS3 server, the 2nd page of search results isn't present and things break. But if the server is not running, solr collections rebuild fine, so the changes do everything that GS3.06 did and more.

  • Property svn:executable set to *
File size: 13.9 KB
Line 
1/*
2 * GS2SolrSearch.java
3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult;
import org.greenstone.gsdl3.util.FacetWrapper;
import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.SolrFacetWrapper;
import org.greenstone.gsdl3.util.SolrQueryResult;
import org.greenstone.gsdl3.util.SolrQueryWrapper;
import org.greenstone.util.GlobalProperties;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
46
47public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
48{
49
/** Path suffix appended to protocol://host[:port] to form the solr servlet base URL. */
public static final String SOLR_SERVLET_SUFFIX = "/solr";
/** Name of the query parameter that selects the sort direction. */
protected static final String SORT_ORDER_PARAM = "sortOrder";
/** Value of {@link #SORT_ORDER_PARAM} requesting descending order (the registered default). */
protected static final String SORT_ORDER_DESCENDING = "1";
/** Value of {@link #SORT_ORDER_PARAM} requesting ascending order. */
protected static final String SORT_ORDER_ASCENDING = "0";

/** Logger shared by all instances of this service. */
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());

/** Base URL of the solr servlet; worked out once in the constructor from global.properties. */
protected String solr_servlet_base_url;
/** Solr servers created so far, keyed by core name; emptied by configure() and cleanUp(). */
protected HashMap<String, SolrServer> solr_core_cache;
/** Wrapper through which queries are set up and run. */
protected SolrQueryWrapper solr_src = null;

/** Shortnames of the facet elements collected in configure(); handed to solr_src for every query. */
protected ArrayList<String> _facets = new ArrayList<String>();
62
63 public GS2SolrSearch()
64 {
65 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_DESCENDING);
66 does_faceting = true;
67 // Used to store the solr cores that match the required 'level'
68 // of search (e.g. either document-level=>didx, or
69 // section-level=>sidx. The hashmap is filled out on demand
70 // based on 'level' parameter passed in to 'setUpQueryer()'
71
72 solr_core_cache = new HashMap<String, SolrServer>();
73
74 this.solr_src = new SolrQueryWrapper();
75
76 // Create the solr servlet url on GS3's tomcat. By default it's "http://localhost:8383/solr"
77 // Don't do this in configure(), since the tomcat url will remain unchanged while tomcat is running
78 try {
79 Properties globalProperties = new Properties();
80 globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties"));
81 String host = globalProperties.getProperty("tomcat.server", "localhost");
82 String port = globalProperties.getProperty("tomcat.port", "8383");
83 String protocol = globalProperties.getProperty("tomcat.protocol", "http");
84
85 String portStr = port.equals("80") ? "" : ":"+port;
86 solr_servlet_base_url = protocol+"://"+host+portStr+SOLR_SERVLET_SUFFIX;
87 } catch(Exception e) {
88 logger.error("Error reading greenstone's tomcat solr server properties from global.properties", e);
89 }
90 }
91
92 /** configure this service */
93 public boolean configure(Element info, Element extra_info)
94 {
95 boolean success = super.configure(info, extra_info);
96
97 // clear the map of solr cores for this collection added to the map upon querying
98 solr_core_cache.clear();
99
100 if(!success) {
101 return false;
102 }
103
104 // Setting up facets
105 // TODO - get these from build config, in case some haven't built
106 Element searchElem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
107 NodeList facet_list = info.getElementsByTagName("facet");
108 for (int i=0; i<facet_list.getLength(); i++) {
109 _facets.add(((Element)facet_list.item(i)).getAttribute(GSXML.SHORTNAME_ATT));
110 }
111 // NodeList configIndexElems = searchElem.getElementsByTagName(GSXML.INDEX_ELEM);
112
113 // ArrayList<String> chosenFacets = new ArrayList<String>();
114 // for (int i = 0; i < configIndexElems.getLength(); i++)
115 // {
116 // Element current = (Element) configIndexElems.item(i);
117 // if (current.getAttribute(GSXML.FACET_ATT).equals("true"))
118 // {
119 // chosenFacets.add(current.getAttribute(GSXML.NAME_ATT));
120 // }
121 // }
122
123 // Element indexListElem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_ELEM + GSXML.LIST_MODIFIER);
124 // NodeList buildIndexElems = indexListElem.getElementsByTagName(GSXML.INDEX_ELEM);
125
126 // for (int j = 0; j < buildIndexElems.getLength(); j++)
127 // {
128 // Element current = (Element) buildIndexElems.item(j);
129 // for (int i = 0; i < chosenFacets.size(); i++)
130 // {
131 // if (current.getAttribute(GSXML.NAME_ATT).equals(chosenFacets.get(i)))
132 // {
133 // _facets.add(current.getAttribute(GSXML.SHORTNAME_ATT));
134 // }
135 // }
136 // }
137
138 return true;
139 }
140
141 public void cleanUp()
142 {
143 super.cleanUp();
144 this.solr_src.cleanUp();
145
146 // clear the map keeping track of the SolrServers in this collection
147 solr_core_cache.clear();
148 }
149
150 /** add in the SOLR specific params to TextQuery */
151 protected void addCustomQueryParams(Element param_list, String lang)
152 {
153 super.addCustomQueryParams(param_list, lang);
154 /** Add in the sort order asc/desc param */
155 createParameter(SORT_ORDER_PARAM, param_list, lang);
156 }
157 /** add in SOLR specific params for AdvancedFieldQuery */
158 protected void addCustomQueryParamsAdvField(Element param_list, String lang)
159 {
160 super.addCustomQueryParamsAdvField(param_list, lang);
161 createParameter(SORT_ORDER_PARAM, param_list, lang);
162
163 }
164 /** create a param and add to the list */
165 protected void createParameter(String name, Element param_list, String lang)
166 {
167 Document doc = param_list.getOwnerDocument();
168 Element param = null;
169 String param_default = paramDefaults.get(name);
170 if (name.equals(SORT_ORDER_PARAM)) {
171 String[] vals = { SORT_ORDER_ASCENDING, SORT_ORDER_DESCENDING };
172 String[] vals_texts = { getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_ASCENDING, lang), getTextString("param." + SORT_ORDER_PARAM + "." + SORT_ORDER_DESCENDING, lang) };
173
174 param = GSXML.createParameterDescription(doc, SORT_ORDER_PARAM, getTextString("param." + SORT_ORDER_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, param_default, vals, vals_texts);
175 }
176
177 if (param != null)
178 {
179 param_list.appendChild(param);
180 }
181 else
182 {
183 super.createParameter(name, param_list, lang);
184 }
185
186 }
187
188 /** methods to handle actually doing the query */
189
190 /** do any initialisation of the query object */
191 protected boolean setUpQueryer(HashMap params)
192 {
193 this.solr_src.clearFacets();
194 this.solr_src.clearFacetQueries();
195
196 for (int i = 0; i < _facets.size(); i++)
197 {
198 this.solr_src.addFacet(_facets.get(i));
199 }
200
201 String index = "didx";
202 String physical_index_language_name = null;
203 String physical_sub_index_name = null;
204 int maxdocs = 100;
205 int hits_per_page = 20;
206 int start_page = 1;
207 // set up the query params
208 Set entries = params.entrySet();
209 Iterator i = entries.iterator();
210 while (i.hasNext())
211 {
212 Map.Entry m = (Map.Entry) i.next();
213 String name = (String) m.getKey();
214 String value = (String) m.getValue();
215
216 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value);
217
218 if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
219 {
220 maxdocs = Integer.parseInt(value);
221 }
222 else if (name.equals(HITS_PER_PAGE_PARAM))
223 {
224 hits_per_page = Integer.parseInt(value);
225 }
226 else if (name.equals(START_PAGE_PARAM))
227 {
228 start_page = Integer.parseInt(value);
229 }
230 else if (name.equals(MATCH_PARAM))
231 {
232 if (value.equals(MATCH_PARAM_ALL))
233 {
234 this.solr_src.setDefaultConjunctionOperator("AND");
235 }
236 else
237 {
238 this.solr_src.setDefaultConjunctionOperator("OR");
239 }
240 }
241 else if (name.equals(RANK_PARAM))
242 {
243 if (value.equals(RANK_PARAM_RANK))
244 {
245 value = SolrQueryWrapper.SORT_BY_RANK;
246 } else if (value.equals(RANK_PARAM_NONE)) {
247 value = SolrQueryWrapper.SORT_BY_INDEX_ORDER;
248 }
249
250 this.solr_src.setSortField(value);
251 }
252 else if (name.equals(SORT_ORDER_PARAM)) {
253 if (value.equals(SORT_ORDER_DESCENDING)) {
254 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_DESCENDING);
255 } else {
256 this.solr_src.setSortOrder(SolrQueryWrapper.SORT_ASCENDING);
257 }
258 }
259 else if (name.equals(LEVEL_PARAM))
260 {
261 if (value.toUpperCase().equals("SEC"))
262 {
263 index = "sidx";
264 }
265 else
266 {
267 index = "didx";
268 }
269 }
270 // Would facets ever come in through params???
271 else if (name.equals("facets") && value.length() > 0)
272 {
273 String[] facets = value.split(",");
274
275 for (String facet : facets)
276 {
277 this.solr_src.addFacet(facet);
278 }
279 }
280 else if (name.equals("facetQueries") && value.length() > 0)
281 {
282 this.solr_src.addFacetQuery(value);
283 }
284 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
285 {
286 physical_sub_index_name = value;
287 }
288 else if (name.equals(INDEX_LANGUAGE_PARAM))
289 {
290 physical_index_language_name = value;
291 } // ignore any others
292 }
293 // set up start and end results if necessary
294 int start_results = 1;
295 if (start_page != 1)
296 {
297 start_results = ((start_page - 1) * hits_per_page) + 1;
298 }
299 int end_results = hits_per_page * start_page;
300 this.solr_src.setStartResults(start_results);
301 this.solr_src.setEndResults(end_results);
302 this.solr_src.setMaxDocs(maxdocs);
303
304 if (index.equals("sidx") || index.equals("didx"))
305 {
306 if (physical_sub_index_name != null)
307 {
308 index += physical_sub_index_name;
309 }
310 if (physical_index_language_name != null)
311 {
312 index += physical_index_language_name;
313 }
314 }
315
316 // now we know the index level, we can dig out the required
317 // solr-core, (caching the result in 'solr_core_cache')
318 String core_name = getCollectionCoreNamePrefix() + "-" + index;
319
320 SolrServer solr_core = null;
321
322 if (!solr_core_cache.containsKey(core_name))
323 {
324 solr_core = new HttpSolrServer(this.solr_servlet_base_url+"/"+core_name);
325 solr_core_cache.put(core_name, solr_core);
326 }
327 else
328 {
329 solr_core = solr_core_cache.get(core_name);
330 }
331
332 this.solr_src.setSolrCore(solr_core);
333 this.solr_src.setCollectionCoreNamePrefix(getCollectionCoreNamePrefix());
334 this.solr_src.initialise();
335 return true;
336 }
337
338 /** do the query */
339 protected Object runQuery(String query)
340 {
341 try
342 {
343 SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
344
345 return sqr;
346 }
347 catch (Exception e)
348 {
349 logger.error("Exception happened in run query: ", e);
350 }
351
352 return null;
353 }
354
355 /** get the total number of docs that match */
356 protected long numDocsMatched(Object query_result)
357 {
358 return ((SharedSoleneQueryResult) query_result).getTotalDocs();
359
360 }
361
362 /** get the list of doc ids */
363 protected String[] getDocIDs(Object query_result)
364 {
365 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
366 String[] doc_nums = new String[docs.size()];
367 for (int d = 0; d < docs.size(); d++)
368 {
369 String doc_num = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
370 doc_nums[d] = doc_num;
371 }
372 return doc_nums;
373 }
374
375 /** get the list of doc ranks */
376 protected String[] getDocRanks(Object query_result)
377 {
378 Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
379 String[] doc_ranks = new String[docs.size()];
380 for (int d = 0; d < docs.size(); d++)
381 {
382 doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
383 }
384 return doc_ranks;
385 }
386
387 /** add in term info if available */
388 protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
389 {
390 Document doc = term_list.getOwnerDocument();
391 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
392
393 Vector terms = ((SharedSoleneQueryResult) query_result).getTerms();
394 for (int t = 0; t < terms.size(); t++)
395 {
396 SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);
397
398 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
399 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
400 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
401 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
402 term_elem.setAttribute(FIELD_ATT, term_info.field_);
403 term_list.appendChild(term_elem);
404 }
405
406 Vector stopwords = ((SharedSoleneQueryResult) query_result).getStopWords();
407 for (int t = 0; t < stopwords.size(); t++)
408 {
409 String stopword = (String) stopwords.get(t);
410
411 Element stopword_elem = doc.createElement(GSXML.STOPWORD_ELEM);
412 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
413 term_list.appendChild(stopword_elem);
414 }
415
416 return true;
417 }
418
419 protected ArrayList<FacetWrapper> getFacets(Object query_result)
420 {
421 if (!(query_result instanceof SolrQueryResult))
422 {
423 return null;
424 }
425
426 SolrQueryResult result = (SolrQueryResult) query_result;
427 List<FacetField> facets = result.getFacetResults();
428
429 if (facets == null)
430 {
431 return null;
432 }
433
434 ArrayList<FacetWrapper> newFacetList = new ArrayList<FacetWrapper>();
435
436 for (FacetField facet : facets)
437 {
438 SolrFacetWrapper wrap = new SolrFacetWrapper(facet);
439 // String name = wrap.getName();
440 // String display_name = "Poo";
441 // wrap.setDisplayName(display_name);
442
443 newFacetList.add(wrap);
444 }
445
446 return newFacetList;
447 }
448
449
450 protected String getCollectionCoreNamePrefix() {
451 String site_name = this.router.getSiteName();
452 String coll_name = this.cluster_name;
453 String collection_core_name_prefix = site_name + "-" + coll_name;
454 return collection_core_name_prefix;
455 }
456}
Note: See TracBrowser for help on using the repository browser.