Changeset 25862
- Timestamp:
- 2012-06-28T13:02:23+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java
/*
 *    GS2SolrSearch.java
 *    Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package org.greenstone.gsdl3.service;

// java classes
import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

// logging, Solr, Greenstone and XML classes
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult;
import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.SolrQueryWrapper;
import org.greenstone.util.GlobalProperties;
import org.w3c.dom.Element;

/**
 * Field-search service that runs queries against embedded Solr cores.
 *
 * A single {@link CoreContainer} is shared by every instance of this class;
 * each instance keeps its own cache of the {@link EmbeddedSolrServer}
 * objects it has opened, keyed by core name
 * (<code>site-collection-index</code>).
 */
public class GS2SolrSearch extends SharedSoleneGS2FieldSearch
{
	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2SolrSearch.class.getName());

	/** The one CoreContainer shared across all sites/collections; stays null if it could not be created. */
	static protected CoreContainer all_solr_cores = null;

	/**
	 * Used to store the solr cores that match the required 'level' of search
	 * (e.g. either document-level=>didx, or section-level=>sidx). The
	 * hashmap is filled out on demand based on the 'level' parameter passed
	 * in to 'setUpQueryer()'. Maps core name (String) to EmbeddedSolrServer.
	 */
	protected HashMap solr_core_cache;

	/** Wrapper that holds the query settings and actually executes the query. */
	protected SolrQueryWrapper solr_src = null;

	/**
	 * Creates the service, setting up the shared CoreContainer the first
	 * time any instance is constructed. The container is rooted at the Solr
	 * extension directory of the Greenstone 3 home and configured from the
	 * <code>solr.xml</code> found there.
	 */
	public GS2SolrSearch()
	{
		solr_core_cache = new HashMap();

		if (all_solr_cores == null)
		{
			// Share one CoreContainer across all sites/collections
			try
			{
				String gsdl3_home = GlobalProperties.getGSDL3Home();
				String solr_ext_name = GlobalProperties.getProperty("gsdlext.solr.dirname", "solr");

				String solr_home_str = GSFile.extHome(gsdl3_home, solr_ext_name);
				File solr_home = new File(solr_home_str);
				File solr_xml = new File(solr_home, "solr.xml");

				all_solr_cores = new CoreContainer(solr_home_str, solr_xml);
			}
			catch (Exception e)
			{
				// Report through the service log rather than stderr so the
				// failure is visible alongside the other Greenstone messages.
				logger.error("Unable to create the shared Solr CoreContainer", e);
			}
		}

		this.solr_src = new SolrQueryWrapper();
	}

	/**
	 * Releases the resources held by this service and shuts down the shared
	 * CoreContainer.
	 */
	public void cleanUp()
	{
		super.cleanUp();
		this.solr_src.cleanUp();

		// The container is null when its construction failed; guard so that
		// clean-up of an already-broken service does not throw as well.
		// NOTE(review): the container is static, so this shuts it down for
		// every other GS2SolrSearch instance too - confirm that is intended.
		if (all_solr_cores != null)
		{
			all_solr_cores.shutdown();
		}
	}

	/** methods to handle actually doing the query */

	/**
	 * Parses a numeric query parameter.
	 *
	 * @param value the raw parameter value, may be null or empty
	 * @param fallback the value to use when <code>value</code> is missing or
	 *        not a valid integer
	 * @return the parsed integer, or <code>fallback</code>
	 */
	private static int parseIntParamOrDefault(String value, int fallback)
	{
		if (value == null || value.trim().equals(""))
		{
			return fallback;
		}
		try
		{
			return Integer.parseInt(value.trim());
		}
		catch (NumberFormatException e)
		{
			logger.warn("Ignoring non-numeric query parameter value '" + value + "', using " + fallback);
			return fallback;
		}
	}

	/**
	 * Do any initialisation of the query object: copies the search
	 * parameters into the query wrapper, works out which index is wanted and
	 * attaches the matching Solr core.
	 *
	 * @param params map of parameter name (String) to value (String);
	 *        unrecognised names and entries with a null name or value are
	 *        ignored
	 * @return true when the queryer is ready, false when no shared
	 *         CoreContainer is available
	 */
	protected boolean setUpQueryer(HashMap params)
	{
		String index = "didx";
		String physical_index_language_name = null;
		String physical_sub_index_name = null;
		int maxdocs = 100;
		int hits_per_page = 20;
		int start_page = 1;

		// set up the query params
		Set entries = params.entrySet();
		Iterator i = entries.iterator();
		while (i.hasNext())
		{
			Map.Entry m = (Map.Entry) i.next();
			String name = (String) m.getKey();
			String value = (String) m.getValue();
			if (name == null || value == null)
			{
				// nothing usable in this entry
				continue;
			}

			if (name.equals(MAXDOCS_PARAM))
			{
				maxdocs = parseIntParamOrDefault(value, maxdocs);
			}
			else if (name.equals(HITS_PER_PAGE_PARAM))
			{
				hits_per_page = parseIntParamOrDefault(value, hits_per_page);
			}
			else if (name.equals(START_PAGE_PARAM))
			{
				start_page = parseIntParamOrDefault(value, start_page);
			}
			else if (name.equals(MATCH_PARAM))
			{
				if (value.equals(MATCH_PARAM_ALL))
				{
					this.solr_src.setDefaultConjunctionOperator("AND");
				}
				else
				{
					this.solr_src.setDefaultConjunctionOperator("OR");
				}
			}
			else if (name.equals(RANK_PARAM))
			{
				// the rank value is passed on as a null sort field
				if (value.equals(RANK_PARAM_RANK_VALUE))
				{
					value = null;
				}
				this.solr_src.setSortField(value);
			}
			else if (name.equals(LEVEL_PARAM))
			{
				if (value.toUpperCase().equals("SEC"))
				{
					index = "sidx";
				}
				else
				{
					index = "didx";
				}
			}
			else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
			{
				physical_sub_index_name = value;
			}
			else if (name.equals(INDEX_LANGUAGE_PARAM))
			{
				physical_index_language_name = value;
			} // ignore any others
		}

		// set up start and end results (1-based; page 1 starts at result 1)
		int start_results = ((start_page - 1) * hits_per_page) + 1;
		int end_results = hits_per_page * start_page;
		this.solr_src.setStartResults(start_results);
		this.solr_src.setEndResults(end_results);
		this.solr_src.setMaxDocs(maxdocs);

		// the subcollection and language partitions are suffixes on the level index name
		if (physical_sub_index_name != null)
		{
			index += physical_sub_index_name;
		}
		if (physical_index_language_name != null)
		{
			index += physical_index_language_name;
		}

		// now we know the index level, we can dig out the required
		// solr-core, (caching the result in 'solr_core_cache')

		if (all_solr_cores == null)
		{
			logger.error("No Solr CoreContainer is available; cannot set up the query");
			return false;
		}

		String site_name = this.router.getSiteName();
		String coll_name = this.cluster_name;

		String core_name = site_name + "-" + coll_name + "-" + index;

		EmbeddedSolrServer solr_core = (EmbeddedSolrServer) solr_core_cache.get(core_name);
		if (solr_core == null)
		{
			solr_core = new EmbeddedSolrServer(all_solr_cores, core_name);
			solr_core_cache.put(core_name, solr_core);
		}

		this.solr_src.setSolrCore(solr_core);
		this.solr_src.initialise();
		return true;
	}

	/**
	 * Do the query.
	 *
	 * @param query the query string to hand to the Solr query wrapper
	 * @return the SharedSoleneQueryResult, or null if the query threw an
	 *         exception (the exception is logged)
	 */
	protected Object runQuery(String query)
	{
		try
		{
			SharedSoleneQueryResult sqr = this.solr_src.runQuery(query);
			return sqr;
		}
		catch (Exception e)
		{
			logger.error("Exception happened in run query: ", e);
		}

		return null;
	}

	/**
	 * Get the total number of docs that match.
	 *
	 * @param query_result the object returned by runQuery()
	 */
	protected long numDocsMatched(Object query_result)
	{
		return ((SharedSoleneQueryResult) query_result).getTotalDocs();
	}

	/**
	 * Get the list of doc ids.
	 *
	 * @param query_result the object returned by runQuery()
	 * @return one id per returned document, in result order
	 */
	protected String[] getDocIDs(Object query_result)
	{
		Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
		String[] doc_nums = new String[docs.size()];
		for (int d = 0; d < docs.size(); d++)
		{
			doc_nums[d] = ((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).id_;
		}
		return doc_nums;
	}

	/**
	 * Get the list of doc ranks.
	 *
	 * @param query_result the object returned by runQuery()
	 * @return one rank (a float rendered as a string) per returned document,
	 *         in result order
	 */
	protected String[] getDocRanks(Object query_result)
	{
		Vector docs = ((SharedSoleneQueryResult) query_result).getDocs();
		String[] doc_ranks = new String[docs.size()];
		for (int d = 0; d < docs.size(); d++)
		{
			doc_ranks[d] = Float.toString(((SharedSoleneQueryResult.DocInfo) docs.elementAt(d)).rank_);
		}
		return doc_ranks;
	}

	/**
	 * Add in term info if available: appends one term element per query
	 * term, followed by one stopword element per stop word, to term_list.
	 *
	 * @param term_list the element the new children are appended to
	 * @param params the query parameters (not consulted here)
	 * @param query_result the object returned by runQuery()
	 * @return always true
	 */
	protected boolean addTermInfo(Element term_list, HashMap params, Object query_result)
	{
		SharedSoleneQueryResult result = (SharedSoleneQueryResult) query_result;

		Vector terms = result.getTerms();
		for (int t = 0; t < terms.size(); t++)
		{
			SharedSoleneQueryResult.TermInfo term_info = (SharedSoleneQueryResult.TermInfo) terms.get(t);

			Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
			term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
			term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
			term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
			term_elem.setAttribute(FIELD_ATT, term_info.field_);
			term_list.appendChild(term_elem);
		}

		Vector stopwords = result.getStopWords();
		for (int t = 0; t < stopwords.size(); t++)
		{
			String stopword = (String) stopwords.get(t);

			Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM);
			stopword_elem.setAttribute(GSXML.NAME_ATT, stopword);
			term_list.appendChild(stopword_elem);
		}

		return true;
	}

}
Note: See TracChangeset for help on using the changeset viewer.