Changeset 25865
- Timestamp: 2012-06-28T13:15:04+12:00 (11 years ago)
- Files: 1 edited
Legend: Unmodified, Added, Removed
gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java
r24739 r25865 26 26 package org.greenstone.gsdl3.util; 27 27 28 29 import java.io.*;30 import java.util.*;31 import java.util.regex.*;32 33 28 import org.apache.log4j.Logger; 34 35 import org.apache.solr.client.solrj.SolrQuery;36 29 import org.apache.solr.client.solrj.SolrServer; 37 30 import org.apache.solr.client.solrj.SolrServerException; 38 import org.apache.solr.client.solrj.response.FacetField;39 import org.apache.solr.client.solrj.response.FacetField.Count;40 31 import org.apache.solr.client.solrj.response.QueryResponse; 41 42 32 import org.apache.solr.common.SolrDocument; 43 33 import org.apache.solr.common.SolrDocumentList; 44 34 import org.apache.solr.common.params.ModifiableSolrParams; 45 import org.apache.solr.common.params.SolrParams;46 47 35 import org.greenstone.LuceneWrapper3.SharedSoleneQuery; 48 36 import org.greenstone.LuceneWrapper3.SharedSoleneQueryResult; 49 37 50 51 public class SolrQueryWrapper extends SharedSoleneQuery 38 public class SolrQueryWrapper extends SharedSoleneQuery 52 39 { 53 40 54 41 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.SolrQueryWrapper.class.getName()); 55 42 56 /* 57 // Use the standard set of English stop words by default 58 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 43 protected int max_docs = 100; 59 44 60 private String full_indexdir=""; 61 62 private String default_conjunction_operator = "OR"; 63 private String fuzziness = null; 64 private String sort_field = null; 65 private Sort sorter=new Sort(); 66 private String filter_string = null; 67 private Filter filter = null; 45 SolrServer solr_core = null; 68 46 69 private QueryParser query_parser = null; 70 private QueryParser query_parser_no_stop_words = null; 71 */ 47 public SolrQueryWrapper() 48 { 49 super(); 50 } 72 51 73 protected int max_docs = 100; 52 public void setMaxDocs(int max_docs) 53 { 54 this.max_docs = max_docs; 55 } 74 56 75 SolrServer solr_core = null; 57 public void setSolrCore(SolrServer solr_core) 58 { 59 
this.solr_core = solr_core; 60 } 76 61 62 public boolean initialise() 63 { 77 64 78 public SolrQueryWrapper() { 79 super(); 80 } 81 /* 82 // Create one query parser with the standard set of stop words, and one with none 65 if (solr_core == null) 66 { 67 utf8out.println("Solr Core not loaded in "); 68 utf8out.flush(); 69 return false; 70 } 71 return true; 83 72 84 query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words)); 85 query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { })); 86 } 87 */ 73 } 88 74 89 public void setMaxDocs(int max_docs) 90 { 91 this.max_docs = max_docs; 92 } 75 public SharedSoleneQueryResult runQuery(String query_string) 76 { 93 77 94 public void setSolrCore(SolrServer solr_core) 95 { 96 this.solr_core = solr_core; 97 } 98 99 100 public boolean initialise() { 101 102 if (solr_core==null) { 103 utf8out.println("Solr Core not loaded in "); 104 utf8out.flush(); 105 return false; 106 } 107 return true; 108 109 } 110 111 public SharedSoleneQueryResult runQuery(String query_string) { 112 113 if (query_string == null || query_string.equals("")) { 114 utf8out.println("The query word is not indicated "); 115 utf8out.flush(); 116 return null; 117 } 118 119 SolrQueryResult solr_query_result=new SolrQueryResult(); 120 solr_query_result.clear(); 121 122 ModifiableSolrParams solrParams = new ModifiableSolrParams(); 123 solrParams.set("q", query_string); 124 solrParams.set("start", start_results); 125 solrParams.set("rows", (end_results - start_results) +1); 126 solrParams.set("fl","docOID score"); 127 128 /* 129 try { 130 Query query_including_stop_words = query_parser_no_stop_words.parse(query_string); 131 query_including_stop_words = query_including_stop_words.rewrite(reader); 132 133 // System.err.println("********* query_string " + query_string + "****"); 134 135 Query query = parseQuery(reader, query_parser, query_string, fuzziness); 136 query = query.rewrite(reader); 137 138 // Get the list of expanded 
query terms and their frequencies 139 // num docs matching, and total frequency 140 HashSet terms = new HashSet(); 141 query.extractTerms(terms); 142 143 HashMap doc_term_freq_map = new HashMap(); 144 145 Iterator iter = terms.iterator(); 146 while (iter.hasNext()) { 147 148 Term term = (Term) iter.next(); 149 150 // Get the term frequency over all the documents 151 TermDocs term_docs = reader.termDocs(term); 152 int term_freq = 0; 153 int match_docs = 0; 154 while (term_docs.next()) 78 if (query_string == null || query_string.equals("")) 155 79 { 156 if (term_docs.freq() != 0) 157 { 158 term_freq += term_docs.freq(); 159 match_docs++; 160 161 // Calculate the document-level term frequency as well 162 Integer lucene_doc_num_obj = new Integer(term_docs.doc()); 163 int doc_term_freq = 0; 164 if (doc_term_freq_map.containsKey(lucene_doc_num_obj)) 165 { 166 doc_term_freq = ((Integer) doc_term_freq_map.get(lucene_doc_num_obj)).intValue(); 167 } 168 doc_term_freq += term_docs.freq(); 169 170 doc_term_freq_map.put(lucene_doc_num_obj, new Integer(doc_term_freq)); 171 } 80 utf8out.println("The query word is not indicated "); 81 utf8out.flush(); 82 return null; 172 83 } 173 84 174 // Create a term 175 lucene_query_result.addTerm(term.text(), term.field(), match_docs, term_freq); 176 } 177 178 // Get the list of stop words removed from the query 179 HashSet terms_including_stop_words = new HashSet(); 180 query_including_stop_words.extractTerms(terms_including_stop_words); 181 Iterator terms_including_stop_words_iter = terms_including_stop_words.iterator(); 182 while (terms_including_stop_words_iter.hasNext()) { 183 Term term = (Term) terms_including_stop_words_iter.next(); 184 if (!terms.contains(term)) { 185 lucene_query_result.addStopWord(term.text()); 85 SolrQueryResult solr_query_result = new SolrQueryResult(); 86 solr_query_result.clear(); 87 88 ModifiableSolrParams solrParams = new ModifiableSolrParams(); 89 solrParams.set("q", query_string); 90 solrParams.set("start", 
start_results); 91 solrParams.set("rows", (end_results - start_results) + 1); 92 solrParams.set("fl", "docOID score"); 93 94 try 95 { 96 QueryResponse solrResponse = solr_core.query(solrParams); 97 98 SolrDocumentList hits = solrResponse.getResults(); 99 100 if (hits != null) 101 { 102 103 logger.info("*** hits size = " + hits.size()); 104 logger.info("*** num docs found = " + hits.getNumFound()); 105 106 logger.info("*** start results = " + start_results); 107 logger.info("*** end results = " + end_results); 108 logger.info("*** max docs = " + max_docs); 109 110 // numDocsFound is the total number of mactching docs in the collection 111 // as opposed to the number of documents returned in the hits list 112 113 solr_query_result.setTotalDocs((int) hits.getNumFound()); 114 115 solr_query_result.setStartResults(start_results); 116 solr_query_result.setEndResults(start_results + hits.size()); 117 118 // Output the matching documents 119 for (int i = 0; i < hits.size(); i++) 120 { 121 SolrDocument doc = hits.get(i); 122 123 // Need to think about how to support document term frequency. 
Make zero for now 124 int doc_term_freq = 0; 125 String docOID = (String) doc.get("docOID"); 126 Float score = (Float) doc.get("score"); 127 128 logger.info("**** docOID = " + docOID); 129 logger.info("**** score = " + score); 130 131 solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq); 132 } 133 } 134 else 135 { 136 solr_query_result.setTotalDocs(0); 137 138 solr_query_result.setStartResults(0); 139 solr_query_result.setEndResults(0); 140 } 186 141 } 187 } 188 189 */ 142 catch (SolrServerException server_exception) 143 { 144 solr_query_result.setError(SolrQueryResult.SERVER_ERROR); 145 } 190 146 191 try { 192 QueryResponse solrResponse = solr_core.query(solrParams); 193 194 SolrDocumentList hits = solrResponse.getResults(); 195 196 if (hits != null) { 197 198 logger.info("*** hits size = " + hits.size()); 199 logger.info("*** num docs found = " + hits.getNumFound()); 200 201 logger.info("*** start results = " + start_results); 202 logger.info("*** end results = " + end_results); 203 logger.info("*** max docs = " + max_docs); 204 205 // numDocsFound is the total number of mactching docs in the collection 206 // as opposed to the number of documents returned in the hits list 207 208 solr_query_result.setTotalDocs((int)hits.getNumFound()); 209 210 solr_query_result.setStartResults(start_results); 211 solr_query_result.setEndResults(start_results + hits.size()); 212 213 // Output the matching documents 214 for (int i = 0; i < hits.size(); i++) { 215 SolrDocument doc = hits.get(i); 216 217 // Need to think about how to support document term frequency. 
Make zero for now 218 int doc_term_freq = 0; 219 String docOID = (String)doc.get("docOID"); 220 Float score = (Float)doc.get("score"); 221 222 logger.info("**** docOID = " + docOID); 223 logger.info("**** score = " + score); 224 225 solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq); 226 } 227 } 228 else { 229 solr_query_result.setTotalDocs(0); 230 231 solr_query_result.setStartResults(0); 232 solr_query_result.setEndResults(0); 233 } 147 return solr_query_result; 234 148 } 235 149 236 catch (SolrServerException server_exception) { 237 solr_query_result.setError(SolrQueryResult.SERVER_ERROR); 150 public void cleanUp() 151 { 152 super.cleanUp(); 238 153 } 239 240 241 /*242 243 // do the query244 // Simple case for getting all the matching documents245 if (end_results == Integer.MAX_VALUE) {246 // Perform the query (filter and sorter may be null)247 Hits hits = searcher.search(query, filter, sorter);248 lucene_query_result.setTotalDocs(hits.length());249 250 // Output the matching documents251 lucene_query_result.setStartResults(start_results);252 lucene_query_result.setEndResults(hits.length());253 254 for (int i = start_results; i <= hits.length(); i++) {255 int lucene_doc_num = hits.id(i - 1);256 Document doc = hits.doc(i - 1);257 int doc_term_freq = 0;258 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));259 if (doc_term_freq_object != null)260 {261 doc_term_freq = doc_term_freq_object.intValue();262 }263 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);264 }265 }266 267 // Slightly more complicated case for returning a subset of the matching documents268 else {269 // Perform the query (filter may be null)270 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);271 lucene_query_result.setTotalDocs(hits.totalHits);272 273 lucene_query_result.setStartResults(start_results);274 lucene_query_result.setEndResults(end_results < hits.scoreDocs.length ? 
end_results: hits.scoreDocs.length);275 276 // Output the matching documents277 for (int i = start_results; (i <= hits.scoreDocs.length && i <= end_results); i++) {278 int lucene_doc_num = hits.scoreDocs[i - 1].doc;279 Document doc = reader.document(lucene_doc_num);280 int doc_term_freq = 0;281 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));282 if (doc_term_freq_object != null)283 {284 doc_term_freq = doc_term_freq_object.intValue();285 }286 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);287 }288 }289 */290 291 return solr_query_result;292 }293 /*294 295 catch (ParseException parse_exception) {296 lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR);297 }298 catch (TooManyClauses too_many_clauses_exception) {299 lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR);300 }301 catch (IOException exception) {302 lucene_query_result.setError(LuceneQueryResult.IO_ERROR);303 exception.printStackTrace();304 }305 catch (Exception exception) {306 lucene_query_result.setError(LuceneQueryResult.OTHER_ERROR);307 exception.printStackTrace();308 }309 return lucene_query_result;310 }311 312 public void setDefaultConjunctionOperator(String default_conjunction_operator) {313 this.default_conjunction_operator = default_conjunction_operator.toUpperCase();314 if (default_conjunction_operator.equals("AND")) {315 query_parser.setDefaultOperator(query_parser.AND_OPERATOR);316 query_parser_no_stop_words.setDefaultOperator(query_parser.AND_OPERATOR);317 } else { // default is OR318 query_parser.setDefaultOperator(query_parser.OR_OPERATOR);319 query_parser_no_stop_words.setDefaultOperator(query_parser.OR_OPERATOR);320 }321 322 323 }324 */325 326 public void cleanUp() {327 super.cleanUp();328 }329 330 154 } 331 332
Note: See TracChangeset for help on using the changeset viewer.