Changeset 24731 for main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
- Timestamp: 2011-10-07T11:36:07+13:00 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r24725 r24731 45 45 import org.apache.lucene.search.IndexSearcher; 46 46 import org.apache.lucene.search.Query; 47 import org.apache.lucene.search. TermRangeFilter;47 import org.apache.lucene.search.RangeFilter; 48 48 import org.apache.lucene.search.Searcher; 49 49 import org.apache.lucene.search.ScoreDoc; 50 50 import org.apache.lucene.search.Sort; 51 import org.apache.lucene.search.SortField;52 51 import org.apache.lucene.search.TopFieldDocs; 53 52 54 import org.apache.lucene.store.Directory; 55 import org.apache.lucene.store.FSDirectory; 56 import org.apache.lucene.util.Version; 57 58 public class GS2LuceneQuery extends SharedSoleneQuery 53 54 public class GS2LuceneQuery 59 55 { 60 protected String full_indexdir=""; 61 62 protected Sort sorter=new Sort(); 63 protected Filter filter = null; 64 65 protected static Version matchVersion = Version.LUCENE_24; 66 67 protected QueryParser query_parser = null; 68 protected QueryParser query_parser_no_stop_words = null; 69 protected Searcher searcher = null; 70 protected IndexReader reader = null; 71 56 57 58 static private String TEXTFIELD = "TX"; 59 60 // Use the standard set of English stop words by default 61 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 62 63 private String full_indexdir=""; 64 private String default_conjunction_operator = "OR"; 65 private String fuzziness = null; 66 private String sort_field = null; 67 private Sort sorter=new Sort(); 68 private String filter_string = null; 69 private Filter filter = null; 70 private int start_results=1; 71 private int end_results=Integer.MAX_VALUE; 72 73 private QueryParser query_parser = null; 74 private QueryParser query_parser_no_stop_words = null; 75 private Searcher searcher = null; 76 private IndexReader reader = null; 77 78 static private PrintWriter utf8out = null; 79 80 static 81 { 82 try { 83 OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8"); 84 utf8out = new PrintWriter(osw, true); 85 } 86 catch 
(UnsupportedEncodingException e) { 87 System.out.println(e); 88 } 89 } 90 91 72 92 public GS2LuceneQuery() { 73 super();74 93 75 94 // Create one query parser with the standard set of stop words, and one with none 76 95 77 query_parser = new QueryParser( matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set78 query_parser_no_stop_words = new QueryParser( matchVersion,TEXTFIELD, new GS2Analyzer(new String[] { }));96 query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words)); 97 query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { })); 79 98 } 80 99 81 100 82 101 public boolean initialise() { 83 84 if (!super.initialise()) {85 return false;86 }87 88 102 89 103 if (full_indexdir==null || full_indexdir.length()==-1){ … … 92 106 return false; 93 107 } 94 95 108 try { 96 Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 97 searcher = new IndexSearcher(full_indexdir_dir,true); 109 searcher = new IndexSearcher(full_indexdir); 98 110 reader = ((IndexSearcher) searcher).getIndexReader(); 99 111 … … 106 118 107 119 } 108 109 public void setIndexDir(String full_indexdir) {110 this.full_indexdir = full_indexdir;111 }112 113 public void setSortField(String sort_field) {114 super.setSortField(sort_field);115 116 if (sort_field == null) {117 this.sorter = new Sort();118 } else {119 this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!?120 }121 }122 123 public void setFilterString(String filter_string) {124 super.setFilterString(filter_string);125 this.filter = parseFilterString(filter_string);126 }127 128 public Filter getFilter() {129 return this.filter;130 }131 132 120 133 121 public LuceneQueryResult runQuery(String query_string) { … … 206 194 if (end_results == Integer.MAX_VALUE) { 207 195 // Perform the query (filter and sorter may be null) 208 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);209 
lucene_query_result.setTotalDocs(hits. totalHits);196 Hits hits = searcher.search(query, filter, sorter); 197 lucene_query_result.setTotalDocs(hits.length()); 210 198 211 199 // Output the matching documents 212 200 lucene_query_result.setStartResults(start_results); 213 lucene_query_result.setEndResults(hits. totalHits);214 215 for (int i = start_results; i <= hits. totalHits; i++) {216 int lucene_doc_num = hits. scoreDocs[i - 1].doc;217 Document doc = reader.document(lucene_doc_num);201 lucene_query_result.setEndResults(hits.length()); 202 203 for (int i = start_results; i <= hits.length(); i++) { 204 int lucene_doc_num = hits.id(i - 1); 205 Document doc = hits.doc(i - 1); 218 206 int doc_term_freq = 0; 219 207 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num)); … … 222 210 doc_term_freq = doc_term_freq_object.intValue(); 223 211 } 224 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score Docs[i-1].score, doc_term_freq);212 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq); 225 213 } 226 214 } … … 268 256 269 257 public void setDefaultConjunctionOperator(String default_conjunction_operator) { 270 super.setDefaultConjunctionOperator(default_conjunction_operator); 271 258 this.default_conjunction_operator = default_conjunction_operator.toUpperCase(); 272 259 if (default_conjunction_operator.equals("AND")) { 273 260 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); … … 278 265 } 279 266 } 280 281 267 268 public String getDefaultConjunctionOperator() { 269 return this.default_conjunction_operator; 270 } 271 272 public void setEndResults(int end_results) { 273 this.end_results = end_results; 274 } 275 public int getEndResults() { 276 return this.end_results; 277 } 278 279 public void setFilterString(String filter_string) { 280 this.filter_string = filter_string; 281 this.filter = parseFilterString(filter_string); 282 } 283 public String getFilterString() { 284 return 
this.filter_string ; 285 } 286 287 public Filter getFilter() { 288 return this.filter; 289 } 290 291 public void setIndexDir(String full_indexdir) { 292 this.full_indexdir = full_indexdir; 293 } 294 295 public void setFuzziness(String fuzziness) { 296 this.fuzziness = fuzziness; 297 } 298 public String getFuzziness() { 299 return this.fuzziness; 300 } 301 302 public void setSortField(String sort_field) { 303 this.sort_field = sort_field; 304 if (sort_field == null) { 305 this.sorter = new Sort(); 306 } else { 307 this.sorter = new Sort(sort_field); 308 } 309 } 310 public String getSortField() { 311 return this.sort_field; 312 } 313 314 public void setStartResults(int start_results) { 315 if (start_results < 1) { 316 start_results = 1; 317 } 318 this.start_results = start_results; 319 } 320 public int getStartResults() { 321 return this.start_results; 322 } 323 282 324 public void cleanUp() { 283 super.cleanUp();284 325 try { 285 326 if (searcher != null) { … … 291 332 } 292 333 293 294 protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 334 private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 295 335 throws java.io.IOException, org.apache.lucene.queryParser.ParseException 296 336 { … … 394 434 } 395 435 396 pr otectedFilter parseFilterString(String filter_string)436 private Filter parseFilterString(String filter_string) 397 437 { 398 438 Filter result = null; … … 405 445 String upper_term = matcher.group(4); 406 446 boolean include_upper = matcher.group(5).equals("]"); 407 result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);447 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 408 448 } 409 449 else { … … 412 452 return result; 413 453 } 414 415 454 455 456 protected void finalize() throws Throwable 457 { 458 try { 459 utf8out.flush(); 460 } finally { 461 
super.finalize(); 462 } 463 } 464 465 416 466 /** command line program and auxiliary methods */ 417 467 418 468 // Fairly self-explanatory I should hope 419 static pr otectedboolean query_result_caching_enabled = false;469 static private boolean query_result_caching_enabled = false; 420 470 421 471 422 472 static public void main (String args[]) 423 473 { 474 475 424 476 if (args.length == 0) { 425 477 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]"); … … 514 566 } 515 567 516 pr otectedstatic void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)568 private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string) 517 569 throws IOException 518 570 { … … 602 654 } 603 655 604 pr otectedstatic String fileSafe(String text)656 private static String fileSafe(String text) 605 657 { 606 658 StringBuffer file_safe_text = new StringBuffer();
Note: See TracChangeset for help on using the changeset viewer.