Changeset 24725 for main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
- Timestamp: 2011-10-05T15:44:19+13:00 (13 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
r20910 r24725 45 45 import org.apache.lucene.search.IndexSearcher; 46 46 import org.apache.lucene.search.Query; 47 import org.apache.lucene.search. RangeFilter;47 import org.apache.lucene.search.TermRangeFilter; 48 48 import org.apache.lucene.search.Searcher; 49 49 import org.apache.lucene.search.ScoreDoc; 50 50 import org.apache.lucene.search.Sort; 51 import org.apache.lucene.search.SortField; 51 52 import org.apache.lucene.search.TopFieldDocs; 52 53 53 54 public class GS2LuceneQuery 54 import org.apache.lucene.store.Directory; 55 import org.apache.lucene.store.FSDirectory; 56 import org.apache.lucene.util.Version; 57 58 public class GS2LuceneQuery extends SharedSoleneQuery 55 59 { 56 57 58 static private String TEXTFIELD = "TX"; 59 60 // Use the standard set of English stop words by default 61 static private String[] stop_words = GS2Analyzer.STOP_WORDS; 62 63 private String full_indexdir=""; 64 private String default_conjunction_operator = "OR"; 65 private String fuzziness = null; 66 private String sort_field = null; 67 private Sort sorter=new Sort(); 68 private String filter_string = null; 69 private Filter filter = null; 70 private int start_results=1; 71 private int end_results=Integer.MAX_VALUE; 72 73 private QueryParser query_parser = null; 74 private QueryParser query_parser_no_stop_words = null; 75 private Searcher searcher = null; 76 private IndexReader reader = null; 77 78 static private PrintWriter utf8out = null; 79 80 static 81 { 82 try { 83 OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8"); 84 utf8out = new PrintWriter(osw, true); 85 } 86 catch (UnsupportedEncodingException e) { 87 System.out.println(e); 88 } 89 } 90 91 60 protected String full_indexdir=""; 61 62 protected Sort sorter=new Sort(); 63 protected Filter filter = null; 64 65 protected static Version matchVersion = Version.LUCENE_24; 66 67 protected QueryParser query_parser = null; 68 protected QueryParser query_parser_no_stop_words = null; 69 protected Searcher searcher 
= null; 70 protected IndexReader reader = null; 71 92 72 public GS2LuceneQuery() { 73 super(); 93 74 94 75 // Create one query parser with the standard set of stop words, and one with none 95 76 96 query_parser = new QueryParser( TEXTFIELD, new GS2Analyzer(stop_words));97 query_parser_no_stop_words = new QueryParser( TEXTFIELD, new GS2Analyzer(new String[] { }));77 query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set 78 query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { })); 98 79 } 99 80 100 81 101 82 public boolean initialise() { 83 84 if (!super.initialise()) { 85 return false; 86 } 87 102 88 103 89 if (full_indexdir==null || full_indexdir.length()==-1){ … … 106 92 return false; 107 93 } 94 108 95 try { 109 searcher = new IndexSearcher(full_indexdir); 96 Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 97 searcher = new IndexSearcher(full_indexdir_dir,true); 110 98 reader = ((IndexSearcher) searcher).getIndexReader(); 111 99 … … 118 106 119 107 } 108 109 public void setIndexDir(String full_indexdir) { 110 this.full_indexdir = full_indexdir; 111 } 112 113 public void setSortField(String sort_field) { 114 super.setSortField(sort_field); 115 116 if (sort_field == null) { 117 this.sorter = new Sort(); 118 } else { 119 this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!? 120 } 121 } 122 123 public void setFilterString(String filter_string) { 124 super.setFilterString(filter_string); 125 this.filter = parseFilterString(filter_string); 126 } 127 128 public Filter getFilter() { 129 return this.filter; 130 } 131 120 132 121 133 public LuceneQueryResult runQuery(String query_string) { … … 194 206 if (end_results == Integer.MAX_VALUE) { 195 207 // Perform the query (filter and sorter may be null) 196 Hits hits = searcher.search(query, filter, sorter);197 lucene_query_result.setTotalDocs(hits. 
length());208 TopFieldDocs hits = searcher.search(query, filter, end_results, sorter); 209 lucene_query_result.setTotalDocs(hits.totalHits); 198 210 199 211 // Output the matching documents 200 212 lucene_query_result.setStartResults(start_results); 201 lucene_query_result.setEndResults(hits. length());202 203 for (int i = start_results; i <= hits. length(); i++) {204 int lucene_doc_num = hits. id(i - 1);205 Document doc = hits.doc(i - 1);213 lucene_query_result.setEndResults(hits.totalHits); 214 215 for (int i = start_results; i <= hits.totalHits; i++) { 216 int lucene_doc_num = hits.scoreDocs[i - 1].doc; 217 Document doc = reader.document(lucene_doc_num); 206 218 int doc_term_freq = 0; 207 219 Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num)); … … 210 222 doc_term_freq = doc_term_freq_object.intValue(); 211 223 } 212 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score (i-1), doc_term_freq);224 lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq); 213 225 } 214 226 } … … 256 268 257 269 public void setDefaultConjunctionOperator(String default_conjunction_operator) { 258 this.default_conjunction_operator = default_conjunction_operator.toUpperCase(); 270 super.setDefaultConjunctionOperator(default_conjunction_operator); 271 259 272 if (default_conjunction_operator.equals("AND")) { 260 273 query_parser.setDefaultOperator(query_parser.AND_OPERATOR); … … 265 278 } 266 279 } 267 268 public String getDefaultConjunctionOperator() { 269 return this.default_conjunction_operator; 270 } 271 272 public void setEndResults(int end_results) { 273 this.end_results = end_results; 274 } 275 public int getEndResults() { 276 return this.end_results; 277 } 278 279 public void setFilterString(String filter_string) { 280 this.filter_string = filter_string; 281 this.filter = parseFilterString(filter_string); 282 } 283 public String getFilterString() { 284 return this.filter_string ; 285 } 286 
287 public Filter getFilter() { 288 return this.filter; 289 } 290 291 public void setIndexDir(String full_indexdir) { 292 this.full_indexdir = full_indexdir; 293 } 294 295 public void setFuzziness(String fuzziness) { 296 this.fuzziness = fuzziness; 297 } 298 public String getFuzziness() { 299 return this.fuzziness; 300 } 301 302 public void setSortField(String sort_field) { 303 this.sort_field = sort_field; 304 if (sort_field == null) { 305 this.sorter = new Sort(); 306 } else { 307 this.sorter = new Sort(sort_field); 308 } 309 } 310 public String getSortField() { 311 return this.sort_field; 312 } 313 314 public void setStartResults(int start_results) { 315 if (start_results < 1) { 316 start_results = 1; 317 } 318 this.start_results = start_results; 319 } 320 public int getStartResults() { 321 return this.start_results; 322 } 323 280 281 324 282 public void cleanUp() { 283 super.cleanUp(); 325 284 try { 326 285 if (searcher != null) { … … 332 291 } 333 292 334 private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 293 294 protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness) 335 295 throws java.io.IOException, org.apache.lucene.queryParser.ParseException 336 296 { … … 434 394 } 435 395 436 pr ivateFilter parseFilterString(String filter_string)396 protected Filter parseFilterString(String filter_string) 437 397 { 438 398 Filter result = null; … … 445 405 String upper_term = matcher.group(4); 446 406 boolean include_upper = matcher.group(5).equals("]"); 447 result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);407 result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper); 448 408 } 449 409 else { … … 452 412 return result; 453 413 } 454 455 456 protected void finalize() throws Throwable457 {458 try {459 utf8out.flush();460 } finally {461 super.finalize();462 }463 }464 465 414 415 466 
416 /** command line program and auxiliary methods */ 467 417 468 418 // Fairly self-explanatory I should hope 469 static pr ivateboolean query_result_caching_enabled = false;419 static protected boolean query_result_caching_enabled = false; 470 420 471 421 472 422 static public void main (String args[]) 473 423 { 474 475 476 424 if (args.length == 0) { 477 425 System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]"); … … 566 514 } 567 515 568 pr ivatestatic void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)516 protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string) 569 517 throws IOException 570 518 { … … 654 602 } 655 603 656 pr ivatestatic String fileSafe(String text)604 protected static String fileSafe(String text) 657 605 { 658 606 StringBuffer file_safe_text = new StringBuffer();
Note: See TracChangeset for help on using the changeset viewer.