Ignore:
Timestamp:
2011-10-05T15:44:19+13:00 (13 years ago)
Author:
davidb
Message:

Restructuring of Lucene version 2.x and 3.x to make it easier to control which one is used

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java

    r20910 r24725  
    4545import org.apache.lucene.search.IndexSearcher;
    4646import org.apache.lucene.search.Query;
    47 import org.apache.lucene.search.RangeFilter;
     47import org.apache.lucene.search.TermRangeFilter;
    4848import org.apache.lucene.search.Searcher;
    4949import org.apache.lucene.search.ScoreDoc;
    5050import org.apache.lucene.search.Sort;
     51import org.apache.lucene.search.SortField;
    5152import org.apache.lucene.search.TopFieldDocs;
    5253
    53 
    54 public class GS2LuceneQuery
     54import org.apache.lucene.store.Directory;
     55import org.apache.lucene.store.FSDirectory;
     56import org.apache.lucene.util.Version;
     57
     58public class GS2LuceneQuery extends SharedSoleneQuery
    5559{
    56 
    57 
    58     static private String TEXTFIELD = "TX";
    59 
    60     // Use the standard set of English stop words by default
    61     static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    62 
    63     private String full_indexdir="";
    64     private String default_conjunction_operator = "OR";
    65     private String fuzziness = null;
    66     private String sort_field = null;
    67     private Sort sorter=new Sort();
    68     private String filter_string = null;
    69     private Filter filter = null;
    70     private int start_results=1;
    71     private int end_results=Integer.MAX_VALUE;
    72 
    73     private QueryParser query_parser = null;
    74     private QueryParser query_parser_no_stop_words = null;
    75     private Searcher searcher = null;
    76     private IndexReader reader = null;
    77 
    78     static private PrintWriter utf8out = null;
    79 
    80     static
    81     {
    82     try {
    83         OutputStreamWriter osw = new OutputStreamWriter(System.out, "UTF-8");
    84         utf8out = new PrintWriter(osw, true);
    85     }
    86         catch (UnsupportedEncodingException e) {
    87         System.out.println(e);
    88     }
    89     }
    90 
    91    
     60    protected String full_indexdir="";
     61
     62    protected Sort sorter=new Sort();
     63    protected Filter filter = null;
     64
     65    protected static Version matchVersion = Version.LUCENE_24;
     66
     67    protected QueryParser query_parser = null;
     68    protected QueryParser query_parser_no_stop_words = null;
     69    protected Searcher searcher = null;
     70    protected IndexReader reader = null;
     71
    9272    public GS2LuceneQuery() {
     73    super();
    9374
    9475    // Create one query parser with the standard set of stop words, and one with none
    9576
    96     query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
    97         query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
     77    query_parser = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
     78        query_parser_no_stop_words = new QueryParser(matchVersion, TEXTFIELD, new GS2Analyzer(new String[] { }));
    9879    }
    9980   
    10081   
    10182    public boolean initialise() {
     83
     84    if (!super.initialise()) {
     85        return false;
     86    }
     87
    10288
    10389        if (full_indexdir==null || full_indexdir.length()==-1){
     
    10692        return false;
    10793        }
     94
    10895        try {
    109             searcher = new IndexSearcher(full_indexdir);
     96        Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
     97            searcher = new IndexSearcher(full_indexdir_dir,true);
    11098            reader = ((IndexSearcher) searcher).getIndexReader();
    11199       
     
    118106
    119107    }
     108
     109    public void setIndexDir(String full_indexdir) {
     110    this.full_indexdir = full_indexdir;
     111    }
     112
     113    public void setSortField(String sort_field) {
     114    super.setSortField(sort_field);
     115
     116    if (sort_field == null) {
     117        this.sorter = new Sort();
     118    } else {
     119        this.sorter = new Sort(new SortField(sort_field,SortField.STRING)); // **** can do better than this?!?
     120    }
     121    }
     122
     123    public void setFilterString(String filter_string) {
     124    super.setFilterString(filter_string);
     125    this.filter = parseFilterString(filter_string);
     126    }
     127
     128    public Filter getFilter() {
     129    return this.filter;
     130    }
     131
    120132   
    121133    public LuceneQueryResult runQuery(String query_string) {
     
    194206        if (end_results == Integer.MAX_VALUE) {
    195207        // Perform the query (filter and sorter may be null)
    196         Hits hits = searcher.search(query, filter, sorter);
    197         lucene_query_result.setTotalDocs(hits.length());
     208        TopFieldDocs hits = searcher.search(query, filter, end_results, sorter);
     209        lucene_query_result.setTotalDocs(hits.totalHits);
    198210
    199211        // Output the matching documents
    200212        lucene_query_result.setStartResults(start_results);
    201         lucene_query_result.setEndResults(hits.length());
    202 
    203         for (int i = start_results; i <= hits.length(); i++) {
    204             int lucene_doc_num = hits.id(i - 1);
    205             Document doc = hits.doc(i - 1);
     213        lucene_query_result.setEndResults(hits.totalHits);
     214
     215        for (int i = start_results; i <= hits.totalHits; i++) {
     216            int lucene_doc_num = hits.scoreDocs[i - 1].doc;
     217            Document doc = reader.document(lucene_doc_num);
    206218            int doc_term_freq = 0;
    207219            Integer doc_term_freq_object = (Integer) doc_term_freq_map.get(new Integer(lucene_doc_num));
     
    210222            doc_term_freq = doc_term_freq_object.intValue();
    211223            }
    212             lucene_query_result.addDoc(doc.get("docOID").trim(), hits.score(i-1), doc_term_freq);
     224            lucene_query_result.addDoc(doc.get("docOID").trim(), hits.scoreDocs[i-1].score, doc_term_freq);
    213225        }
    214226        }
     
    256268
    257269    public void setDefaultConjunctionOperator(String default_conjunction_operator) {
    258     this.default_conjunction_operator = default_conjunction_operator.toUpperCase();
     270    super.setDefaultConjunctionOperator(default_conjunction_operator);
     271
    259272    if (default_conjunction_operator.equals("AND")) {
    260273        query_parser.setDefaultOperator(query_parser.AND_OPERATOR);
     
    265278    }
    266279    }
    267    
    268     public String getDefaultConjunctionOperator() {
    269     return this.default_conjunction_operator;
    270     }
    271    
    272     public void setEndResults(int end_results) {
    273     this.end_results = end_results;
    274     }
    275     public int getEndResults() {
    276     return this.end_results;
    277     }
    278        
    279     public void setFilterString(String filter_string) {
    280     this.filter_string = filter_string;
    281     this.filter = parseFilterString(filter_string);
    282     }
    283     public String getFilterString() {
    284     return this.filter_string ;
    285     }
    286    
    287     public Filter getFilter() {
    288     return this.filter;
    289     }
    290 
    291     public void setIndexDir(String full_indexdir) {
    292     this.full_indexdir = full_indexdir;
    293     }
    294    
    295     public void setFuzziness(String fuzziness) {
    296     this.fuzziness = fuzziness;
    297     }
    298     public String getFuzziness() {
    299     return this.fuzziness;
    300     }
    301    
    302     public void setSortField(String sort_field) {
    303     this.sort_field = sort_field;
    304     if (sort_field == null) {
    305         this.sorter = new Sort();
    306     } else {
    307         this.sorter = new Sort(sort_field);
    308     }
    309     }
    310     public String getSortField() {
    311     return this.sort_field;
    312     }
    313        
    314     public void setStartResults(int start_results) {
    315     if (start_results < 1) {
    316         start_results = 1;
    317     }
    318     this.start_results = start_results;
    319     }
    320     public int getStartResults() {
    321     return this.start_results;
    322     }
    323        
     280     
     281       
    324282    public void cleanUp() {
     283    super.cleanUp();
    325284    try {
    326285        if (searcher != null) {
     
    332291    }
    333292
    334     private Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
     293
     294    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
    335295    throws java.io.IOException, org.apache.lucene.queryParser.ParseException
    336296    {
     
    434394    }
    435395
    436     private Filter parseFilterString(String filter_string)
     396    protected Filter parseFilterString(String filter_string)
    437397    {
    438398    Filter result = null;
     
    445405        String upper_term = matcher.group(4);
    446406        boolean include_upper = matcher.group(5).equals("]");
    447         result = new RangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
     407        result = new TermRangeFilter(field_name, lower_term, upper_term, include_lower, include_upper);
    448408    }
    449409    else {
     
    452412    return result;
    453413    }
    454 
    455 
    456     protected void finalize() throws Throwable
    457     {
    458     try {
    459         utf8out.flush();
    460     } finally {
    461         super.finalize();
    462     }
    463     }
    464 
    465414   
     415
    466416    /** command line program and auxiliary methods */
    467417
    468418    // Fairly self-explanatory I should hope
    469     static private boolean query_result_caching_enabled = false;
     419    static protected boolean query_result_caching_enabled = false;
    470420
    471421
    472422    static public void main (String args[])
    473423    {
    474 
    475 
    476424    if (args.length == 0) {
    477425        System.out.println("Usage: GS2LuceneQuery <index directory> [-fuzziness value] [-filter filter_string] [-sort sort_field] [-dco AND|OR] [-startresults number -endresults number] [query]");
     
    566514    }
    567515
    568     private static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
     516    protected static void runQueryCaching(String index_directory, GS2LuceneQuery queryer, String query_string)
    569517    throws IOException
    570518    {
     
    654602    }
    655603   
    656     private static String fileSafe(String text)
     604    protected static String fileSafe(String text)
    657605    {
    658606    StringBuffer file_safe_text = new StringBuffer();
Note: See TracChangeset for help on using the changeset viewer.