Changeset 32620

Show
Ignore:
Timestamp:
20.11.2018 21:35:42 (3 weeks ago)
Author:
ak19
Message:

Directly related to the previous commit revision. 3 significant changes in 1 commit, particularly impacting Lucene queries: 1. Instead of GS2LuceneSearch having a GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader objects for every query. Since IndexReaders support concurrent access, we'd like to use one IndexReader per collection index (one for didx, one for sidx) with the IndexReaders existing for the life of a collection. This meant moving the maintaining of IndexReader objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap to maintain index-dir, reader pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the file-locking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper4/GS2LuceneQuery.java

    r32616 r32620  
    4444import org.apache.lucene.search.ConstantScoreQuery; 
    4545import org.apache.lucene.search.Filter; 
    46 import org.apache.lucene.search.IndexSearcher; 
     46import org.apache.lucene.search.IndexSearcher; // Searcher is deprecated 
    4747import org.apache.lucene.search.MultiTermQuery; 
    4848import org.apache.lucene.search.MultiTermQuery.ConstantScoreAutoRewrite; 
    4949import org.apache.lucene.search.Query; 
    5050import org.apache.lucene.search.TermRangeFilter; 
    51 import org.apache.lucene.search.IndexSearcher; // Searcher is deprecated 
    5251import org.apache.lucene.search.ScoreDoc; 
    5352import org.apache.lucene.search.Sort; 
     
    8079    protected QueryParser query_parser_no_stop_words = null; 
    8180    protected IndexSearcher searcher = null; 
    82     protected IndexReader reader = null; 
     81    protected IndexReader reader = null; // reference to a Reader resource. GS2LuceneQuery doesn't maintain it, GS2LuceneSearch maintains it! 
     82        // GS2LuceneSearch locally instantiates one GS2LuceneQuery object per query then allows each Query instance use a relevant Reader. 
     83        // But GS2LuceneSearch opens the IndexReaders and, more importantly, closes them all when a collection is deactivated. 
    8384 
    8485    public GS2LuceneQuery() { 
     
    8990    query_parser = new QueryParser(GSLuceneConstants.MATCH_VERSION, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set 
    9091        query_parser_no_stop_words = new QueryParser(GSLuceneConstants.MATCH_VERSION, TEXTFIELD, new GS2Analyzer(new String[] { })); 
    91     } 
    92      
    93      
    94     public boolean initialise() { 
    95  
    96     if (!super.initialise()) { 
    97         return false; 
    98     } 
     92    }    
     93     
     94    public boolean initialise(IndexReader reader) { 
     95 
     96        if (!super.initialise()) { 
     97            return false; 
     98        } 
    9999 
    100100 
     
    104104        return false; 
    105105        } 
    106  
    107         try {    
    108106         
    109             if(reader != null) { 
    110                     reader.close(); 
    111                     searcher = null; 
    112             }    
    113              
    114         Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir)); 
    115  
    116         reader = DirectoryReader.open(full_indexdir_dir); // Returns a IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher 
    117         searcher = new IndexSearcher(reader); // during searcher.search() will get it to compute ranks when sorting by fields 
    118          
    119         this.sorter = new Sort(new SortField(this.sort_field, this.sort_type, this.reverse_sort)); 
    120     } 
    121     catch (IOException exception) { 
    122             exception.printStackTrace(); 
    123         return false; 
    124         } 
    125     return true; 
    126  
     107        if(reader == null) { 
     108            return false; 
     109        } 
     110        else { 
     111            this.reader = reader; 
     112            this.searcher = new IndexSearcher(reader); // during searcher.search() will get it to compute ranks when sorting by fields 
     113            this.sorter = new Sort(new SortField(this.sort_field, this.sort_type, this.reverse_sort));       
     114            return true; 
     115        } 
    127116    } 
    128117 
     
    170159    LuceneQueryResult lucene_query_result=new LuceneQueryResult(); 
    171160    lucene_query_result.clear(); 
    172          
     161     
     162    if(this.reader == null) { 
     163        System.err.println("#### Reader is null!"); 
     164    } 
     165     
    173166    try {                
    174167        Query query_including_stop_words = query_parser_no_stop_words.parse(query_string); 
     
    348341    } 
    349342      
    350         
     343    // This version of the cleanUp() method is just to clean up anything associated only with this instance of GS2LuceneQuery. 
     344    // So it won't clean up the singleton IndexReader instances maintained by the encapsulating GS2LuceneSearch class. 
    351345    public void cleanUp() { 
    352     super.cleanUp(); 
    353     try { 
    354         if(reader != null) { 
    355         reader.close(); 
    356         // Closes files associated with this index. Also saves any new deletions to disk.  
    357         // No other methods should be called after this has been called.  
    358         } 
     346        super.cleanUp(); 
    359347         
    360     } catch (IOException exception) { 
    361         exception.printStackTrace(); 
    362     } 
    363     } 
    364  
     348        searcher = null; 
     349     
     350        // Don't close the indexReader reference here. 
     351        // This has moved into the GS2LuceneSearch.cleanUp() method, as it maintains singleton IndexReaders 
     352        // for each index level (sidx, didix) with lifespans matching their collection's lifespan 
     353        // A collection's GS2LuceneSearch object lives for the duration of the Collection. 
     354        // A GS2LuceneQuery object is ephemeral: only lives for the duration of a query, allowing multiple 
     355        // users to queries concurrently, sharing a single IndexReader object for each indexing level 
     356        // as IndexReader support concurrency. 
     357    } 
    365358 
    366359    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)