Changeset 32620 for main/trunk


Ignore:
Timestamp:
2018-11-20T21:35:42+13:00 (5 years ago)
Author:
ak19
Message:

Directly related to previous commit revision. 3 significant changes in 1 commit particularly impacting Lucene queries: 1. Instead of GS2LuceneSearch having a GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches from getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader objects for every query. Since IndexReaders support concurrent access, we'd like to use one IndexReader per collection index (one for didx, one for sidx) with the IndexReaders existing for the life of a collection. This meant moving the maintenance of IndexReader objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap to maintain (index-dir, reader) pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the file-locking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper4/GS2LuceneQuery.java

    r32616 r32620  
    4444import org.apache.lucene.search.ConstantScoreQuery;
    4545import org.apache.lucene.search.Filter;
    46 import org.apache.lucene.search.IndexSearcher;
     46import org.apache.lucene.search.IndexSearcher; // Searcher is deprecated
    4747import org.apache.lucene.search.MultiTermQuery;
    4848import org.apache.lucene.search.MultiTermQuery.ConstantScoreAutoRewrite;
    4949import org.apache.lucene.search.Query;
    5050import org.apache.lucene.search.TermRangeFilter;
    51 import org.apache.lucene.search.IndexSearcher; // Searcher is deprecated
    5251import org.apache.lucene.search.ScoreDoc;
    5352import org.apache.lucene.search.Sort;
     
    8079    protected QueryParser query_parser_no_stop_words = null;
    8180    protected IndexSearcher searcher = null;
    82     protected IndexReader reader = null;
     81    protected IndexReader reader = null; // reference to a Reader resource. GS2LuceneQuery doesn't maintain it, GS2LuceneSearch maintains it!
     82        // GS2LuceneSearch locally instantiates one GS2LuceneQuery object per query, then allows each Query instance to use a relevant Reader.
     83        // But GS2LuceneSearch opens the IndexReaders and, more importantly, closes them all when a collection is deactivated.
    8384
    8485    public GS2LuceneQuery() {
     
    8990    query_parser = new QueryParser(GSLuceneConstants.MATCH_VERSION, TEXTFIELD, new GS2Analyzer()); // uses built-in stop_words_set
    9091        query_parser_no_stop_words = new QueryParser(GSLuceneConstants.MATCH_VERSION, TEXTFIELD, new GS2Analyzer(new String[] { }));
    91     }
    92    
    93    
    94     public boolean initialise() {
    95 
    96     if (!super.initialise()) {
    97         return false;
    98     }
     92    }   
     93   
     94    public boolean initialise(IndexReader reader) {
     95
     96        if (!super.initialise()) {
     97            return false;
     98        }
    9999
    100100
     
    104104        return false;
    105105        }
    106 
    107         try {   
    108106       
    109             if(reader != null) {
    110                     reader.close();
    111                     searcher = null;
    112             }   
    113            
    114         Directory full_indexdir_dir = FSDirectory.open(new File(full_indexdir));
    115 
    116         reader = DirectoryReader.open(full_indexdir_dir); // Returns a IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher
    117         searcher = new IndexSearcher(reader); // during searcher.search() will get it to compute ranks when sorting by fields
    118        
    119         this.sorter = new Sort(new SortField(this.sort_field, this.sort_type, this.reverse_sort));
    120     }
    121     catch (IOException exception) {
    122             exception.printStackTrace();
    123         return false;
    124         }
    125     return true;
    126 
     107        if(reader == null) {
     108            return false;
     109        }
     110        else {
     111            this.reader = reader;
     112            this.searcher = new IndexSearcher(reader); // during searcher.search() will get it to compute ranks when sorting by fields
     113            this.sorter = new Sort(new SortField(this.sort_field, this.sort_type, this.reverse_sort));     
     114            return true;
     115        }
    127116    }
    128117
     
    170159    LuceneQueryResult lucene_query_result=new LuceneQueryResult();
    171160    lucene_query_result.clear();
    172        
     161   
     162    if(this.reader == null) {
     163        System.err.println("#### Reader is null!");
     164    }
     165   
    173166    try {               
    174167        Query query_including_stop_words = query_parser_no_stop_words.parse(query_string);
     
    348341    }
    349342     
    350        
     343    // This version of the cleanUp() method is just to clean up anything associated only with this instance of GS2LuceneQuery.
     344    // So it won't clean up the singleton IndexReader instances maintained by the encapsulating GS2LuceneSearch class.
    351345    public void cleanUp() {
    352     super.cleanUp();
    353     try {
    354         if(reader != null) {
    355         reader.close();
    356         // Closes files associated with this index. Also saves any new deletions to disk.
    357         // No other methods should be called after this has been called.
    358         }
     346        super.cleanUp();
    359347       
    360     } catch (IOException exception) {
    361         exception.printStackTrace();
    362     }
    363     }
    364 
     348        searcher = null;
     349   
     350        // Don't close the indexReader reference here.
     351        // This has moved into the GS2LuceneSearch.cleanUp() method, as it maintains singleton IndexReaders
     352        // for each index level (sidx, didx) with lifespans matching their collection's lifespan.
     353        // A collection's GS2LuceneSearch object lives for the duration of the Collection.
     354        // A GS2LuceneQuery object is ephemeral: only lives for the duration of a query, allowing multiple
     355        // users to query concurrently, sharing a single IndexReader object for each indexing level
     356        // as IndexReaders support concurrency.
     357    }
    365358
    366359    protected Query parseQuery(IndexReader reader, QueryParser query_parser, String query_string, String fuzziness)
Note: See TracChangeset for help on using the changeset viewer.