Changeset 32619

Show
Ignore:
Timestamp:
20.11.2018 21:34:48 (3 weeks ago)
Author:
ak19
Message:

3 significant changes in 1 commit particularly impacting Lucene queries: 1. Instead if GS2LuceneSearch havinga GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader? objects for every query. Since IndexReaders? support concurrent access, we'd like to use one IndexReader? per collection index (one for didx, one for sidx) with the IndexReaders? existing for the life of a collection. This meant moving the maintaining of IndexReader? objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap? to maintain index-dir, reader pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader? objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the filelocking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

Location:
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2FieldSearch.java

    r32547 r32619  
    672672        indexField = field; 
    673673        // set up the appropriate query system 
    674         if (!setUpQueryer(params)) 
    675         { 
     674        Object queryObject = setUpQueryer(params); 
     675        if (queryObject == null) 
     676        {            
    676677            return result; 
    677678        } 
     
    689690            query = parseAdvancedFieldQueryParams(params); 
    690691            break; 
    691         } 
     692        }        
    692693         
    693         // run the query 
    694         Object query_result = runQuery(query); 
     694        // run the query         
     695        Object query_result = runQuery(queryObject, query); 
     696         
    695697         
    696698        // We want highlighted text to be returned right now! 
     
    817819            } 
    818820        } 
    819  
    820821         
     822        queryObject = null; 
    821823        return result; 
    822824 
     
    824826 
    825827    /** methods to handle actually doing the query */ 
    826     /** do any initialisation of the query object */ 
    827     abstract protected boolean setUpQueryer(HashMap<String, Serializable> params); 
    828  
    829     /** do the query */ 
    830     abstract protected Object runQuery(String query); 
     828    /** do any initialisation of the query object. Call before runQuery() 
     829      * @return the queryObject (e.g. GS2LuceneQuery) 
     830    */   
     831    abstract protected Object setUpQueryer(HashMap<String, Serializable> params); 
     832 
     833    /** do the query  
     834      * The queryObject parameter is the return value of setUpQueryer. 
     835    */ 
     836    abstract protected Object runQuery(Object queryObject, String query); 
    831837 
    832838    /** get the total number of docs that match */ 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java

    r32453 r32619  
    2121// Greenstone classes 
    2222import java.io.File; 
     23import java.io.IOException; 
    2324import java.io.Serializable; 
    2425import java.util.ArrayList; 
     
    2930import java.util.Set; 
    3031import java.util.Vector; 
     32 
     33// For maintaining Lucene IndexReader objects at collection level 
     34import org.apache.lucene.index.DirectoryReader; 
     35import org.apache.lucene.index.IndexReader; 
     36import org.apache.lucene.store.Directory; 
     37import org.apache.lucene.store.FSDirectory; 
    3138 
    3239import org.apache.log4j.Logger; 
     
    4047import org.w3c.dom.Element; 
    4148 
     49 
    4250public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch 
    4351{ 
     
    4755  protected static final String SORT_ORDER_NORMAL = "0"; 
    4856 
     57    // IndexReader objects are to be opened for each index level (e.g. one for didx, one for sidx) of a  
     58    // collection and will live for the duration of that collection, which is from collection activation 
     59    // until deactivation. 
     60    // So we want singletons of each index level's IndexReader, since IndexReaders are "multi-threaded 
     61    // re-entrant", so there's support for just one reader per index with concurrent access by multiple users' 
     62    // search queries. 
     63    // When a collection is deactivated, we need to close the reader objects to prevent handles to the  
     64    // index lingering and causing file locking issues on windows. 
     65    // Since GS2LuceneQuery now becomes a local member variable instantiated per query, we have to maintain 
     66    // IndexReader objects in GS2LuceneSearch instead, as GS2LuceneSearch is a collection's service, and 
     67    // therefore activated and deactivated along with the collection. 
     68    // The uniqueness of an IndexReader is indicated in the filepath to its index folder (collection path + sidx/didx). 
     69    // It doesn't have to be a static map of index_dir to IndexReader, and can be a member variable, since 
     70    // no other collection will refer to the same didx and sidx index folders: each collection has unique filepaths 
     71    // to its collection folder's index subdirs, not shared with other collections so the Readers don't have to be 
     72    // shared between collections either. 
     73 
     74    // We now store IndexReaders in a map of singleton index_dir -> IndexReaders opened for this collection: 
     75    // one Reader singleton for each index_dir 
     76    private Map<String, IndexReader> index_to_reader_map = new HashMap<String, IndexReader>(); 
     77   
    4978    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 
    50  
    51     private GS2LuceneQuery lucene_src = null; 
    5279 
    5380    public GS2LuceneSearch() 
     
    5582      does_paging = true; 
    5683        paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_NORMAL); 
    57         this.lucene_src = new GS2LuceneQuery(); 
    5884    } 
    5985 
     
    6187    { 
    6288        super.cleanUp(); 
    63         this.lucene_src.cleanUp(); 
    64     } 
    65  
     89         
     90        // Prevent file locking issues: close all IndexReader objects maintained for this collection 
     91        synchronized(index_to_reader_map) { // Regular Map implementations are not synchronized, so adding/removing requires synchronizing on the map object. 
     92                                        // see https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html 
     93                                        // And ConcurrentHashMap seems complicated, https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/ConcurrentHashMap.html 
     94                                         
     95                                        // Synchronizing *outside* the loop because cleanUp() clears the entire HashMap. 
     96                                        // Don't let any other threads access the map, hence synchronizing. 
     97                                        // Not sure if there may be other threads accessing the map when deactivating a collection which calls cleanUp(). 
     98                                        // However, when multiple users' search queries lead to adding to the hashmap, definitely need to 
     99                                        // synchronize as there's a greater possibility of concurrent access then.                                       
     100                                             
     101            Iterator<Map.Entry<String,IndexReader>> map_iterator = index_to_reader_map.entrySet().iterator();  
     102                        // Can use the Map.Entry Set view iterator to remove (key, value) entry from underlying Map! 
     103                        // See https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html#keySet() 
     104                        // Same thread creates the iterator as synchronizes on the map, so we should be allowed to remove() from the map 
     105                        // but only through iterator! 
     106            while(map_iterator.hasNext()) {              
     107                Map.Entry<String,IndexReader> entry = map_iterator.next(); 
     108                //index_to_reader_map.remove(...); // concurrentmodexception! Only allowed to remove through iterator. Will remove recent object returned by next() 
     109                IndexReader reader = entry.getValue(); //keys are index dir paths, e.g. path to current collection's didx folder, values are IndexReader objects 
     110                map_iterator.remove();  // removes current key's (key,value) entry from underlying map! (Remember, we're iterating on the keyset) 
     111                                        // We're first removing the reader singleton from map because reader.close() will only close the reader 
     112                                        //if it's the final reference to it in case that has a bearing here 
     113                 
     114                if(reader != null) { // if there was a reader singleton instantiated for this index directory, e.g. coll-didx, close it                      
     115                    try { 
     116                        // We're opening an IndexReader per indexdir once and closing it once: at start and end of collection. 
     117                        // If Reader was a member var of GS2LuceneQuery and if multiple GS2LuceneQuery Objects were to call close() on the 
     118                        // same reader object (on the singleton instance of reader for an index dir), so close is called multiple times, 
     119                        // then would use incRef and decRef, see http://lucene.472066.n3.nabble.com/IndexReader-close-behavior-td2865515.html 
     120                        // But then when concurrent queries are done, the final one would have closed the IndexReader and it would have to 
     121                        // be reopened for the next query. We'd rather keep an opened IndexReader around until the collection's deactivated. 
     122                        reader.close(); 
     123                        // Closes files associated with this index. Also saves any new deletions to disk.  
     124                        // No other methods should be called after this has been called. 
     125                    } catch (IOException exception) { 
     126                        exception.printStackTrace(); 
     127                    }            
     128                }                
     129            } // end loop 
     130        } // end synchronising on index_to_reader_map 
     131 
     132        // Now we've closed all the Readers maintained for this collection and cleared the map. 
     133    } 
     134     
    66135  public boolean configure(Element info, Element extra_info) 
    67136  { 
     
    117186 
    118187    /** do any initialisation of the query object */ 
    119     protected boolean setUpQueryer(HashMap params) 
    120     { 
     188    protected Object setUpQueryer(HashMap params) 
     189    { 
     190        // local Query object 
     191        GS2LuceneQuery lucene_src = new GS2LuceneQuery(); 
     192         
    121193        String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar; 
    122194 
     
    158230                if (value.equals(MATCH_PARAM_ALL)) 
    159231                { 
    160                     this.lucene_src.setDefaultConjunctionOperator("AND"); 
     232                    lucene_src.setDefaultConjunctionOperator("AND"); 
    161233                } 
    162234                else 
    163235                { 
    164                     this.lucene_src.setDefaultConjunctionOperator("OR"); 
     236                    lucene_src.setDefaultConjunctionOperator("OR"); 
    165237                } 
    166238            } 
     
    168240            { 
    169241              sort_field = getLuceneSort(value); 
    170               this.lucene_src.setSortField(sort_field); 
     242              lucene_src.setSortField(sort_field); 
    171243 
    172244            } 
     
    205277          end_results = hits_per_page * start_page; 
    206278        } 
    207         this.lucene_src.setStartResults(start_results); 
    208         this.lucene_src.setEndResults(end_results); 
     279        lucene_src.setStartResults(start_results); 
     280        lucene_src.setEndResults(end_results); 
    209281 
    210282        if (index.equals("sidx") || index.equals("didx")) 
     
    221293 
    222294        if (sort_order.equals(SORT_ORDER_REVERSE)) { 
    223           this.lucene_src.setReverseSort(true); 
     295          lucene_src.setReverseSort(true); 
    224296        } else { 
    225           this.lucene_src.setReverseSort(false); 
    226         } 
    227         this.lucene_src.setIndexDir(indexdir + index); 
    228         this.lucene_src.initialise(); 
    229         return true; 
     297          lucene_src.setReverseSort(false); 
     298        } 
     299         
     300        String full_index_dir_str = indexdir + index; 
     301        lucene_src.setIndexDir(full_index_dir_str);      
     302         
     303        // Ensure we have an IndexReader for this full_index_dir_str: 
     304        // check the hashmap first, in case we already opened a reader and searcher for this index dir, e.g. didx 
     305        // if there was a reader singleton instantiated for this index directory, e.g. <coll>didx, use that. 
     306        // Else open a new reader for this index_dir and store it in the map. 
     307        IndexReader reader = index_to_reader_map.get(full_index_dir_str);        
     308        if(reader == null) {  
     309            try {            
     310                Directory full_indexdir_dir = FSDirectory.open(new File(full_index_dir_str)); 
     311                reader = DirectoryReader.open(full_indexdir_dir); // Returns an IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher 
     312                synchronized(index_to_reader_map) { 
     313                    // If storing searcher along with reader, mimic Pairs with: https://stackoverflow.com/questions/2670982/using-pairs-or-2-tuples-in-java 
     314                    index_to_reader_map.put(full_index_dir_str, reader); 
     315                } 
     316            } 
     317            catch (IOException exception) { 
     318                exception.printStackTrace();                 
     319            }            
     320        } 
     321         
     322        lucene_src.initialise(reader); // sets IndexReader and IndexSearcher 
     323         
     324        return lucene_src; // return the queryobject 
    230325    } 
    231326 
    232327    /** do the query */ 
    233     protected Object runQuery(String query) 
    234     { 
     328    protected Object runQuery(Object queryObject, String query) 
     329    { 
     330        GS2LuceneQuery lucene_src = (GS2LuceneQuery) queryObject; 
    235331        try 
    236332        { 
    237             LuceneQueryResult lqr = this.lucene_src.runQuery(query); 
     333            LuceneQueryResult lqr = lucene_src.runQuery(query); 
    238334            return lqr; 
    239335        } 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java

    r32084 r32619  
    4343public class GS2MGPPSearch extends AbstractGS2FieldSearch 
    4444{ 
    45     private static MGPPSearchWrapper mgpp_src = null; 
     45    private static MGPPSearchWrapper mgpp_src = null; // STATIC! 
    4646 
    4747    private String physical_index_name = "idx"; 
     
    6565        mgpp_src.reset(); // reset stored settings to defaults 
    6666    } 
    67  
     67     
    6868    /** process a query */ 
    6969    protected Element processAnyQuery(Element request, int query_type) 
    7070    { 
     71        // don't know that the static (class variable) mgpp_src is "multi-threaded re-entrant" allowing multiple users 
     72        // to search the same index at the same time. So leave code as-is: to synchronize on mgpp_src when running query 
    7173        synchronized (mgpp_src) 
    7274        { 
     
    102104    } 
    103105 
    104     protected boolean setUpQueryer(HashMap<String, Serializable> params) 
     106    protected Object setUpQueryer(HashMap<String, Serializable> params) 
    105107    { 
    106108 
     
    199201        mgpp_src.loadIndexData(indexdir); 
    200202 
    201         return true; 
    202     } 
    203  
    204     protected Object runQuery(String query) 
    205     { 
     203        return mgpp_src; //return the query object 
     204    } 
     205 
     206    protected Object runQuery(Object queryObject, String query) 
     207    { 
     208        // queryObject is mgpp_src, so use mgpp_src reference directly: 
     209         
    206210        mgpp_src.runQuery(query); 
    207211        MGPPQueryResult mqr = mgpp_src.getQueryResult();