Changeset 32619 for main


Ignore:
Timestamp:
2018-11-20T21:34:48+13:00 (5 years ago)
Author:
ak19
Message:

3 significant changes in 1 commit particularly impacting Lucene queries: 1. Instead if GS2LuceneSearch havinga GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader objects for every query. Since IndexReaders support concurrent access, we'd like to use one IndexReader per collection index (one for didx, one for sidx) with the IndexReaders existing for the life of a collection. This meant moving the maintaining of IndexReader objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap to maintain index-dir, reader pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the filelocking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

Location:
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2FieldSearch.java

    r32547 r32619  
    672672        indexField = field;
    673673        // set up the appropriate query system
    674         if (!setUpQueryer(params))
    675         {
     674        Object queryObject = setUpQueryer(params);
     675        if (queryObject == null)
     676        {           
    676677            return result;
    677678        }
     
    689690            query = parseAdvancedFieldQueryParams(params);
    690691            break;
    691         }
     692        }       
    692693       
    693         // run the query
    694         Object query_result = runQuery(query);
     694        // run the query       
     695        Object query_result = runQuery(queryObject, query);
     696       
    695697       
    696698        // We want highlighted text to be returned right now!
     
    817819            }
    818820        }
    819 
    820821       
     822        queryObject = null;
    821823        return result;
    822824
     
    824826
    825827    /** methods to handle actually doing the query */
    826     /** do any initialisation of the query object */
    827     abstract protected boolean setUpQueryer(HashMap<String, Serializable> params);
    828 
    829     /** do the query */
    830     abstract protected Object runQuery(String query);
     828    /** do any initialisation of the query object. Call before runQuery()
     829      * @return the queryObject (e.g. GS2LuceneQuery)
     830    */ 
     831    abstract protected Object setUpQueryer(HashMap<String, Serializable> params);
     832
     833    /** do the query
     834      * The queryObject parameter is the return value of setUpQueryer.
     835    */
     836    abstract protected Object runQuery(Object queryObject, String query);
    831837
    832838    /** get the total number of docs that match */
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java

    r32453 r32619  
    2121// Greenstone classes
    2222import java.io.File;
     23import java.io.IOException;
    2324import java.io.Serializable;
    2425import java.util.ArrayList;
     
    2930import java.util.Set;
    3031import java.util.Vector;
     32
     33// For maintaining Lucene IndexReader objects at collection level
     34import org.apache.lucene.index.DirectoryReader;
     35import org.apache.lucene.index.IndexReader;
     36import org.apache.lucene.store.Directory;
     37import org.apache.lucene.store.FSDirectory;
    3138
    3239import org.apache.log4j.Logger;
     
    4047import org.w3c.dom.Element;
    4148
     49
    4250public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
    4351{
     
    4755  protected static final String SORT_ORDER_NORMAL = "0";
    4856
     57    // IndexReader objects are to be opened for each index level (e.g. one for didx, one for sidx) of a
     58    // collection and will live for the duration of that collection, which is from collection activation
     59    // until deactivation.
     60    // So we want singletons of each index level's IndexReader, since IndexReaders are "multi-threaded
     61    // re-entrant", so there's support for just one reader per index with concurrent access by multiple users'
     62    // search queries.
     63    // When a collection is deactivated, we need to close the reader objects to prevent handles to the
     64    // index lingering and causing file locking issues on windows.
     65    // Since GS2LuceneQuery now becomes a local member variable instantiated per query, we have to maintain
     66    // IndexReader objects in GS2LuceneSearch instead, as GS2LuceneSearch is a collection's service, and
     67    // therefore activated and deactivated along with the collection.
     68    // The uniqueness of an IndexReader is indicated in the filepath to its index folder (collection path + sidx/didx).
     69    // It doesn't have to be a static map of index_dir to IndexReader, and can be a member variable, since
     70    // no other collection will refer to the same didx and sidx index folders: each collection has unique filepaths
     71    // to its collection folder's index subdirs, not shared with other collections so the Readers don't have to be
     72    // shared between collections either.
     73
     74    // We now store IndexReaders in a map of singleton index_dir -> IndexReaders opened for this collection:
     75    // one Reader singleton for each index_dir
     76    private Map<String, IndexReader> index_to_reader_map = new HashMap<String, IndexReader>();
     77 
    4978    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
    50 
    51     private GS2LuceneQuery lucene_src = null;
    5279
    5380    public GS2LuceneSearch()
     
    5582      does_paging = true;
    5683        paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_NORMAL);
    57         this.lucene_src = new GS2LuceneQuery();
    5884    }
    5985
     
    6187    {
    6288        super.cleanUp();
    63         this.lucene_src.cleanUp();
    64     }
    65 
     89       
     90        // Prevent file locking issues: close all IndexReader objects maintained for this collection
     91        synchronized(index_to_reader_map) { // Regular Map implementations are not synchronized, so adding/removing requires synchronizing on the map object.
     92                                        // see https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html
     93                                        // And ConcurrentHashMap seems complicated, https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/ConcurrentHashMap.html
     94                                       
     95                                        // Synchronizing *outside* the loop because cleanUp() clears the entire HashMap.
     96                                        // Don't let any other threads access the map, hence synchronizing.
     97                                        // Not sure if there may be other threads accessing the map when deactivating a collection which calls cleanUp().
     98                                        // However, when multiple users' search queries lead to adding to the hashmap, definitely need to
     99                                        // synchronize as there's a greater possibility of concurrent access then.                                     
     100                                           
     101            Iterator<Map.Entry<String,IndexReader>> map_iterator = index_to_reader_map.entrySet().iterator();
     102                        // Can use the Map.Entry Set view iterator to remove (key, value) entry from underlying Map!
     103                        // See https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html#keySet()
     104                        // Same thread creates the iterator as synchronizes on the map, so we should be allowed to remove() from the map
     105                        // but only through iterator!
     106            while(map_iterator.hasNext()) {             
     107                Map.Entry<String,IndexReader> entry = map_iterator.next();
     108                //index_to_reader_map.remove(...); // concurrentmodexception! Only allowed to remove through iterator. Will remove recent object returned by next()
     109                IndexReader reader = entry.getValue(); //keys are index dir paths, e.g. path to current collection's didx folder, values are IndexReader objects
     110                map_iterator.remove();  // removes current key's (key,value) entry from underlying map! (Remember, we're iterating on the keyset)
     111                                        // We're first removing the reader singleton from map because reader.close() will only close the reader
     112                                        //if it's the final reference to it in case that has a bearing here
     113               
     114                if(reader != null) { // if there was a reader singleton instantiated for this index directory, e.g. coll-didx, close it                     
     115                    try {
     116                        // We're opening an IndexReader per indexdir once and closing it once: at start and end of collection.
     117                        // If Reader was a member var of GS2LuceneQuery and if multiple GS2LuceneQuery Objects were to call close() on the
     118                        // same reader object (on the singleton instance of reader for an index dir), so close is called multiple times,
     119                        // then would use incRef and decRef, see http://lucene.472066.n3.nabble.com/IndexReader-close-behavior-td2865515.html
     120                        // But then when concurrent queries are done, the final one would have closed the IndexReader and it would have to
     121                        // be reopened for the next query. We'd rather keep an opened IndexReader around until the collection's deactivated.
     122                        reader.close();
     123                        // Closes files associated with this index. Also saves any new deletions to disk.
     124                        // No other methods should be called after this has been called.
     125                    } catch (IOException exception) {
     126                        exception.printStackTrace();
     127                    }           
     128                }               
     129            } // end loop
     130        } // end synchronising on index_to_reader_map
     131
     132        // Now we've closed all the Readers maintained for this collection and cleared the map.
     133    }
     134   
    66135  public boolean configure(Element info, Element extra_info)
    67136  {
     
    117186
    118187    /** do any initialisation of the query object */
    119     protected boolean setUpQueryer(HashMap params)
    120     {
     188    protected Object setUpQueryer(HashMap params)
     189    {
     190        // local Query object
     191        GS2LuceneQuery lucene_src = new GS2LuceneQuery();
     192       
    121193        String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar;
    122194
     
    158230                if (value.equals(MATCH_PARAM_ALL))
    159231                {
    160                     this.lucene_src.setDefaultConjunctionOperator("AND");
     232                    lucene_src.setDefaultConjunctionOperator("AND");
    161233                }
    162234                else
    163235                {
    164                     this.lucene_src.setDefaultConjunctionOperator("OR");
     236                    lucene_src.setDefaultConjunctionOperator("OR");
    165237                }
    166238            }
     
    168240            {
    169241              sort_field = getLuceneSort(value);
    170               this.lucene_src.setSortField(sort_field);
     242              lucene_src.setSortField(sort_field);
    171243
    172244            }
     
    205277          end_results = hits_per_page * start_page;
    206278        }
    207         this.lucene_src.setStartResults(start_results);
    208         this.lucene_src.setEndResults(end_results);
     279        lucene_src.setStartResults(start_results);
     280        lucene_src.setEndResults(end_results);
    209281
    210282        if (index.equals("sidx") || index.equals("didx"))
     
    221293
    222294        if (sort_order.equals(SORT_ORDER_REVERSE)) {
    223           this.lucene_src.setReverseSort(true);
     295          lucene_src.setReverseSort(true);
    224296        } else {
    225           this.lucene_src.setReverseSort(false);
    226         }
    227         this.lucene_src.setIndexDir(indexdir + index);
    228         this.lucene_src.initialise();
    229         return true;
     297          lucene_src.setReverseSort(false);
     298        }
     299       
     300        String full_index_dir_str = indexdir + index;
     301        lucene_src.setIndexDir(full_index_dir_str);     
     302       
     303        // Ensure we have an IndexReader for this full_index_dir_str:
     304        // check the hashmap first, in case we already opened a reader and searcher for this index dir, e.g. didx
     305        // if there was a reader singleton instantiated for this index directory, e.g. <coll>didx, use that.
     306        // Else open a new reader for this index_dir and store it in the map.
     307        IndexReader reader = index_to_reader_map.get(full_index_dir_str);       
     308        if(reader == null) {
     309            try {           
     310                Directory full_indexdir_dir = FSDirectory.open(new File(full_index_dir_str));
     311                reader = DirectoryReader.open(full_indexdir_dir); // Returns an IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher
     312                synchronized(index_to_reader_map) {
     313                    // If storing searcher along with reader, mimic Pairs with: https://stackoverflow.com/questions/2670982/using-pairs-or-2-tuples-in-java
     314                    index_to_reader_map.put(full_index_dir_str, reader);
     315                }
     316            }
     317            catch (IOException exception) {
     318                exception.printStackTrace();               
     319            }           
     320        }
     321       
     322        lucene_src.initialise(reader); // sets IndexReader and IndexSearcher
     323       
     324        return lucene_src; // return the queryobject
    230325    }
    231326
    232327    /** do the query */
    233     protected Object runQuery(String query)
    234     {
     328    protected Object runQuery(Object queryObject, String query)
     329    {
     330        GS2LuceneQuery lucene_src = (GS2LuceneQuery) queryObject;
    235331        try
    236332        {
    237             LuceneQueryResult lqr = this.lucene_src.runQuery(query);
     333            LuceneQueryResult lqr = lucene_src.runQuery(query);
    238334            return lqr;
    239335        }
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java

    r32084 r32619  
    4343public class GS2MGPPSearch extends AbstractGS2FieldSearch
    4444{
    45     private static MGPPSearchWrapper mgpp_src = null;
     45    private static MGPPSearchWrapper mgpp_src = null; // STATIC!
    4646
    4747    private String physical_index_name = "idx";
     
    6565        mgpp_src.reset(); // reset stored settings to defaults
    6666    }
    67 
     67   
    6868    /** process a query */
    6969    protected Element processAnyQuery(Element request, int query_type)
    7070    {
     71        // don't know that the static (class variable) mgpp_src is "multi-threaded re-entrant" allowing multiple users
     72        // to search the same index at the same time. So leave code as-is: to synchronize on mgpp_src when running query
    7173        synchronized (mgpp_src)
    7274        {
     
    102104    }
    103105
    104     protected boolean setUpQueryer(HashMap<String, Serializable> params)
     106    protected Object setUpQueryer(HashMap<String, Serializable> params)
    105107    {
    106108
     
    199201        mgpp_src.loadIndexData(indexdir);
    200202
    201         return true;
    202     }
    203 
    204     protected Object runQuery(String query)
    205     {
     203        return mgpp_src; //return the query object
     204    }
     205
     206    protected Object runQuery(Object queryObject, String query)
     207    {
     208        // queryObject is mgpp_src, so use mgpp_src reference directly:
     209       
    206210        mgpp_src.runQuery(query);
    207211        MGPPQueryResult mqr = mgpp_src.getQueryResult();
Note: See TracChangeset for help on using the changeset viewer.