Context Navigation

← Previous Change
Next Change →

Changeset 32619 for main

Timestamp:

2018-11-20T21:34:48+13:00 (5 years ago)

Author:

ak19

Message:

3 significant changes in 1 commit, particularly impacting Lucene queries: 1. Instead of GS2LuceneSearch having a GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches from getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader objects for every query. Since IndexReaders support concurrent access, we'd like to use one IndexReader per collection index (one for didx, one for sidx) with the IndexReaders existing for the life of a collection. This meant moving the maintenance of IndexReader objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap to maintain (index-dir, reader) pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the file-locking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

Location:

main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service

Files:

: 3 edited

AbstractGS2FieldSearch.java (modified) (4 diffs)
GS2LuceneSearch.java (modified) (11 diffs)
GS2MGPPSearch.java (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2FieldSearch.java

-              r32547
+              r32619
         indexField = field;
         // set up the appropriate query system
+        if (!setUpQueryer(params))
+        {
+        Object queryObject = setUpQueryer(params);
+        if (queryObject == null)
+        {
             return result;
+        }
 …
             query = parseAdvancedFieldQueryParams(params);
             break;
+        }
+        }
+        // run the query
+        Object query_result = runQuery(query);
+        // run the query
+        Object query_result = runQuery(queryObject, query);
         // We want highlighted text to be returned right now!
 …
+            }
+        }
+        queryObject = null;
         return result;
 …
     /** methods to handle actually doing the query */
+    /** do any initialisation of the query object */
+    abstract protected boolean setUpQueryer(HashMap<String, Serializable> params);
+    /** do the query */
+    abstract protected Object runQuery(String query);
+    /** do any initialisation of the query object. Call before runQuery()
+      * @return the queryObject (e.g. GS2LuceneQuery)
+    */
+    abstract protected Object setUpQueryer(HashMap<String, Serializable> params);
+    /** do the query
+      * The queryObject parameter is the return value of setUpQueryer.
+    */
+    abstract protected Object runQuery(Object queryObject, String query);
     /** get the total number of docs that match */

main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java

-              r32453
+              r32619
 // Greenstone classes
 import java.io.File;
+import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
 …
 import java.util.Set;
 import java.util.Vector;
+// For maintaining Lucene IndexReader objects at collection level
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.log4j.Logger;
 …
 import org.w3c.dom.Element;
 public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch
+{
 …
   protected static final String SORT_ORDER_NORMAL = "0";
+    // IndexReader objects are to be opened for each index level (e.g. one for didx, one for sidx) of a
+    // collection and will live for the duration of that collection, which is from collection activation
+    // until deactivation.
+    // So we want singletons of each index level's IndexReader, since IndexReaders are "multi-threaded
+    // re-entrant", so there's support for just one reader per index with concurrent access by multiple users'
+    // search queries.
+    // When a collection is deactivated, we need to close the reader objects to prevent handles to the
+    // index lingering and causing file locking issues on windows.
+    // Since GS2LuceneQuery is now a local variable instantiated per query, we have to maintain
+    // IndexReader objects in GS2LuceneSearch instead, as GS2LuceneSearch is a collection's service, and
+    // therefore activated and deactivated along with the collection.
+    // The uniqueness of an IndexReader is indicated in the filepath to its index folder (collection path + sidx/didx).
+    // It doesn't have to be a static map of index_dir to IndexReader, and can be a member variable, since
+    // no other collection will refer to the same didx and sidx index folders: each collection has unique filepaths
+    // to its collection folder's index subdirs, not shared with other collections so the Readers don't have to be
+    // shared between collections either.
+    // We now store IndexReaders in a map of singleton index_dir -> IndexReaders opened for this collection:
+    // one Reader singleton for each index_dir
+    private Map<String, IndexReader> index_to_reader_map = new HashMap<String, IndexReader>();
     static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName());
-    private GS2LuceneQuery lucene_src = null;
     public GS2LuceneSearch()
 …
       does_paging = true;
         paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_NORMAL);
-        this.lucene_src = new GS2LuceneQuery();
+    }
 …
+    {
         super.cleanUp();
+        this.lucene_src.cleanUp();
+    }
+        // Prevent file locking issues: close all IndexReader objects maintained for this collection
+        synchronized(index_to_reader_map) { // Regular Map implementations are not synchronized, so adding/removing requires synchronizing on the map object.
+                                        // see https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html
+                                        // And ConcurrentHashMap seems complicated, https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/ConcurrentHashMap.html
+                                        // Synchronizing *outside* the loop because cleanUp() clears the entire HashMap.
+                                        // Don't let any other threads access the map, hence synchronizing.
+                                        // Not sure if there may be other threads accessing the map when deactivating a collection which calls cleanUp().
+                                        // However, when multiple users' search queries lead to adding to the hashmap, definitely need to
+                                        // synchronize as there's a greater possibility of concurrent access then.
+            Iterator<Map.Entry<String,IndexReader>> map_iterator = index_to_reader_map.entrySet().iterator();
+                        // Can use the Map.Entry Set view iterator to remove (key, value) entry from underlying Map!
+                        // See https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html#keySet()
+                        // Same thread creates the iterator as synchronizes on the map, so we should be allowed to remove() from the map
+                        // but only through iterator!
+            while(map_iterator.hasNext()) {
+                Map.Entry<String,IndexReader> entry = map_iterator.next();
+                //index_to_reader_map.remove(...); // concurrentmodexception! Only allowed to remove through iterator. Will remove recent object returned by next()
+                IndexReader reader = entry.getValue(); //keys are index dir paths, e.g. path to current collection's didx folder, values are IndexReader objects
+                map_iterator.remove();  // removes the current (key,value) entry from the underlying map! (Remember, we're iterating over the entry set)
+                                        // We're first removing the reader singleton from map because reader.close() will only close the reader
+                                        //if it's the final reference to it in case that has a bearing here
+                if(reader != null) { // if there was a reader singleton instantiated for this index directory, e.g. coll-didx, close it
+                    try {
+                        // We're opening an IndexReader per indexdir once and closing it once: at start and end of collection.
+                        // If Reader was a member var of GS2LuceneQuery and if multiple GS2LuceneQuery Objects were to call close() on the
+                        // same reader object (on the singleton instance of reader for an index dir), so close is called multiple times,
+                        // then would use incRef and decRef, see http://lucene.472066.n3.nabble.com/IndexReader-close-behavior-td2865515.html
+                        // But then when concurrent queries are done, the final one would have closed the IndexReader and it would have to
+                        // be reopened for the next query. We'd rather keep an opened IndexReader around until the collection's deactivated.
+                        reader.close();
+                        // Closes files associated with this index. Also saves any new deletions to disk.
+                        // No other methods should be called after this has been called.
+                    } catch (IOException exception) {
+                        exception.printStackTrace();
+                    }
+                }
+            } // end loop
+        } // end synchronising on index_to_reader_map
+        // Now we've closed all the Readers maintained for this collection and cleared the map.
+    }
   public boolean configure(Element info, Element extra_info)
+  {
 …
     /** do any initialisation of the query object */
+    protected boolean setUpQueryer(HashMap params)
+    {
+    protected Object setUpQueryer(HashMap params)
+    {
+        // local Query object
+        GS2LuceneQuery lucene_src = new GS2LuceneQuery();
         String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar;
 …
                 if (value.equals(MATCH_PARAM_ALL))
+                {
                     this.lucene_src.setDefaultConjunctionOperator("AND");
+                    lucene_src.setDefaultConjunctionOperator("AND");
+                }
                 else
+                {
                     this.lucene_src.setDefaultConjunctionOperator("OR");
+                    lucene_src.setDefaultConjunctionOperator("OR");
+                }
+            }
 …
+            {
               sort_field = getLuceneSort(value);
               this.lucene_src.setSortField(sort_field);
+              lucene_src.setSortField(sort_field);
+            }
 …
           end_results = hits_per_page * start_page;
+        }
         this.lucene_src.setStartResults(start_results);
         this.lucene_src.setEndResults(end_results);
+        lucene_src.setStartResults(start_results);
+        lucene_src.setEndResults(end_results);
         if (index.equals("sidx") || index.equals("didx"))
 …
         if (sort_order.equals(SORT_ORDER_REVERSE)) {
           this.lucene_src.setReverseSort(true);
+          lucene_src.setReverseSort(true);
         } else {
+          this.lucene_src.setReverseSort(false);
+        }
+        this.lucene_src.setIndexDir(indexdir + index);
+        this.lucene_src.initialise();
+        return true;
+          lucene_src.setReverseSort(false);
+        }
+        String full_index_dir_str = indexdir + index;
+        lucene_src.setIndexDir(full_index_dir_str);
+        // Ensure we have an IndexReader for this full_index_dir_str:
+        // check the hashmap first, in case we already opened a reader and searcher for this index dir, e.g. didx
+        // if there was a reader singleton instantiated for this index directory, e.g. <coll>didx, use that.
+        // Else open a new reader for this index_dir and store it in the map.
+        IndexReader reader = index_to_reader_map.get(full_index_dir_str);
+        if(reader == null) {
+            try {
+                Directory full_indexdir_dir = FSDirectory.open(new File(full_index_dir_str));
+                reader = DirectoryReader.open(full_indexdir_dir); // Returns an IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher
+                synchronized(index_to_reader_map) {
+                    // If storing searcher along with reader, mimic Pairs with: https://stackoverflow.com/questions/2670982/using-pairs-or-2-tuples-in-java
+                    index_to_reader_map.put(full_index_dir_str, reader);
+                }
+            }
+            catch (IOException exception) {
+                exception.printStackTrace();
+            }
+        }
+        lucene_src.initialise(reader); // sets IndexReader and IndexSearcher
+        return lucene_src; // return the queryobject
+    }
     /** do the query */
+    protected Object runQuery(String query)
+    {
+    protected Object runQuery(Object queryObject, String query)
+    {
+        GS2LuceneQuery lucene_src = (GS2LuceneQuery) queryObject;
         try
+        {
             LuceneQueryResult lqr = this.lucene_src.runQuery(query);
+            LuceneQueryResult lqr = lucene_src.runQuery(query);
             return lqr;
+        }

main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java

-              r32084
+              r32619
 public class GS2MGPPSearch extends AbstractGS2FieldSearch
+{
     private static MGPPSearchWrapper mgpp_src = null;
+    private static MGPPSearchWrapper mgpp_src = null; // STATIC!
     private String physical_index_name = "idx";
 …
         mgpp_src.reset(); // reset stored settings to defaults
+    }
     /** process a query */
     protected Element processAnyQuery(Element request, int query_type)
+    {
+        // don't know that the static (class variable) mgpp_src is "multi-threaded re-entrant" allowing multiple users
+        // to search the same index at the same time. So leave code as-is: to synchronize on mgpp_src when running query
         synchronized (mgpp_src)
+        {
 …
+    }
     protected boolean setUpQueryer(HashMap<String, Serializable> params)
+    protected Object setUpQueryer(HashMap<String, Serializable> params)
+    {
 …
         mgpp_src.loadIndexData(indexdir);
+        return true;
+    }
+    protected Object runQuery(String query)
+    {
+        return mgpp_src; //return the query object
+    }
+    protected Object runQuery(Object queryObject, String query)
+    {
+        // queryObject is mgpp_src, so use mgpp_src reference directly:
         mgpp_src.runQuery(query);
         MGPPQueryResult mqr = mgpp_src.getQueryResult();

Note: See TracChangeset for help on using the changeset viewer.