- Timestamp: 2018-11-20T21:34:48+13:00 (5 years ago)
- Location: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2FieldSearch.java
r32547 r32619 672 672 indexField = field; 673 673 // set up the appropriate query system 674 if (!setUpQueryer(params)) 675 { 674 Object queryObject = setUpQueryer(params); 675 if (queryObject == null) 676 { 676 677 return result; 677 678 } … … 689 690 query = parseAdvancedFieldQueryParams(params); 690 691 break; 691 } 692 } 692 693 693 // run the query 694 Object query_result = runQuery(query); 694 // run the query 695 Object query_result = runQuery(queryObject, query); 696 695 697 696 698 // We want highlighted text to be returned right now! … … 817 819 } 818 820 } 819 820 821 822 queryObject = null; 821 823 return result; 822 824 … … 824 826 825 827 /** methods to handle actually doing the query */ 826 /** do any initialisation of the query object */ 827 abstract protected boolean setUpQueryer(HashMap<String, Serializable> params); 828 829 /** do the query */ 830 abstract protected Object runQuery(String query); 828 /** do any initialisation of the query object. Call before runQuery() 829 * @return the queryObject (e.g. GS2LuceneQuery) 830 */ 831 abstract protected Object setUpQueryer(HashMap<String, Serializable> params); 832 833 /** do the query 834 * The queryObject parameter is the return value of setUpQueryer. 835 */ 836 abstract protected Object runQuery(Object queryObject, String query); 831 837 832 838 /** get the total number of docs that match */ -
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java
r32453 r32619 21 21 // Greenstone classes 22 22 import java.io.File; 23 import java.io.IOException; 23 24 import java.io.Serializable; 24 25 import java.util.ArrayList; … … 29 30 import java.util.Set; 30 31 import java.util.Vector; 32 33 // For maintaining Lucene IndexReader objects at collection level 34 import org.apache.lucene.index.DirectoryReader; 35 import org.apache.lucene.index.IndexReader; 36 import org.apache.lucene.store.Directory; 37 import org.apache.lucene.store.FSDirectory; 31 38 32 39 import org.apache.log4j.Logger; … … 40 47 import org.w3c.dom.Element; 41 48 49 42 50 public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch 43 51 { … … 47 55 protected static final String SORT_ORDER_NORMAL = "0"; 48 56 57 // IndexReader objects are to be opened for each index level (e.g. one for didx, one for sidx) of a 58 // collection and will live for the duration of that collection, which is from collection activation 59 // until deactivation. 60 // So we want singletons of each index level's IndexReader, since IndexReaders are "multi-threaded 61 // re-entrant", so there's support for just one reader per index with concurrent access by multiple users' 62 // search queries. 63 // When a collection is deactivated, we need to close the reader objects to prevent handles to the 64 // index lingering and causing file locking issues on windows. 65 // Since GS2LuceneQuery now becomes a local member variable instantiated per query, we have to maintain 66 // IndexReader objects in GS2LuceneSearch instead, as GS2LuceneSearch is a collection's service, and 67 // therefore activated and deactivated along with the collection. 68 // The uniqueness of an IndexReader is indicated in the filepath to its index folder (collection path + sidx/didx). 
69 // It doesn't have to be a static map of index_dir to IndexReader, and can be a member variable, since 70 // no other collection will refer to the same didx and sidx index folders: each collection has unique filepaths 71 // to its collection folder's index subdirs, not shared with other collections so the Readers don't have to be 72 // shared between collections either. 73 74 // We now store IndexReaders in a map of singleton index_dir -> IndexReaders opened for this collection: 75 // one Reader singleton for each index_dir 76 private Map<String, IndexReader> index_to_reader_map = new HashMap<String, IndexReader>(); 77 49 78 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 50 51 private GS2LuceneQuery lucene_src = null;52 79 53 80 public GS2LuceneSearch() … … 55 82 does_paging = true; 56 83 paramDefaults.put(SORT_ORDER_PARAM, SORT_ORDER_NORMAL); 57 this.lucene_src = new GS2LuceneQuery();58 84 } 59 85 … … 61 87 { 62 88 super.cleanUp(); 63 this.lucene_src.cleanUp(); 64 } 65 89 90 // Prevent file locking issues: close all IndexReader objects maintained for this collection 91 synchronized(index_to_reader_map) { // Regular Map implementations are not synchronized, so adding/removing requires synchronizing on the map object. 92 // see https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html 93 // And ConcurrentHashMap seems complicated, https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/ConcurrentHashMap.html 94 95 // Synchronizing *outside* the loop because cleanUp() clears the entire HashMap. 96 // Don't let any other threads access the map, hence synchronizing. 97 // Not sure if there may be other threads accessing the map when deactivating a collection which calls cleanUp(). 98 // However, when multiple users' search queries lead to adding to the hashmap, definitely need to 99 // synchronize as there's a greater possibility of concurrent access then. 
100 101 Iterator<Map.Entry<String,IndexReader>> map_iterator = index_to_reader_map.entrySet().iterator(); 102 // Can use the Map.Entry Set view iterator to remove (key, value) entry from underlying Map! 103 // See https://docs.oracle.com/javase/7/docs/api/java/util/HashMap.html#keySet() 104 // Same thread creates the iterator as synchronizes on the map, so we should be allowed to remove() from the map 105 // but only through iterator! 106 while(map_iterator.hasNext()) { 107 Map.Entry<String,IndexReader> entry = map_iterator.next(); 108 //index_to_reader_map.remove(...); // concurrentmodexception! Only allowed to remove through iterator. Will remove recent object returned by next() 109 IndexReader reader = entry.getValue(); //keys are index dir paths, e.g. path to current collection's didx folder, values are IndexReader objects 110 map_iterator.remove(); // removes current key's (key,value) entry from underlying map! (Remember, we're iterating on the keyset) 111 // We're first removing the reader singleton from map because reader.close() will only close the reader 112 //if it's the final reference to it in case that has a bearing here 113 114 if(reader != null) { // if there was a reader singleton instantiated for this index directory, e.g. coll-didx, close it 115 try { 116 // We're opening an IndexReader per indexdir once and closing it once: at start and end of collection. 117 // If Reader was a member var of GS2LuceneQuery and if multiple GS2LuceneQuery Objects were to call close() on the 118 // same reader object (on the singleton instance of reader for an index dir), so close is called multiple times, 119 // then would use incRef and decRef, see http://lucene.472066.n3.nabble.com/IndexReader-close-behavior-td2865515.html 120 // But then when concurrent queries are done, the final one would have closed the IndexReader and it would have to 121 // be reopened for the next query. We'd rather keep an opened IndexReader around until the collection's deactivated. 
122 reader.close(); 123 // Closes files associated with this index. Also saves any new deletions to disk. 124 // No other methods should be called after this has been called. 125 } catch (IOException exception) { 126 exception.printStackTrace(); 127 } 128 } 129 } // end loop 130 } // end synchronising on index_to_reader_map 131 132 // Now we've closed all the Readers maintained for this collection and cleared the map. 133 } 134 66 135 public boolean configure(Element info, Element extra_info) 67 136 { … … 117 186 118 187 /** do any initialisation of the query object */ 119 protected boolean setUpQueryer(HashMap params) 120 { 188 protected Object setUpQueryer(HashMap params) 189 { 190 // local Query object 191 GS2LuceneQuery lucene_src = new GS2LuceneQuery(); 192 121 193 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar; 122 194 … … 158 230 if (value.equals(MATCH_PARAM_ALL)) 159 231 { 160 this.lucene_src.setDefaultConjunctionOperator("AND");232 lucene_src.setDefaultConjunctionOperator("AND"); 161 233 } 162 234 else 163 235 { 164 this.lucene_src.setDefaultConjunctionOperator("OR");236 lucene_src.setDefaultConjunctionOperator("OR"); 165 237 } 166 238 } … … 168 240 { 169 241 sort_field = getLuceneSort(value); 170 this.lucene_src.setSortField(sort_field);242 lucene_src.setSortField(sort_field); 171 243 172 244 } … … 205 277 end_results = hits_per_page * start_page; 206 278 } 207 this.lucene_src.setStartResults(start_results);208 this.lucene_src.setEndResults(end_results);279 lucene_src.setStartResults(start_results); 280 lucene_src.setEndResults(end_results); 209 281 210 282 if (index.equals("sidx") || index.equals("didx")) … … 221 293 222 294 if (sort_order.equals(SORT_ORDER_REVERSE)) { 223 this.lucene_src.setReverseSort(true);295 lucene_src.setReverseSort(true); 224 296 } else { 225 this.lucene_src.setReverseSort(false); 226 } 227 this.lucene_src.setIndexDir(indexdir + index); 228 
this.lucene_src.initialise(); 229 return true; 297 lucene_src.setReverseSort(false); 298 } 299 300 String full_index_dir_str = indexdir + index; 301 lucene_src.setIndexDir(full_index_dir_str); 302 303 // Ensure we have an IndexReader for this full_index_dir_str: 304 // check the hashmap first, in case we already opened a reader and searcher for this index dir, e.g. didx 305 // if there was a reader singleton instantiated for this index directory, e.g. <coll>didx, use that. 306 // Else open a new reader for this index_dir and store it in the map. 307 IndexReader reader = index_to_reader_map.get(full_index_dir_str); 308 if(reader == null) { 309 try { 310 Directory full_indexdir_dir = FSDirectory.open(new File(full_index_dir_str)); 311 reader = DirectoryReader.open(full_indexdir_dir); // Returns an IndexReader reading the index in the given Directory. now readOnly=true by default, and therefore also for searcher 312 synchronized(index_to_reader_map) { 313 // If storing searcher along with reader, mimic Pairs with: https://stackoverflow.com/questions/2670982/using-pairs-or-2-tuples-in-java 314 index_to_reader_map.put(full_index_dir_str, reader); 315 } 316 } 317 catch (IOException exception) { 318 exception.printStackTrace(); 319 } 320 } 321 322 lucene_src.initialise(reader); // sets IndexReader and IndexSearcher 323 324 return lucene_src; // return the queryobject 230 325 } 231 326 232 327 /** do the query */ 233 protected Object runQuery(String query) 234 { 328 protected Object runQuery(Object queryObject, String query) 329 { 330 GS2LuceneQuery lucene_src = (GS2LuceneQuery) queryObject; 235 331 try 236 332 { 237 LuceneQueryResult lqr = this.lucene_src.runQuery(query);333 LuceneQueryResult lqr = lucene_src.runQuery(query); 238 334 return lqr; 239 335 } -
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java
r32084 r32619 43 43 public class GS2MGPPSearch extends AbstractGS2FieldSearch 44 44 { 45 private static MGPPSearchWrapper mgpp_src = null; 45 private static MGPPSearchWrapper mgpp_src = null; // STATIC! 46 46 47 47 private String physical_index_name = "idx"; … … 65 65 mgpp_src.reset(); // reset stored settings to defaults 66 66 } 67 67 68 68 /** process a query */ 69 69 protected Element processAnyQuery(Element request, int query_type) 70 70 { 71 // don't know that the static (class variable) mgpp_src is "multi-threaded re-entrant" allowing multiple users 72 // to search the same index at the same time. So leave code as-is: to synchronize on mgpp_src when running query 71 73 synchronized (mgpp_src) 72 74 { … … 102 104 } 103 105 104 protected booleansetUpQueryer(HashMap<String, Serializable> params)106 protected Object setUpQueryer(HashMap<String, Serializable> params) 105 107 { 106 108 … … 199 201 mgpp_src.loadIndexData(indexdir); 200 202 201 return true; 202 } 203 204 protected Object runQuery(String query) 205 { 203 return mgpp_src; //return the query object 204 } 205 206 protected Object runQuery(Object queryObject, String query) 207 { 208 // queryObject is mgpp_src, so use mgpp_src reference directly: 209 206 210 mgpp_src.runQuery(query); 207 211 MGPPQueryResult mqr = mgpp_src.getQueryResult();
Note: See TracChangeset for help on using the changeset viewer.