- Timestamp: 2012-09-06T16:54:10+12:00 (12 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneQuery.java
r26155 r26157 39 39 import org.apache.lucene.queryParser.ParseException; 40 40 import org.apache.lucene.queryParser.QueryParser; 41 import org.apache.lucene.search.BooleanQuery .TooManyClauses;41 import org.apache.lucene.search.BooleanQuery; // for the TooManyClauses exception 42 42 import org.apache.lucene.search.Filter; 43 43 import org.apache.lucene.search.IndexSearcher; 44 import org.apache.lucene.search.MultiTermQuery; 45 import org.apache.lucene.search.MultiTermQuery.ConstantScoreAutoRewrite; 44 46 import org.apache.lucene.search.Query; 45 47 import org.apache.lucene.search.TermRangeFilter; … … 54 56 import org.apache.lucene.util.Version; 55 57 56 import org.apache.lucene.search.MultiTermQuery;57 import org.apache.lucene.search.MultiTermQuery.ConstantScoreAutoRewrite;58 58 59 59 public class GS2LuceneQuery extends SharedSoleneQuery … … 152 152 153 153 // GS2's LuceneWrapper uses lucene-2.3.2. GS3's LuceneWrapper3 works with lucene-3.3.0. 154 // This change in lucene core library for GS3 had the side-effect that searching on 155 // "econom*" didn't display what terms it was searching for, whereas it had done so in GS2. 154 // This change in lucene core library for GS3 (present since after version 2.4.1) had the 155 // side-effect that searching on "econom*" didn't display what terms it was searching for, 156 // whereas it had done so in GS2. 156 157 157 158 // The details of this problem and its current solution are explained in the ticket 158 159 // http://trac.greenstone.org/ticket/845 159 160 160 // We need to change the settings for rewriteMethod in order to get searches on wildcards to 161 // produce search terms again when the query is rewritten. 161 // We need to change the settings for the rewriteMethod in order to get searches on wildcards 162 // to produce search terms again when the query gets rewritten. 163 164 // We try, in order: 165 // 1. 
RewriteMethod set to BooleanQuery, to get it working as in GS2 which uses lucene-2.3.2 166 // it will expand wildcard searches to its terms when searching at both section AND doc level. 167 // If that throws a TooManyClauses exception (like when searching for "a*" over lucene demo collection) 168 // 2. Then try a custom rewriteMethod which sets termCountCutoff=350 and docCountPercent cutoff=0.1% 169 // If that throws a TooManyClauses exception (could perhaps happen if the collection has a huge number of docs 170 // 3. Then try the default apache rewriteMethod with its optimum defaults of 171 // termCountCutoff=350 and docCountPercent cutoff=0.1% 172 // See http://lucene.apache.org/core/3_6_1/api/core/org/apache/lucene/search/MultiTermQuery.html 162 173 163 174 if(query instanceof MultiTermQuery) { 164 165 // default docCountPercent=0.1; default termCountCutoff=350166 167 // Creating custom cutoff values, taking into account of existing cutoff values168 MultiTermQuery.ConstantScoreAutoRewrite customRewriteMethod = new MultiTermQuery.ConstantScoreAutoRewrite();169 customRewriteMethod.setDocCountPercent(100.0);//MultiTermQuery.ConstantScoreAutoRewrite.DEFAULT_DOC_COUNT_PERCENT);170 customRewriteMethod.setTermCountCutoff(350);171 172 175 MultiTermQuery multiTermQuery = (MultiTermQuery)query; 173 multiTermQuery.setRewriteMethod(customRewriteMethod); 174 175 // the above works when searching with wildcards over sections, the following also 176 // works on book searches, but has been discouraged as it can throw an exception if 177 // the number of terms exceeds BooleanQuery.getMaxClauseCount(). 
178 // http://lucene.apache.org/core/3_6_1/api/core/org/apache/lucene/search/MultiTermQuery.html 179 180 //multiTermQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);//MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); 181 } 182 183 query = query.rewrite(reader); 184 176 multiTermQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); 177 // less CPU intensive than MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) 178 } 179 180 try { 181 query = query.rewrite(reader); 182 } 183 catch(BooleanQuery.TooManyClauses clauseException) { 184 // Example test case: try searching the lucene demo collection for "a*" 185 // and you'll hit this exception 186 187 lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR); 188 189 if(query instanceof MultiTermQuery) { 190 191 // CustomRewriteMethod: setting the docCountPercent cutoff to a custom 100%. 192 // This will at least expand the query to its terms when searching with wildcards at section-level 193 // (though it doesn't seem to work for doc-level searches, no matter what the cutoffs are set to). 
194 195 MultiTermQuery.ConstantScoreAutoRewrite customRewriteMethod = new MultiTermQuery.ConstantScoreAutoRewrite(); 196 customRewriteMethod.setDocCountPercent(100.0); 197 customRewriteMethod.setTermCountCutoff(350); // same as default 198 199 MultiTermQuery multiTermQuery = (MultiTermQuery)query; 200 multiTermQuery.setRewriteMethod(customRewriteMethod); 201 try { 202 query = query.rewrite(reader); 203 } 204 catch(BooleanQuery.TooManyClauses clauseExceptionAgain) { 205 206 // do what the code originally did: use the default rewriteMethod which 207 // uses a default docCountPercent=0.1 (%) and termCountCutoff=350 208 209 multiTermQuery.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); 210 query = query.rewrite(reader); 211 } 212 } 213 } 214 185 215 // Get the list of expanded query terms and their frequencies 186 216 // num docs matching, and total frequency … … 285 315 lucene_query_result.setError(LuceneQueryResult.PARSE_ERROR); 286 316 } 287 catch ( TooManyClauses too_many_clauses_exception) {317 catch (BooleanQuery.TooManyClauses too_many_clauses_exception) { 288 318 lucene_query_result.setError(LuceneQueryResult.TOO_MANY_CLAUSES_ERROR); 289 319 }
Note: See TracChangeset for help on using the changeset viewer.