source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/solrserver/Greenstone3SearchHandler.java@ 32110

Last change on this file since 32110 was 32110, checked in by ak19, 6 years ago

Forgot to commit bugfix. The bug was that when searching a solr collection, if you switch on a facet containing an apostrophe (try searching solr-demo for query term farm and then selecting the facet with apostrophe), there are 0 search results displayed for that facet despite the facet listing a positive number of matching docs for it. 1. Greenstone3SearchHandler.java and facet-scripts.js contain the fix. 2. GS2SolrSearch.java and SolrQueryWrapper.java just contain further debug statements, some commented out.

File size: 10.5 KB
Line 
1/**********************************************************************
2 *
3 * Greenstone3SearchHandler.java
4 *
5 * Copyright 2015 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27package org.greenstone.solrserver;
28
29import org.apache.solr.handler.component.SearchHandler;
30
31
32import org.slf4j.Logger;
33import org.slf4j.LoggerFactory;
34//import org.apache.log4j.Logger;
35
36import org.apache.solr.client.solrj.SolrQuery; // subclass of ModifiableSolrParams, a subclass of SolrParams
37
38import org.apache.solr.core.CoreContainer;
39import org.apache.solr.core.SolrCore;
40
41import org.apache.lucene.index.IndexReader;
42import org.apache.lucene.index.Term;
43
44import org.apache.lucene.search.BooleanClause;
45import org.apache.lucene.search.BooleanQuery;
46import org.apache.lucene.search.MultiTermQuery;
47import org.apache.lucene.search.Query; // Query, TermQuery, BooleanQuery, BooleanClause and more
48
49
50import org.apache.solr.search.QParser;
51import org.apache.solr.search.SolrIndexSearcher;
52
53import org.apache.solr.request.SolrQueryRequest;
54import org.apache.solr.response.SolrQueryResponse;
55
56import java.util.ArrayList;
57import java.util.Collection;
58import java.util.Iterator;
59import java.util.List;
60import java.util.Set;
61import java.util.HashSet;
62
63/**
64 * This class is a custom Solr RequestHandler that sits on the solr server side (in tomcat's solr webapp)
65 * and when it receives a query request (sent to this SearchHandler), it will expand the query terms
66 * by calling query.rewrite and then request the totaltermfreq and totalfreq for these individual terms.
67 * This class was made necessary by the fact that solr/lucene index locking exceptions occurred when
68 * this code used to be in ext/solr's SolrQueryWrapper.java::getTerms().
69 *
70 * With the customisations in this class, can search a Solr collection for: econom* cat
71 * And the total and term frequencies will be returned for all expanded forms, depending on the analyzer.
72 */
73
74
75// Important page:
76// https://wiki.apache.org/solr/SolrPlugins
77public class Greenstone3SearchHandler extends SearchHandler
78{
79 // IMPORTANT NOTES: 1. Logging doesn't work in this class either with log4j or slf4j,
80 // but System.err goes to catalina.out.
81 // 2. To compile this class, "ant compile" in ext/solr is insufficient. The class file produced
82 // isn't copied into tomcat. Need to do "ant compile-gs3-solrserver".
83
84 //protected static Logger log = LoggerFactory.getLogger(Greenstone3SearchHandler.class);
85 //static Logger logger = LoggerFactory.getLogger(org.greenstone.solrserver.Greenstone3SearchHandler.class.getName());
86
87 protected MultiTermQuery.RewriteMethod currentRewriteMethod
88 = MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
89 // which is less CPU intensive than MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
90
91 // This recursive method calls setRewriteMethod on any MultiTermQueries inside the given (boolean)query,
92 // since by default PrefixQueries like farm* get rewritten to ConstantScoreQueries and don't get expanded.
93 // Calling setRewriteMethod on each MultiTermQuery in query here is useful to later ensure that any
94 // MultiTermQueries like PrefixQueries and WildcardQueries can get expanded,
95 // including when embedded in BooleanQueries.
96 protected Query getSimplified(Query query)
97 {
98
99 // base case
100 if(query instanceof MultiTermQuery) { // PrefixQuery or WildcardQuery
101
102 // for some reason, when a PrefixQuery (e.g. econom*) gets rewritten to a ConstantScoreQuery
103 // it no longer rewrites the query to produce the expanded terms. Need to setRewriteMethod
104 // http://stackoverflow.com/questions/3060636/lucene-score-calculation-with-a-prefixquery
105 // See also http://trac.greenstone.org/ticket/845 and http://trac.greenstone.org/changeset/26157
106
107 MultiTermQuery mtQuery = (MultiTermQuery)query;
108 mtQuery.setRewriteMethod(currentRewriteMethod);
109
110 }
111
112 else if(query instanceof BooleanQuery) {
113
114 BooleanQuery bQuery = (BooleanQuery)query;
115 Iterator<BooleanClause> clauses = bQuery.iterator();
116
117 while(clauses.hasNext()) {
118 BooleanClause clause = clauses.next();
119 Query clauseQuery = clause.getQuery();
120 Query expandedClauseQuery = getSimplified(clauseQuery);
121 clause.setQuery(expandedClauseQuery);
122 }
123 }
124
125 // another type of query, leave as-is
126 return query;
127 }
128
129 protected Query expandQuery(SolrQueryRequest req, Query parsedQuery) throws Exception {
130
131 // calls setRewriteMethod on any MultiTermQueries inside the given (boolean)query,
132 // doing so ensures MultiTermQueries like PrefixQueries and WildcardQueries can get expanded
133 parsedQuery = getSimplified(parsedQuery); // can throw exception
134
135 // now finally rewrite the query to any expanded Prefix- and WildCardQueries contained in here
136 SolrIndexSearcher searcher = req.getSearcher();
137 IndexReader indexReader = searcher.getIndexReader(); // returns a DirectoryReader
138 parsedQuery = parsedQuery.rewrite(indexReader); // used to get rewritten to ConstantScoreQuery
139
140 return parsedQuery;
141 }
142
143 @Override
144 public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
145 {
146
147 // do getTerms() here:
148 // getParams, modify solrparams (q is query_string)
149 // if q exists, then do extractTerms and queryRewrite
150 // then req.setSolrParams
151 // and continue on as before: super.handleRequestBody(req, rsp);
152
153 SolrQuery solrParams = new SolrQuery();
154 solrParams.add(req.getParams());
155
156 //String query_string = "TX:(farming)";
157 String query_string = solrParams.get("q");
158
159
160 if(query_string == null || query_string.equals("")) {
161 //log.error("@@@@@@@@@ " + this.getClass() + " - QUERY STRING EMPTY"); // logging won't work
162 System.err.println("@@@@@@@@@ " + this.getClass() + " - QUERY STRING EMPTY");
163 }
164 else {
165 //System.err.println("@@@ Parsing query_string " + query_string);
166
167
168 QParser qParser = QParser.getParser(query_string, "lucene", req);
169 Query parsedQuery = qParser.getQuery();
170
171 // For PrefixQuery or WildCardQuery (a subclass of AutomatonQuery, incl RegexpQ),
172 // like ZZ:econom* and ZZ:*date/regex queries, Query.extractTerms() throws an Exception
173 // because it has not done the Query.rewrite() step yet. So do that manually for them.
174 // This still doesn't provide us with the terms that econom* or *date break down into.
175
176 //if(parsedQuery instanceof PrefixQuery || parsedQuery instanceof AutomatonQuery) {
177 // Should we just check superclass MultiTermQuery?
178 // Can be a BooleanQuery containing PrefixQuery/WildCardQuery among its clauses, so
179 // just test for * in the query_string to determine if we need to do a rewrite() or not
180 if(query_string.contains("*")) {
181
182 //System.err.println("@@@@ query's class: " + parsedQuery.getClass().getName());
183
184
185 // See also common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneQuery.java
186 // Of http://trac.greenstone.org/changeset/26157 and http://trac.greenstone.org/ticket/845
187 try {
188 parsedQuery = expandQuery(req, parsedQuery);
189
190 } catch(BooleanQuery.TooManyClauses ex) { // hits this exception if searching solr coll for "a*"
191 System.err.println("@@@@ Encountered TooManyClauses Exception: " + ex.getMessage());
192 System.err.println("@@@@ Trying CustomRewriteMethod");
193
194 MultiTermQuery.ConstantScoreAutoRewrite customRewriteMethod = new MultiTermQuery.ConstantScoreAutoRewrite();
195 customRewriteMethod.setDocCountPercent(100.0);
196 customRewriteMethod.setTermCountCutoff(350); // same as default
197 this.currentRewriteMethod = customRewriteMethod;
198
199 try {
200 // try query.rewrite() again now
201 parsedQuery = expandQuery(req, parsedQuery);
202
203 } catch(BooleanQuery.TooManyClauses bex) { // still too many clauses
204 System.err.println("@@@@ Encountered TooManyClauses Exception despite CustomRewriteMethod: "
205 + bex.getMessage());
206 System.err.println("@@@@ Using default Multiterm RewriteMethod");
207
208 // do what the code originally did: use the default rewriteMethod which
209 // uses a default docCountPercent=0.1 (%) and termCountCutoff=350
210 currentRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
211
212 // this will succeed, but probably won't expand * in Prefix- and WildcardQueries
213 parsedQuery = expandQuery(req, parsedQuery);
214 }
215 }
216 //System.err.println("@@@@ rewritten query is now: " + parsedQuery);
217 }
218
219
220 // extract the terms
221 Set<Term> extractedQueryTerms = new HashSet<Term>();
222 parsedQuery.extractTerms(extractedQueryTerms);
223
224 // need to sort the terms for presentation, since a Set is unsorted
225 List<Term> termsList = new ArrayList<Term>(extractedQueryTerms);
226 java.util.Collections.sort(termsList); // Term implements Comparable, terms sorted alphabetically
227
228
229
230 Iterator<Term> termsIterator = termsList.iterator();//extractedQueryTerms.iterator();
231 while(termsIterator.hasNext()) {
232 Term term = termsIterator.next();
233 //System.err.println("#### Found query term: " + term);
234
235 String field = term.field();
236 String queryTerm = term.text();
237
238 // totaltermfreq(TI, 'farming')
239 // termfreq(TI, 'farming')
240 //System.err.println("@@@@ SOLR FACET queryTerm: " + queryTerm);
241 solrParams.addField("totaltermfreq(" + field + ",'" + queryTerm + "')");
242 solrParams.addField("termfreq(" + field + ",'" + queryTerm + "')");
243
244 // handle the special case of apostrophes in facet query terms
245 // (facet_scripts.js does the other half of handling them)
246 query_string = query_string.replace("%27", "'");
247 solrParams.set("q", query_string);
248
249 System.err.println("@@@@ SOLR FACET query_string: " + query_string);
250 }
251 }
252
253 // set to modified SolrQuery SolrParams
254 req.setParams(solrParams);
255 // send off modified request
256 super.handleRequestBody(req, rsp);
257 }
258}
Note: See TracBrowser for help on using the repository browser.