Context Navigation

Greenstone3SearchHandler.java@ 32110

Last change on this file since 32110 was 32110, checked in by ak19, 6 years ago

Forgot to commit bugfix. The bug was that when searching a solr collection, if you switch on a facet containing an apostrophe (try searching solr-demo for query term farm and then selecting the facet with apostrophe), there are 0 search results displayed for that facet despite the facet listing a positive number of matching docs for it. 1. Greenstone3SearchHandler.java and facet-scripts.js contain the fix. 2. GS2SolrSearch.java and SolrQueryWrapper.java just contain further debug statements, some commented out.

File size: 10.5 KB

Line
1	/**********************************************************************
2	*
3	* Greenstone3SearchHandler.java
4	*
5	* Copyright 2015 The New Zealand Digital Library Project
6	*
7	* A component of the Greenstone digital library software
8	* from the New Zealand Digital Library Project at the
9	* University of Waikato, New Zealand.
10	*
11	* This program is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 2 of the License, or
14	* (at your option) any later version.
15	*
16	* This program is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with this program; if not, write to the Free Software
23	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	*
25	*********************************************************************/
26
27	package org.greenstone.solrserver;
28
29	import org.apache.solr.handler.component.SearchHandler;
30
31
32	import org.slf4j.Logger;
33	import org.slf4j.LoggerFactory;
34	//import org.apache.log4j.Logger;
35
36	import org.apache.solr.client.solrj.SolrQuery; // subclass of ModifiableSolrParams, a subclass of SolrParams
37
38	import org.apache.solr.core.CoreContainer;
39	import org.apache.solr.core.SolrCore;
40
41	import org.apache.lucene.index.IndexReader;
42	import org.apache.lucene.index.Term;
43
44	import org.apache.lucene.search.BooleanClause;
45	import org.apache.lucene.search.BooleanQuery;
46	import org.apache.lucene.search.MultiTermQuery;
47	import org.apache.lucene.search.Query; // Query, TermQuery, BooleanQuery, BooleanClause and more
48
49
50	import org.apache.solr.search.QParser;
51	import org.apache.solr.search.SolrIndexSearcher;
52
53	import org.apache.solr.request.SolrQueryRequest;
54	import org.apache.solr.response.SolrQueryResponse;
55
56	import java.util.ArrayList;
57	import java.util.Collection;
58	import java.util.Iterator;
59	import java.util.List;
60	import java.util.Set;
61	import java.util.HashSet;
62
63	/**
64	* This class is a custom Solr RequestHandler that sits on the solr server side (in tomcat's solr webapp)
65	* and when it receives a query request (sent to this SearchHandler), it will expand the query terms
66	* by calling query.rewrite and then request the totaltermfreq and termfreq for these individual terms.
67	* This class was made necessary by the fact that solr/lucene index locking exceptions occurred when
68	* this code used to be in ext/solr's SolrQueryWrapper.java::getTerms().
69	*
70	* With the customisations in this class, can search a Solr collection for: econom* cat
71	* And the total and term frequencies will be returned for all expanded forms, depending on the analyzer.
72	*/
73
74
75	// Important page:
76	// https://wiki.apache.org/solr/SolrPlugins
77	public class Greenstone3SearchHandler extends SearchHandler
78	{
79	// IMPORTANT NOTES: 1. Logging doesn't work in this class either with log4j or slf4j,
80	// but System.err goes to catalina.out.
81	// 2. To compile this class, "ant compile" in ext/solr is insufficient. The class file produced
82	// isn't copied into tomcat. Need to do "ant compile-gs3-solrserver".
83
84	//protected static Logger log = LoggerFactory.getLogger(Greenstone3SearchHandler.class);
85	//static Logger logger = LoggerFactory.getLogger(org.greenstone.solrserver.Greenstone3SearchHandler.class.getName());
86
87	protected MultiTermQuery.RewriteMethod currentRewriteMethod
88	= MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
89	// which is less CPU intensive than MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
90
91	// This recursive method calls setRewriteMethod on any MultiTermQueries inside the given (boolean)query,
92	// since by default PrefixQueries like farm* get rewritten to ConstantScoreQueries and don't get expanded.
93	// Calling setRewriteMethod on each MultiTermQuery in query here is useful to later ensure that any
94	// MultiTermQueries like PrefixQueries and WildcardQueries can get expanded,
95	// including when embedded in BooleanQueries.
96	protected Query getSimplified(Query query)
97	{
98
99	// base case
100	if(query instanceof MultiTermQuery) { // PrefixQuery or WildcardQuery
101
102	// for some reason, when a PrefixQuery (e.g. econom*) gets rewritten to a ConstantScoreQuery
103	// it no longer rewrites the query to produce the expanded terms. Need to setRewriteMethod
104	// http://stackoverflow.com/questions/3060636/lucene-score-calculation-with-a-prefixquery
105	// See also http://trac.greenstone.org/ticket/845 and http://trac.greenstone.org/changeset/26157
106
107	MultiTermQuery mtQuery = (MultiTermQuery)query;
108	mtQuery.setRewriteMethod(currentRewriteMethod);
109
110	}
111
112	else if(query instanceof BooleanQuery) {
113
114	BooleanQuery bQuery = (BooleanQuery)query;
115	Iterator<BooleanClause> clauses = bQuery.iterator();
116
117	while(clauses.hasNext()) {
118	BooleanClause clause = clauses.next();
119	Query clauseQuery = clause.getQuery();
120	Query expandedClauseQuery = getSimplified(clauseQuery);
121	clause.setQuery(expandedClauseQuery);
122	}
123	}
124
125	// another type of query, leave as-is
126	return query;
127	}
128
129	protected Query expandQuery(SolrQueryRequest req, Query parsedQuery) throws Exception {
130
131	// calls setRewriteMethod on any MultiTermQueries inside the given (boolean)query,
132	// doing so ensures MultiTermQueries like PrefixQueries and WildcardQueries can get expanded
133	parsedQuery = getSimplified(parsedQuery); // can throw exception
134
135	// now finally rewrite the query to any expanded Prefix- and WildCardQueries contained in here
136	SolrIndexSearcher searcher = req.getSearcher();
137	IndexReader indexReader = searcher.getIndexReader(); // returns a DirectoryReader
138	parsedQuery = parsedQuery.rewrite(indexReader); // used to get rewritten to ConstantScoreQuery
139
140	return parsedQuery;
141	}
142
143	@Override
144	public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
145	{
146
147	// do getTerms() here:
148	// getParams, modify solrparams (q is query_string)
149	// if q exists, then do extractTerms and queryRewrite
150	// then req.setSolrParams
151	// and continue on as before: super.handleRequestBody(req, rsp);
152
153	SolrQuery solrParams = new SolrQuery();
154	solrParams.add(req.getParams());
155
156	//String query_string = "TX:(farming)";
157	String query_string = solrParams.get("q");
158
159
160	if(query_string == null \|\| query_string.equals("")) {
161	//log.error("@@@@@@@@@ " + this.getClass() + " - QUERY STRING EMPTY"); // logging won't work
162	System.err.println("@@@@@@@@@ " + this.getClass() + " - QUERY STRING EMPTY");
163	}
164	else {
165	//System.err.println("@@@ Parsing query_string " + query_string);
166
167
168	QParser qParser = QParser.getParser(query_string, "lucene", req);
169	Query parsedQuery = qParser.getQuery();
170
171	// For PrefixQuery or WildCardQuery (a subclass of AutomatonQuery, incl RegexpQ),
172	// like ZZ:econom* and ZZ:*date/regex queries, Query.extractTerms() throws an Exception
173	// because it has not done the Query.rewrite() step yet. So do that manually for them.
174	// This still doesn't provide us with the terms that econom* or *date break down into.
175
176	//if(parsedQuery instanceof PrefixQuery \|\| parsedQuery instanceof AutomatonQuery) {
177	// Should we just check superclass MultiTermQuery?
178	// Can be a BooleanQuery containing PrefixQuery/WildCardQuery among its clauses, so
179	// just test for * in the query_string to determine if we need to do a rewrite() or not
180	if(query_string.contains("*")) {
181
182	//System.err.println("@@@@ query's class: " + parsedQuery.getClass().getName());
183
184
185	// See also common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper3/GS2LuceneQuery.java
186	// Of http://trac.greenstone.org/changeset/26157 and http://trac.greenstone.org/ticket/845
187	try {
188	parsedQuery = expandQuery(req, parsedQuery);
189
190	} catch(BooleanQuery.TooManyClauses ex) { // hits this exception if searching solr coll for "a*"
191	System.err.println("@@@@ Encountered TooManyClauses Exception: " + ex.getMessage());
192	System.err.println("@@@@ Trying CustomRewriteMethod");
193
194	MultiTermQuery.ConstantScoreAutoRewrite customRewriteMethod = new MultiTermQuery.ConstantScoreAutoRewrite();
195	customRewriteMethod.setDocCountPercent(100.0);
196	customRewriteMethod.setTermCountCutoff(350); // same as default
197	this.currentRewriteMethod = customRewriteMethod;
198
199	try {
200	// try query.rewrite() again now
201	parsedQuery = expandQuery(req, parsedQuery);
202
203	} catch(BooleanQuery.TooManyClauses bex) { // still too many clauses
204	System.err.println("@@@@ Encountered TooManyClauses Exception despite CustomRewriteMethod: "
205	+ bex.getMessage());
206	System.err.println("@@@@ Using default Multiterm RewriteMethod");
207
208	// do what the code originally did: use the default rewriteMethod which
209	// uses a default docCountPercent=0.1 (%) and termCountCutoff=350
210	currentRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
211
212	// this will succeed, but probably won't expand * in Prefix- and WildcardQueries
213	parsedQuery = expandQuery(req, parsedQuery);
214	}
215	}
216	//System.err.println("@@@@ rewritten query is now: " + parsedQuery);
217	}
218
219
220	// extract the terms
221	Set<Term> extractedQueryTerms = new HashSet<Term>();
222	parsedQuery.extractTerms(extractedQueryTerms);
223
224	// need to sort the terms for presentation, since a Set is unsorted
225	List<Term> termsList = new ArrayList<Term>(extractedQueryTerms);
226	java.util.Collections.sort(termsList); // Term implements Comparable, terms sorted alphabetically
227
228
229
230	Iterator<Term> termsIterator = termsList.iterator();//extractedQueryTerms.iterator();
231	while(termsIterator.hasNext()) {
232	Term term = termsIterator.next();
233	//System.err.println("#### Found query term: " + term);
234
235	String field = term.field();
236	String queryTerm = term.text();
237
238	// totaltermfreq(TI, 'farming')
239	// termfreq(TI, 'farming')
240	//System.err.println("@@@@ SOLR FACET queryTerm: " + queryTerm);
241	solrParams.addField("totaltermfreq(" + field + ",'" + queryTerm + "')");
242	solrParams.addField("termfreq(" + field + ",'" + queryTerm + "')");
243
244	// handle the special case of apostrophes in facet query terms
245	// (facet_scripts.js does the other half of handling them)
246	query_string = query_string.replace("%27", "'");
247	solrParams.set("q", query_string);
248
249	System.err.println("@@@@ SOLR FACET query_string: " + query_string);
250	}
251	}
252
253	// set to modified SolrQuery SolrParams
254	req.setParams(solrParams);
255	// send off modified request
256	super.handleRequestBody(req, rsp);
257	}
258	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/solrserver/Greenstone3SearchHandler.java@ 32110

Download in other formats: