- Timestamp:
- 2014-08-06T19:27:07+12:00 (10 years ago)
- Location:
- gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/service/GS2SolrSearch.java
r29142 r29170 251 251 String value = (String) m.getValue(); 252 252 253 ///System.err.println("### GS2SolrSearch.java: name " + name + " - value " + value); 254 253 255 if (name.equals(MAXDOCS_PARAM) && !value.equals("")) 254 256 { -
gs3-extensions/solr/trunk/src/src/java/org/greenstone/gsdl3/util/SolrQueryWrapper.java
r29142 r29170 29 29 import java.net.URLDecoder; 30 30 import java.util.ArrayList; 31 import java.util.Collection; 31 32 import java.util.HashMap; 33 import java.util.Iterator; 32 34 import java.util.List; 35 import java.util.Set; 36 import java.util.HashSet; 33 37 34 38 import org.apache.log4j.Logger; 39 import org.apache.solr.client.solrj.SolrQuery; // subclass of ModifiableSolrParams 35 40 import org.apache.solr.client.solrj.SolrServer; 36 41 import org.apache.solr.client.solrj.SolrServerException; 42 import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 37 43 import org.apache.solr.client.solrj.response.QueryResponse; 44 import org.apache.solr.client.solrj.response.TermsResponse; 45 46 import org.apache.solr.core.CoreContainer; 47 import org.apache.solr.core.SolrCore; 48 38 49 import org.apache.solr.common.SolrDocument; 39 50 import org.apache.solr.common.SolrDocumentList; … … 41 52 import org.greenstone.LuceneWrapper4.SharedSoleneQuery; 42 53 import org.greenstone.LuceneWrapper4.SharedSoleneQueryResult; 54 55 import org.apache.lucene.search.Query; // Query, TermQuery, BooleanQuery, BooleanClause and more 56 import org.apache.lucene.index.IndexReader; 57 import org.apache.lucene.index.Term; 58 import org.apache.solr.search.QParser; 59 import org.apache.solr.search.SolrIndexSearcher; 60 import org.apache.solr.request.LocalSolrQueryRequest; 43 61 44 62 import com.google.gson.Gson; … … 122 140 return true; 123 141 } 142 143 144 /** Extracts the query terms from the query string. The query string can be a boolean 145 * combination of the various search fields with their search terms or phrases 146 */ 147 public Term[] getTerms(SolrQuery solrQuery, String query_string) 148 { 149 Term terms[] = null; 150 151 if(solr_core instanceof EmbeddedSolrServer) { 152 EmbeddedSolrServer solrServer = (EmbeddedSolrServer)solr_core; 153 154 CoreContainer coreContainer = solrServer.getCoreContainer(); 155 156 Collection<SolrCore> solrCores = coreContainer.getCores(); 157 if(!solrCores.isEmpty()) { 158 Iterator<SolrCore> coreIterator = solrCores.iterator(); 159 160 // Just use the first core, since the term frequency of any term is the same regardless of core 161 if(coreIterator.hasNext()) { 162 SolrCore solrCore = coreIterator.next(); 163 164 165 LocalSolrQueryRequest solrQueryRequest = new LocalSolrQueryRequest(solrCore, solrQuery); 166 Query parsedQuery = null; 167 168 try { 169 170 // get the qparser, default is LuceneQParserPlugin, which is called "lucene" see http://wiki.apache.org/solr/QueryParser 171 QParser qParser = QParser.getParser(query_string, "lucene", solrQueryRequest); 172 parsedQuery = qParser.getQuery(); 173 174 // For PrefixQuery or WildCardQuery (a subclass of AutomatonQuery, incl RegexpQ), 175 // like ZZ:econom* and ZZ:*date/regex queries, Query.extractTerms() throws an Exception 176 // because it has not done the Query.rewrite() step yet. So do that manually for them. 177 // This still doesn't provide us with the terms that econom* or *date break down into. 178 179 //if(parsedQuery instanceof PrefixQuery || parsedQuery instanceof AutomatonQuery) { 180 // Should we just check superclass MultiTermQuery? 181 // Can be a BooleanQuery containing PrefixQuery/WildCardQuery among its clauses, so 182 // just test for * in the query_string to determine if we need to do a rewrite() or not 183 if(query_string.contains("*")) { 184 SolrIndexSearcher searcher = solrQueryRequest.getSearcher(); 185 IndexReader indexReader = searcher.getIndexReader(); // returns a DirectoryReader 186 parsedQuery = parsedQuery.rewrite(indexReader); // gets rewritten to ConstantScoreQuery 187 } 188 189 //System.err.println("#### Query type was: " + parsedQuery.getClass()); 190 //logger.error("#### Query type was: " + parsedQuery.getClass()); 191 192 // extract the terms 193 Set<Term> extractedQueryTerms = new HashSet<Term>(); 194 parsedQuery.extractTerms(extractedQueryTerms); 195 196 terms = new Term[extractedQueryTerms.size()]; 197 198 Iterator<Term> termsIterator = extractedQueryTerms.iterator(); 199 for(int i = 0; termsIterator.hasNext(); i++) { 200 Term term = termsIterator.next(); 201 ///System.err.println("#### Found query term: " + term); 202 ///logger.error("#### Found query term: " + term); 203 204 terms[i] = term; //(term.field(), term.text()); 205 } 206 207 } catch(Exception queryParseException) { 208 queryParseException.printStackTrace(); 209 System.err.println("Exception when parsing query: " + queryParseException.getMessage()); 210 System.err.println("#### Query type was: " + parsedQuery.getClass()); 211 logger.error("#### Query type was: " + parsedQuery.getClass()); 212 } 213 } 214 215 } else { 216 System.err.println("#### Not an EmbeddedSolrServer. This shouldn't happen."); 217 logger.error("#### Not an EmbeddedSolrServer. This shouldn't happen."); 218 } 219 } 220 221 return terms; 222 } 124 223 125 224 public SharedSoleneQueryResult runQuery(String query_string) … … 205 304 } 206 305 207 ModifiableSolrParams solrParams = new ModifiableSolrParams(); 208 solrParams.set("q", query_string); 209 // sort param, like "score desc" or "byORG asc" 210 solrParams.set("sort", this.sort_field+" "+this.sort_order); 211 // which result to start from 212 solrParams.set("start", start_results); 213 // how many results per "page" 214 solrParams.set("rows", (end_results - start_results) + 1); 215 // which fields to return for each document 216 solrParams.set("fl", "docOID score"); 217 // turn on the termsComponent 218 solrParams.set("terms", true); 219 // which field to get the terms from 220 solrParams.set("terms.fl", "ZZ"); 306 307 SolrQuery solrQuery = new SolrQuery(query_string); 308 solrQuery.addSort(this.sort_field, SolrQuery.ORDER.valueOf(this.sort_order)); // sort param, like "score desc" or "byORG asc" 309 solrQuery.setStart(start_results); // which result to start from 310 solrQuery.setRows((end_results - start_results) + 1); // how many results per "page" 311 312 // http://lucene.472066.n3.nabble.com/get-term-frequency-just-only-keywords-search-td4084510.html 313 // WORKS (search didx core): 314 //TI:farming 315 //docOID,score,termfreq(TI,'farming'),totaltermfreq(TI,'farming') 316 317 318 // which fields to return for each document, we'll add the request for totaltermfreq later 319 // fl=docOID score termfreq(TI,'farming') totaltermfreq(TI,'farming') 320 solrQuery.setFields("docOID", "score"); //solrParams.set("fl", "docOID score totaltermfreq(field,'queryterm')"); 321 322 //solrQuery.setTerms(true); // turn on the termsComponent 323 //solrQuery.set("terms.fl", "ZZ"); // which field to get the terms from. ModifiableSolrParams method 324 325 // http://wiki.apache.org/solr/TermVectorComponent and https://cwiki.apache.org/confluence/display/solr/The+Term+Vector+Component 326 // http://lucene.472066.n3.nabble.com/get-term-frequency-just-only-keywords-search-td4084510.html 327 // http://stackoverflow.com/questions/13031534/word-frequency-in-solr 328 // http://wiki.apache.org/solr/FunctionQuery#tf and #termfreq and #totaltermfreq 329 // https://wiki.apache.org/solr/TermsComponent 330 331 //solrParams.set("tv.tf", true);// turn on the terms vector Component 332 //solrParams.set("tv.fl", "ZZ");// which field to get the terms from /// ZZ 333 221 334 222 335 if (_facets.size() > 0) 223 336 { 224 337 // enable facet counts in the query response 225 solr Params.set("facet", "true");338 solrQuery.setFacet(true); //solrParams.set("facet", "true"); 226 339 for (int i = 0; i < _facets.size(); i++) 227 340 { 228 341 // add this field as a facet 229 solrParams.add("facet.field", _facets.get(i)); 230 } 231 } 232 342 solrQuery.addFacetField(_facets.get(i)); // solrParams.add("facet.field", _facets.get(i)); 343 } 344 } 345 346 // get the individual terms that make up the query, then request solr to return the totaltermfreq for each term 347 Term[] terms = getTerms(solrQuery, query_string); 348 if(terms != null) { 349 for(int i = 0; i < terms.length; i++) { 350 Term term = terms[i]; 351 String field = term.field(); 352 String queryTerm = term.text(); 353 // totaltermfreq(TI, 'farming') termfreq(TI, 'farming') 354 355 solrQuery.addField("totaltermfreq(" + field + ",'" + queryTerm + "')"); 356 solrQuery.addField("termfreq(" + field + ",'" + queryTerm + "')"); 357 } 358 } 359 360 // do the query 233 361 try 234 362 { 235 QueryResponse solrResponse = solr_core.query(solr Params);363 QueryResponse solrResponse = solr_core.query(solrQuery); //solr_core.query(solrParams); 236 364 SolrDocumentList hits = solrResponse.getResults(); 365 //TermsResponse termResponse = solrResponse.getTermsResponse(); // null unless termvectors=true in schema.xml 237 366 238 367 if (hits != null) … … 253 382 solr_query_result.setEndResults(start_results + hits.size()); 254 383 384 385 // get the first field we're searching in, this will be the fallback field 255 386 int sepIndex = query_string.indexOf(":"); 256 String field = query_string.substring(0, sepIndex);257 String query = query_string.substring(sepIndex + 2, query_string.length() - 1);258 259 solr_query_result.addTerm(query, field, (int) hits.getNumFound(), -1);387 String defaultField = query_string.substring(0, sepIndex); 388 //String query = query_string.substring(sepIndex + 2, query_string.length() - 1); // Replaced by call to getTerms() 389 390 //solr_query_result.addTerm(query, field, (int) hits.getNumFound(), -1); 260 391 261 392 // Output the matching documents … … 263 394 { 264 395 SolrDocument doc = hits.get(i); 265 396 266 397 // Need to think about how to support document term frequency. Make zero for now 267 398 int doc_term_freq = 0; … … 271 402 logger.info("**** docOID = " + docOID); 272 403 logger.info("**** score = " + score); 273 274 solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq); 404 405 406 // solr returns each term's totaltermfreq, ttf, at the document level, even though 407 // the ttf is the same for each document. So extract this information just for the first document 408 if(i == 0) { // first document 409 410 if(terms != null) { 411 for(int j = 0; j < terms.length; j++) { 412 Term term = terms[j]; 413 String field = term.field(); 414 String queryTerm = term.text(); 415 416 // totaltermfreq(TI, 'farming') termfreq(TI, 'farming') 417 Long totaltermfreq = (Long)doc.get("totaltermfreq("+field+",'"+queryTerm+"')"); 418 Integer termfreq = (Integer)doc.get("termfreq("+field+",'"+queryTerm+"')"); 419 420 //System.err.println("**** ttf = " + totaltermfreq); 421 //System.err.println("**** tf = " + termfreq); 422 //logger.info("**** ttf = " + totaltermfreq); 423 //logger.info("**** tf = " + termfreq); 424 425 solr_query_result.addTerm(queryTerm, field, (int) hits.getNumFound(), totaltermfreq.intValue()); // long totaltermfreq to int 426 } 427 } else { // no terms extracted from query_string 428 solr_query_result.addTerm(query_string, defaultField, (int) hits.getNumFound(), -1); // no terms 429 } 430 } 431 432 solr_query_result.addDoc(docOID, score.floatValue(), doc_term_freq); // doc_termfreq for which term???? 275 433 } 276 434 } … … 309 467 super.cleanUp(); 310 468 } 469 311 470 }
Note:
See TracChangeset
for help on using the changeset viewer.