Changeset 25854
- Timestamp:
- 2012-06-28T11:29:07+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java
r25635 r25854 1 1 /* 2 * GS2LuceneSearch.java3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org4 *5 * This program is free software; you can redistribute it and/or modify6 * the Free Software Foundation; either version 2 of the License, or7 * (at your option) any later version.8 *9 * This program is distributed in the hope that it will be useful,10 * but WITHOUT ANY WARRANTY; without even the implied warranty of11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the12 * GNU General Public License for more details.13 *14 * You should have received a copy of the GNU General Public License15 * along with this program; if not, write to the Free Software16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.17 */2 * GS2LuceneSearch.java 3 * Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 */ 18 18 19 19 package org.greenstone.gsdl3.service; 20 20 21 21 // Greenstone classes 22 import org.greenstone.gsdl3.util.*;23 24 // XML classes25 import org.w3c.dom.Element;26 import org.w3c.dom.NodeList;27 import org.w3c.dom.Document;28 // java classes29 import java.util.ArrayList;30 import java.util.HashMap;31 22 import java.io.File; 32 23 import java.io.Serializable; 24 import java.util.HashMap; 33 25 import java.util.Iterator; 26 import java.util.Map; 34 27 import java.util.Set; 35 import java.util.Map;36 28 import java.util.Vector; 37 29 38 // Logging39 30 import org.apache.log4j.Logger; 40 41 31 import org.greenstone.LuceneWrapper3.GS2LuceneQuery; 42 32 import org.greenstone.LuceneWrapper3.LuceneQueryResult; 33 import org.greenstone.gsdl3.util.GSFile; 34 import org.greenstone.gsdl3.util.GSXML; 35 import org.w3c.dom.Element; 43 36 44 37 public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch 45 38 { 46 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 47 48 private GS2LuceneQuery lucene_src=null; 49 50 public GS2LuceneSearch() 51 { 52 this.lucene_src = new GS2LuceneQuery(); 53 } 54 55 56 public void cleanUp() { 57 super.cleanUp(); 58 this.lucene_src.cleanUp(); 59 } 60 61 62 /** methods to handle actually doing the query */ 63 64 /** do any initialisation of the query object */ 65 protected boolean setUpQueryer(HashMap<String, Serializable> params) { 66 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar; 67 68 String index = "didx"; 69 String physical_index_language_name=null; 70 String physical_sub_index_name=null; 71 int maxdocs = 100; 72 int hits_per_page = 20; 73 int start_page = 1; 74 // set up the query params 75 Set entries = params.entrySet(); 76 Iterator i = entries.iterator(); 77 while (i.hasNext()) { 78 Map.Entry m = (Map.Entry)i.next(); 79 String name = (String)m.getKey(); 80 String value = (String)m.getValue(); 81 82 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) { 83 maxdocs = Integer.parseInt(value); 84 } else if (name.equals(HITS_PER_PAGE_PARAM)) { 85 hits_per_page = Integer.parseInt(value); 86 } else if (name.equals(START_PAGE_PARAM)) { 87 start_page = Integer.parseInt(value); 88 89 } else if (name.equals(MATCH_PARAM)) { 90 if (value.equals(MATCH_PARAM_ALL)) { 91 this.lucene_src.setDefaultConjunctionOperator("AND"); 92 } else{ 93 this.lucene_src.setDefaultConjunctionOperator("OR"); 94 } 95 } else if (name.equals(RANK_PARAM)) { 96 if (value.equals(RANK_PARAM_RANK_VALUE)) { 97 value = null; 98 } 99 this.lucene_src.setSortField(value); 100 } else if (name.equals(LEVEL_PARAM)) { 101 if (value.toUpperCase().equals("SEC")){ 102 index = "sidx"; 103 } 104 else { 105 index = "didx"; 106 } 107 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) { 108 physical_sub_index_name=value; 109 } else if (name.equals(INDEX_LANGUAGE_PARAM)){ 110 physical_index_language_name=value; 111 } // ignore any others 112 } 113 // set up start and end results if necessary 114 int start_results = 1; 115 if (start_page != 1) { 116 start_results = ((start_page-1) * hits_per_page) + 1; 117 } 118 int end_results = hits_per_page * start_page; 119 this.lucene_src.setStartResults(start_results); 120 this.lucene_src.setEndResults(end_results); 121 122 if (index.equals("sidx") || index.equals("didx")){ 123 if (physical_sub_index_name!=null) { 124 index+=physical_sub_index_name; 125 } 126 if (physical_index_language_name!=null){ 127 index+=physical_index_language_name; 128 } 129 } 130 131 this.lucene_src.setIndexDir(indexdir+index); 132 this.lucene_src.initialise(); 133 return true; 134 } 135 136 /** do the query */ 137 protected Object runQuery(String query) { 138 try { 139 LuceneQueryResult lqr=this.lucene_src.runQuery(query); 140 return lqr; 141 } catch (Exception e) { 142 logger.error ("Exception happened in runQuery(): ", e); 143 } 144 145 return null; 146 } 147 148 /** get the total number of docs that match */ 149 protected long numDocsMatched(Object query_result) { 150 return ((LuceneQueryResult)query_result).getTotalDocs(); 151 152 } 153 154 /** get the list of doc ids */ 155 protected String [] getDocIDs(Object query_result) { 156 Vector docs = ((LuceneQueryResult)query_result).getDocs(); 157 String [] doc_nums = new String [docs.size()]; 158 for (int d = 0; d < docs.size(); d++) { 159 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_; 160 doc_nums[d] = doc_num; 161 } 162 return doc_nums; 163 } 164 165 /** get the list of doc ranks */ 166 protected String [] getDocRanks(Object query_result) { 167 Vector docs = ((LuceneQueryResult)query_result).getDocs(); 168 String [] doc_ranks = new String [docs.size()]; 169 for (int d = 0; d < docs.size(); d++) { 170 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_); 171 } 172 return doc_ranks; 173 } 174 175 /** add in term info if available */ 176 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, 177 Object query_result) { 178 String query_level = (String)params.get(LEVEL_PARAM); // the current query level 179 180 Vector terms = ((LuceneQueryResult)query_result).getTerms(); 181 for (int t = 0; t < terms.size(); t++) { 182 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t); 183 184 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 185 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); 186 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); 187 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); 188 term_elem.setAttribute(FIELD_ATT, term_info.field_); 189 term_list.appendChild(term_elem); 190 } 191 192 Vector stopwords = ((LuceneQueryResult)query_result).getStopWords(); 193 for (int t = 0; t < stopwords.size(); t++) { 194 String stopword = (String) stopwords.get(t); 195 196 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM); 197 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword); 198 term_list.appendChild(stopword_elem); 199 } 200 201 return true; 202 } 39 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 40 41 private GS2LuceneQuery lucene_src = null; 42 43 public GS2LuceneSearch() 44 { 45 this.lucene_src = new GS2LuceneQuery(); 46 } 47 48 public void cleanUp() 49 { 50 super.cleanUp(); 51 this.lucene_src.cleanUp(); 52 } 53 54 /** methods to handle actually doing the query */ 55 56 /** do any initialisation of the query object */ 57 protected boolean setUpQueryer(HashMap<String, Serializable> params) 58 { 59 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index" + File.separatorChar; 60 61 String index = "didx"; 62 String physical_index_language_name = null; 63 String physical_sub_index_name = null; 64 int maxdocs = 100; 65 int hits_per_page = 20; 66 int start_page = 1; 67 // set up the query params 68 Set entries = params.entrySet(); 69 Iterator i = entries.iterator(); 70 while (i.hasNext()) 71 { 72 Map.Entry m = (Map.Entry) i.next(); 73 String name = (String) m.getKey(); 74 String value = (String) m.getValue(); 75 76 if (name.equals(MAXDOCS_PARAM) && !value.equals("")) 77 { 78 maxdocs = Integer.parseInt(value); 79 } 80 else if (name.equals(HITS_PER_PAGE_PARAM)) 81 { 82 hits_per_page = Integer.parseInt(value); 83 } 84 else if (name.equals(START_PAGE_PARAM)) 85 { 86 start_page = Integer.parseInt(value); 87 88 } 89 else if (name.equals(MATCH_PARAM)) 90 { 91 if (value.equals(MATCH_PARAM_ALL)) 92 { 93 this.lucene_src.setDefaultConjunctionOperator("AND"); 94 } 95 else 96 { 97 this.lucene_src.setDefaultConjunctionOperator("OR"); 98 } 99 } 100 else if (name.equals(RANK_PARAM)) 101 { 102 if (value.equals(RANK_PARAM_RANK_VALUE)) 103 { 104 value = null; 105 } 106 this.lucene_src.setSortField(value); 107 } 108 else if (name.equals(LEVEL_PARAM)) 109 { 110 if (value.toUpperCase().equals("SEC")) 111 { 112 index = "sidx"; 113 } 114 else 115 { 116 index = "didx"; 117 } 118 } 119 else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) 120 { 121 physical_sub_index_name = value; 122 } 123 else if (name.equals(INDEX_LANGUAGE_PARAM)) 124 { 125 physical_index_language_name = value; 126 } // ignore any others 127 } 128 // set up start and end results if necessary 129 int start_results = 1; 130 if (start_page != 1) 131 { 132 start_results = ((start_page - 1) * hits_per_page) + 1; 133 } 134 int end_results = hits_per_page * start_page; 135 this.lucene_src.setStartResults(start_results); 136 this.lucene_src.setEndResults(end_results); 137 138 if (index.equals("sidx") || index.equals("didx")) 139 { 140 if (physical_sub_index_name != null) 141 { 142 index += physical_sub_index_name; 143 } 144 if (physical_index_language_name != null) 145 { 146 index += physical_index_language_name; 147 } 148 } 149 150 this.lucene_src.setIndexDir(indexdir + index); 151 this.lucene_src.initialise(); 152 return true; 153 } 154 155 /** do the query */ 156 protected Object runQuery(String query) 157 { 158 try 159 { 160 LuceneQueryResult lqr = this.lucene_src.runQuery(query); 161 return lqr; 162 } 163 catch (Exception e) 164 { 165 logger.error("Exception happened in runQuery(): ", e); 166 } 167 168 return null; 169 } 170 171 /** get the total number of docs that match */ 172 protected long numDocsMatched(Object query_result) 173 { 174 return ((LuceneQueryResult) query_result).getTotalDocs(); 175 176 } 177 178 /** get the list of doc ids */ 179 protected String[] getDocIDs(Object query_result) 180 { 181 Vector docs = ((LuceneQueryResult) query_result).getDocs(); 182 String[] doc_nums = new String[docs.size()]; 183 for (int d = 0; d < docs.size(); d++) 184 { 185 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_; 186 doc_nums[d] = doc_num; 187 } 188 return doc_nums; 189 } 190 191 /** get the list of doc ranks */ 192 protected String[] getDocRanks(Object query_result) 193 { 194 Vector docs = ((LuceneQueryResult) query_result).getDocs(); 195 String[] doc_ranks = new String[docs.size()]; 196 for (int d = 0; d < docs.size(); d++) 197 { 198 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_); 199 } 200 return doc_ranks; 201 } 202 203 /** add in term info if available */ 204 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result) 205 { 206 String query_level = (String) params.get(LEVEL_PARAM); // the current query level 207 208 Vector terms = ((LuceneQueryResult) query_result).getTerms(); 209 for (int t = 0; t < terms.size(); t++) 210 { 211 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t); 212 213 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 214 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); 215 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); 216 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); 217 term_elem.setAttribute(FIELD_ATT, term_info.field_); 218 term_list.appendChild(term_elem); 219 } 220 221 Vector stopwords = ((LuceneQueryResult) query_result).getStopWords(); 222 for (int t = 0; t < stopwords.size(); t++) 223 { 224 String stopword = (String) stopwords.get(t); 225 226 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM); 227 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword); 228 term_list.appendChild(stopword_elem); 229 } 230 231 return true; 232 } 203 233 }
Note:
See TracChangeset
for help on using the changeset viewer.