Changeset 24722
- Timestamp: 2011-10-05T14:43:47+13:00 (13 years ago)
- Location: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service
- Files:
  - 1 added
  - 2 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneSearch.java
r24024 r24722 41 41 import org.greenstone.LuceneWrapper.LuceneQueryResult; 42 42 43 public class GS2LuceneSearch 44 extends AbstractGS2FieldSearch 43 public class GS2LuceneSearch extends SharedSoleneGS2FieldSearch 45 44 { 46 protected static final String RANK_PARAM_RANK_VALUE = "rank"; 47 48 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 49 50 private GS2LuceneQuery lucene_src=null; 51 52 public GS2LuceneSearch() 53 { 54 this.lucene_src = new GS2LuceneQuery(); 55 // Lucene uses double operators, not single 56 AND_OPERATOR = "&&"; 57 OR_OPERATOR = "||"; 58 59 does_paging = true; 60 does_chunking = true; 61 } 62 63 public void cleanUp() { 64 super.cleanUp(); 65 this.lucene_src.cleanUp(); 66 } 67 68 /** configure this service */ 69 public boolean configure(Element info, Element extra_info) 70 { 71 if (!super.configure(info, extra_info)){ 72 return false; 73 } 45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneSearch.class.getName()); 46 47 private GS2LuceneQuery lucene_src=null; 48 49 public GS2LuceneSearch() 50 { 51 this.lucene_src = new GS2LuceneQuery(); 52 } 53 54 55 public void cleanUp() { 56 super.cleanUp(); 57 this.lucene_src.cleanUp(); 58 } 59 60 61 /** methods to handle actually doing the query */ 62 63 /** do any initialisation of the query object */ 64 protected boolean setUpQueryer(HashMap params) { 65 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar; 66 67 String index = "didx"; 68 String physical_index_language_name=null; 69 String physical_sub_index_name=null; 70 int maxdocs = 100; 71 int hits_per_page = 20; 72 int start_page = 1; 73 // set up the query params 74 Set entries = params.entrySet(); 75 Iterator i = entries.iterator(); 76 while (i.hasNext()) { 77 Map.Entry m = (Map.Entry)i.next(); 78 String name = (String)m.getKey(); 79 String value = (String)m.getValue(); 80 81 if 
(name.equals(MAXDOCS_PARAM)&& !value.equals("")) { 82 maxdocs = Integer.parseInt(value); 83 } else if (name.equals(HITS_PER_PAGE_PARAM)) { 84 hits_per_page = Integer.parseInt(value); 85 } else if (name.equals(START_PAGE_PARAM)) { 86 start_page = Integer.parseInt(value); 74 87 75 // Lucene doesn't do case folding or stemming or accent folding at the 76 // moment 77 does_case = false; 78 does_stem = false; 79 does_accent = false; 80 81 return true; 82 } 83 84 /** add in the lucene specific params to TextQuery */ 85 protected void addCustomQueryParams(Element param_list, String lang) 86 { 87 super.addCustomQueryParams(param_list, lang); 88 /** lucenes rank param is based on index fields, not ranked/not */ 89 createParameter(RANK_PARAM, param_list, lang); 90 91 } 92 93 /** create a param and add to the list */ 94 /** we override this to do a special rank param */ 95 protected void createParameter(String name, Element param_list, String lang) 96 { 97 Element param = null; 98 if (name.equals(RANK_PARAM)) { 99 // get the fields 100 ArrayList fields = new ArrayList(); 101 fields.add(RANK_PARAM_RANK_VALUE); 102 ArrayList field_names = new ArrayList(); 103 field_names.add(getTextString("param.sortBy.rank", lang)); 104 getSortByIndexData(fields, field_names, lang); 105 106 param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names ); 107 } 108 if (param != null) { 109 param_list.appendChild(param); 110 } else { 111 super.createParameter(name, param_list, lang); 112 } 113 } 114 115 protected void getSortByIndexData(ArrayList index_ids, ArrayList index_names, String lang) { 116 // the index info - 117 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER); 118 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 119 int len = indexes.getLength(); 120 // now add even if there is only one 121 for (int i=0; i<len; 
i++) { 122 Element index = (Element)indexes.item(i); 123 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT); 124 if (shortname.equals("") || shortname.equals("ZZ") || shortname.equals("TX")) { 125 continue; 126 } 127 index_ids.add("by"+shortname); 128 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 129 if (display_name.equals("")) { 130 display_name = index.getAttribute(GSXML.NAME_ATT); 131 if (display_name.equals("")) { 132 display_name = shortname; 133 } 134 } 135 index_names.add(display_name); 136 137 } 138 139 } 140 141 /** methods to handle actually doing the query */ 142 143 /** do any initialisation of the query object */ 144 protected boolean setUpQueryer(HashMap params) { 145 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + "index"+File.separatorChar; 146 147 String index = "didx"; 148 String physical_index_language_name=null; 149 String physical_sub_index_name=null; 150 int maxdocs = 100; 151 int hits_per_page = 20; 152 int start_page = 1; 153 // set up the query params 154 Set entries = params.entrySet(); 155 Iterator i = entries.iterator(); 156 while (i.hasNext()) { 157 Map.Entry m = (Map.Entry)i.next(); 158 String name = (String)m.getKey(); 159 String value = (String)m.getValue(); 160 161 if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) { 162 maxdocs = Integer.parseInt(value); 163 } else if (name.equals(HITS_PER_PAGE_PARAM)) { 164 hits_per_page = Integer.parseInt(value); 165 } else if (name.equals(START_PAGE_PARAM)) { 166 start_page = Integer.parseInt(value); 167 168 } else if (name.equals(MATCH_PARAM)) { 169 if (value.equals(MATCH_PARAM_ALL)) { 170 this.lucene_src.setDefaultConjunctionOperator("AND"); 171 } else{ 172 this.lucene_src.setDefaultConjunctionOperator("OR"); 173 } 174 } else if (name.equals(RANK_PARAM)) { 175 if (value.equals(RANK_PARAM_RANK_VALUE)) { 176 value = null; 177 } 178 this.lucene_src.setSortField(value); 179 } else if 
(name.equals(LEVEL_PARAM)) { 180 if (value.toUpperCase().equals("SEC")){ 181 index = "sidx"; 182 } 183 else { 184 index = "didx"; 185 } 186 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) { 187 physical_sub_index_name=value; 188 } else if (name.equals(INDEX_LANGUAGE_PARAM)){ 189 physical_index_language_name=value; 190 } // ignore any others 191 } 192 // set up start and end results if necessary 193 int start_results = 1; 194 if (start_page != 1) { 195 start_results = ((start_page-1) * hits_per_page) + 1; 196 } 197 int end_results = hits_per_page * start_page; 198 this.lucene_src.setStartResults(start_results); 199 this.lucene_src.setEndResults(end_results); 200 201 202 if (index.equals("sidx") || index.equals("didx")){ 203 if (physical_sub_index_name!=null) { 204 index+=physical_sub_index_name; 205 } 206 if (physical_index_language_name!=null){ 207 index+=physical_index_language_name; 208 } 209 } 210 211 this.lucene_src.setIndexDir(indexdir+index); 212 this.lucene_src.initialise(); 213 return true; 214 } 215 /** do the query */ 216 protected Object runQuery(String query) { 217 try { 218 LuceneQueryResult lqr=this.lucene_src.runQuery(query); 219 return lqr; 220 } catch (Exception e) { 221 logger.error ("exception happened in run query: ", e); 222 } 223 224 return null; 225 } 226 /** get the total number of docs that match */ 227 protected long numDocsMatched(Object query_result) { 228 return ((LuceneQueryResult)query_result).getTotalDocs(); 229 230 } 231 /** get the list of doc ids */ 232 protected String [] getDocIDs(Object query_result) { 233 Vector docs = ((LuceneQueryResult)query_result).getDocs(); 234 String [] doc_nums = new String [docs.size()]; 235 for (int d = 0; d < docs.size(); d++) { 236 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_; 237 doc_nums[d] = doc_num; 238 } 239 return doc_nums; 240 } 241 /** get the list of doc ranks */ 242 protected String [] getDocRanks(Object query_result) { 243 Vector docs = 
((LuceneQueryResult)query_result).getDocs(); 244 String [] doc_ranks = new String [docs.size()]; 245 for (int d = 0; d < docs.size(); d++) { 246 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_); 247 } 248 return doc_ranks; 249 } 250 /** add in term info if available */ 251 protected boolean addTermInfo(Element term_list, HashMap params, 252 Object query_result) { 253 String query_level = (String)params.get(LEVEL_PARAM); // the current query level 254 255 Vector terms = ((LuceneQueryResult)query_result).getTerms(); 256 for (int t = 0; t < terms.size(); t++) { 257 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t); 258 259 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 260 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); 261 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); 262 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); 263 term_elem.setAttribute(FIELD_ATT, term_info.field_); 264 term_list.appendChild(term_elem); 265 } 266 267 Vector stopwords = ((LuceneQueryResult)query_result).getStopWords(); 268 for (int t = 0; t < stopwords.size(); t++) { 269 String stopword = (String) stopwords.get(t); 270 271 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM); 272 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword); 273 term_list.appendChild(stopword_elem); 274 } 275 276 return true; 277 } 278 279 protected String addFieldInfo(String query, String field) { 280 if (field.equals("") || field.equals("ZZ")) { 281 return query; 282 } 283 return field+":("+query+")"; 284 } 285 286 protected void addQueryElem(StringBuffer s, String q, String f, String c) { 287 288 String combine=""; 289 if (s.length()>0) { 290 combine = " "+c+" "; 291 } 292 s.append(combine + addFieldInfo(q,f)); 293 } 294 295 /** Lucene doesn't use these options at the moment */ 296 protected String addStemOptions(String query, String stem, 297 String casef, String accent) { 
298 return query; 299 } 300 301 /** Lucene no longer uses internal ids. It just uses hash ids. So we need 302 to override these methods so no conversion is done. */ 303 /** convert indexer internal id to Greenstone oid */ 304 protected String internalNum2OID(long docnum) 305 { 306 return Long.toString(docnum); 307 308 } 309 protected String internalNum2OID(String docnum) 310 { 311 return docnum; 312 313 } 314 88 } else if (name.equals(MATCH_PARAM)) { 89 if (value.equals(MATCH_PARAM_ALL)) { 90 this.lucene_src.setDefaultConjunctionOperator("AND"); 91 } else{ 92 this.lucene_src.setDefaultConjunctionOperator("OR"); 93 } 94 } else if (name.equals(RANK_PARAM)) { 95 if (value.equals(RANK_PARAM_RANK_VALUE)) { 96 value = null; 97 } 98 this.lucene_src.setSortField(value); 99 } else if (name.equals(LEVEL_PARAM)) { 100 if (value.toUpperCase().equals("SEC")){ 101 index = "sidx"; 102 } 103 else { 104 index = "didx"; 105 } 106 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) { 107 physical_sub_index_name=value; 108 } else if (name.equals(INDEX_LANGUAGE_PARAM)){ 109 physical_index_language_name=value; 110 } // ignore any others 111 } 112 // set up start and end results if necessary 113 int start_results = 1; 114 if (start_page != 1) { 115 start_results = ((start_page-1) * hits_per_page) + 1; 116 } 117 int end_results = hits_per_page * start_page; 118 this.lucene_src.setStartResults(start_results); 119 this.lucene_src.setEndResults(end_results); 120 121 if (index.equals("sidx") || index.equals("didx")){ 122 if (physical_sub_index_name!=null) { 123 index+=physical_sub_index_name; 124 } 125 if (physical_index_language_name!=null){ 126 index+=physical_index_language_name; 127 } 128 } 129 130 this.lucene_src.setIndexDir(indexdir+index); 131 this.lucene_src.initialise(); 132 return true; 133 } 134 135 /** do the query */ 136 protected Object runQuery(String query) { 137 try { 138 LuceneQueryResult lqr=this.lucene_src.runQuery(query); 139 return lqr; 140 } catch (Exception e) { 141 
logger.error ("Exception happened in runQuery(): ", e); 142 } 143 144 return null; 145 } 146 147 /** get the total number of docs that match */ 148 protected long numDocsMatched(Object query_result) { 149 return ((LuceneQueryResult)query_result).getTotalDocs(); 150 151 } 152 153 /** get the list of doc ids */ 154 protected String [] getDocIDs(Object query_result) { 155 Vector docs = ((LuceneQueryResult)query_result).getDocs(); 156 String [] doc_nums = new String [docs.size()]; 157 for (int d = 0; d < docs.size(); d++) { 158 String doc_num = ((LuceneQueryResult.DocInfo) docs.elementAt(d)).id_; 159 doc_nums[d] = doc_num; 160 } 161 return doc_nums; 162 } 163 164 /** get the list of doc ranks */ 165 protected String [] getDocRanks(Object query_result) { 166 Vector docs = ((LuceneQueryResult)query_result).getDocs(); 167 String [] doc_ranks = new String [docs.size()]; 168 for (int d = 0; d < docs.size(); d++) { 169 doc_ranks[d] = Float.toString(((LuceneQueryResult.DocInfo) docs.elementAt(d)).rank_); 170 } 171 return doc_ranks; 172 } 173 174 /** add in term info if available */ 175 protected boolean addTermInfo(Element term_list, HashMap params, 176 Object query_result) { 177 String query_level = (String)params.get(LEVEL_PARAM); // the current query level 178 179 Vector terms = ((LuceneQueryResult)query_result).getTerms(); 180 for (int t = 0; t < terms.size(); t++) { 181 LuceneQueryResult.TermInfo term_info = (LuceneQueryResult.TermInfo) terms.get(t); 182 183 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); 184 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); 185 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); 186 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); 187 term_elem.setAttribute(FIELD_ATT, term_info.field_); 188 term_list.appendChild(term_elem); 189 } 190 191 Vector stopwords = ((LuceneQueryResult)query_result).getStopWords(); 192 for (int t = 0; t < stopwords.size(); t++) { 193 String stopword = (String) 
stopwords.get(t); 194 195 Element stopword_elem = this.doc.createElement(GSXML.STOPWORD_ELEM); 196 stopword_elem.setAttribute(GSXML.NAME_ATT, stopword); 197 term_list.appendChild(stopword_elem); 198 } 199 200 return true; 201 } 315 202 } -
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java
r24394 r24722 37 37 38 38 protected static final String INDEX_ELEM = "index"; 39 40 protected ArrayList index_ids; 41 42 public LuceneSearch() 43 { 44 index_ids = new ArrayList(); 45 } 39 46 40 47 public boolean configure(Element info, Element extra_info) { … … 44 51 45 52 default_index = "idx"; 46 return true; 47 } 48 49 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) 50 { 51 // the index info - read from config file - cache it?? 52 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER); 53 54 // cache index info read from config file 55 Element index_list 56 = (Element)GSXML.getChildByTagName(this.config_info, 57 INDEX_ELEM+GSXML.LIST_MODIFIER); 53 58 if (index_list != null) { 54 59 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); … … 57 62 for (int i=0; i<len; i++) { 58 63 Element index = (Element)indexes.item(i); 59 index_ids.add(index.getAttribute(GSXML.NAME_ATT)); 64 index_ids.add(index.getAttribute(GSXML.NAME_ATT)); 65 } 66 } else { 67 // there is only one index, so we assume the default 68 index_ids.add(this.default_index); 69 } 70 71 return true; 72 } 73 74 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) 75 { 76 // copying exercise for index_ids, 77 for (int i=0; i<this.index_ids.size(); i++) { 78 index_ids.add(this.index_ids.get(i)); 79 } 80 81 // But need to work out display name from scratch as this uses 82 // the 'lang' parameter 83 84 Element index_list 85 = (Element)GSXML.getChildByTagName(this.config_info, 86 INDEX_ELEM+GSXML.LIST_MODIFIER); 87 if (index_list != null) { 88 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 89 int len = indexes.getLength(); 90 // now add even if there is only one 91 for (int i=0; i<len; i++) { 92 Element index = (Element)indexes.item(i); 60 93 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en")); 61 94 … … 63 96 } else { 64 97 // there is 
only one index, so we assume the default 65 index_ids.add(this.default_index); 66 98 index_names.add("default index"); 67 99 } 68 69 100 } 70 101 71 /** Process a text query - implemented by concrete subclasses */ 72 protected Element processTextQuery(Element request) { 102 103 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list) 104 { 73 105 74 106 // Create a new (empty) result message 75 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 76 107 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE); 77 108 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); 78 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 79 109 result.appendChild(doc_node_list); 80 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 81 110 result.appendChild(metadata_list); 111 } 112 113 protected boolean hasParamList(Element request, Element metadata_list) 114 { 82 115 // Get the parameters of the request 83 116 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); … … 85 118 logger.error("TextQuery request had no paramList."); 86 119 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 87 return result; // Return the empty result 120 return false; // signal that an empty result should be return 88 121 } 89 122 90 // Process the request parameters 123 return true; 124 } 125 126 protected boolean hasQueryString(Element param_list, Element metadata_list) 127 { 128 129 // Process the request parameters to make sure a query has been specified 91 130 HashMap params = GSXML.extractParams(param_list, false); 131 String query_string = (String) params.get(QUERY_PARAM); 92 132 93 // Make sure a query has been specified 94 String query_string = (String) params.get(QUERY_PARAM); 95 133 if (query_string == null || query_string.equals("")) { 96 134 logger.error("TextQuery request had no query string."); 97 135 
GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0"); 98 return result; // Return the empty result 136 return false; // signal that an empty result should be return 99 137 } 138 139 return true; 140 } 141 142 143 144 /** Process a text query - implemented by concrete subclasses */ 145 protected Element processTextQuery(Element request) { 146 147 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); 148 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); 149 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); 150 initResultElement(result,doc_node_list,metadata_list); 151 152 if (!hasParamList(request,metadata_list)) { 153 return result; 154 } 155 156 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); 157 if (!hasQueryString(param_list,metadata_list)) { 158 return result; 159 } 160 161 HashMap params = GSXML.extractParams(param_list, false); 162 String query_string = (String) params.get(QUERY_PARAM); 100 163 101 164 // Get the index … … 104 167 index = this.default_index; // assume the default 105 168 } 106 try { 169 170 try { 107 171 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name); 108 172 index_dir += File.separator+index; … … 111 175 112 176 Term term = new Term("content", query_string); 113 177 114 178 Query query = new TermQuery(term); 115 179 116 180 Hits hits = searcher.search(query); 117 181 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.length()); … … 127 191 e.printStackTrace(); 128 192 } 129 130 return result; 131 193 194 return result; 132 195 } 133 196
Note: See TracChangeset for help on using the changeset viewer.