Changeset 25849
- Timestamp:
- 2012-06-28T11:16:55+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java
package org.greenstone.gsdl3.service;

// Greenstone classes
import java.io.File;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.greenstone.gsdl3.util.GSFile;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.mgpp.MGPPDocInfo;
import org.greenstone.mgpp.MGPPQueryResult;
import org.greenstone.mgpp.MGPPSearchWrapper;
import org.greenstone.mgpp.MGPPTermInfo;
import org.w3c.dom.Element;

/**
 * Field search service that runs its queries through an
 * {@link MGPPSearchWrapper}.
 *
 * A single wrapper object is shared by every instance of this class (it is
 * held in a static field). {@link #processAnyQuery(Element, int)} holds the
 * wrapper's monitor for the whole of a query, so the per-query settings made
 * in {@link #setUpQueryer(HashMap)} are not interleaved between queries that
 * go through that method.
 */
public class GS2MGPPSearch extends AbstractGS2FieldSearch
{
	/** The MGPP wrapper shared by all instances; created by the first constructor call. */
	private static MGPPSearchWrapper mgpp_src = null;

	/**
	 * Prefix of the physical index name. The subcollection and language
	 * partition names selected for a query are appended to it.
	 */
	private static final String PHYSICAL_INDEX_PREFIX = "idx";

	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());

	/** Creates the service, creating the shared MGPP wrapper if it does not exist yet. */
	public GS2MGPPSearch()
	{
		if (mgpp_src == null)
		{
			mgpp_src = new MGPPSearchWrapper();
		}
	}

	/** Runs the superclass clean-up, then unloads the index data held by the shared wrapper. */
	public void cleanUp()
	{
		super.cleanUp();
		mgpp_src.unloadIndexData();
	}

	/**
	 * Processes a query by delegating to the superclass while holding the
	 * monitor of the shared wrapper.
	 *
	 * @param request the query request element
	 * @param query_type the query type, passed through to the superclass
	 * @return whatever the superclass returns for this request
	 */
	protected Element processAnyQuery(Element request, int query_type)
	{
		synchronized (mgpp_src)
		{
			return super.processAnyQuery(request, query_type);
		}
	}

	/**
	 * Configures this service.
	 *
	 * After the superclass has configured itself, the wrapper settings that do
	 * not depend on query parameters are applied.
	 *
	 * @param info passed to the superclass
	 * @param extra_info passed to the superclass
	 * @return false if the superclass configuration failed, true otherwise
	 */
	public boolean configure(Element info, Element extra_info)
	{
		if (!super.configure(info, extra_info))
		{
			return false;
		}

		// set up the defaults which are not dependent on query parameters
		// the default level is also the level which the database is expecting
		// this must not be overwritten
		mgpp_src.setReturnLevel(this.default_db_level);
		// return term info
		mgpp_src.setReturnTerms(true);
		mgpp_src.setMaxNumeric(this.maxnumeric);
		return true;
	}

	/**
	 * Adds the mgpp specific params to TextQuery: the superclass parameters
	 * followed by the rank parameter.
	 */
	protected void addCustomQueryParams(Element param_list, String lang)
	{
		super.addCustomQueryParams(param_list, lang);
		createParameter(RANK_PARAM, param_list, lang);
	}

	/**
	 * Applies the query parameters to the shared wrapper and loads the index
	 * selected by them.
	 *
	 * The query level and case folding are first reset to their defaults, then
	 * each recognised parameter overrides the corresponding wrapper setting.
	 * Unrecognised parameters are ignored, as are entries whose value is not a
	 * String. The physical index name is rebuilt on every call from the
	 * "idx" prefix plus the subcollection and language names, so that a
	 * query can select a different index partition from the previous one.
	 *
	 * @param params the query parameters, keyed by parameter name
	 * @return always true
	 * @throws NumberFormatException if the MAXDOCS_PARAM value is non-empty
	 *         and is not a valid integer
	 */
	protected boolean setUpQueryer(HashMap<String, Serializable> params)
	{
		// set up the defaults that may be changed by query params
		mgpp_src.setQueryLevel(this.default_level);
		// we have case folding on by default
		mgpp_src.setCase(true);

		String physical_sub_index_name = this.default_index_subcollection;
		String physical_index_language_name = this.default_index_language;

		// set up the query params
		for (Map.Entry<String, Serializable> entry : params.entrySet())
		{
			String name = entry.getKey();
			Serializable raw_value = entry.getValue();
			if (!(raw_value instanceof String))
			{
				// every parameter handled below is a plain string; anything
				// else (including null) cannot be one of them
				continue;
			}
			String value = (String) raw_value;

			if (name.equals(CASE_PARAM))
			{
				mgpp_src.setCase(value.equals(BOOLEAN_PARAM_ON));
			}
			else if (name.equals(STEM_PARAM))
			{
				mgpp_src.setStem(value.equals(BOOLEAN_PARAM_ON));
			}
			else if (name.equals(ACCENT_PARAM))
			{
				mgpp_src.setAccentFold(value.equals(BOOLEAN_PARAM_ON));
			}
			else if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
			{
				mgpp_src.setMaxDocs(Integer.parseInt(value));
			}
			else if (name.equals(LEVEL_PARAM))
			{
				mgpp_src.setQueryLevel(value);
			}
			else if (name.equals(MATCH_PARAM))
			{
				// 1 when the value is MATCH_PARAM_ALL, 0 for any other value
				mgpp_src.setMatchMode(value.equals(MATCH_PARAM_ALL) ? 1 : 0);
			}
			else if (name.equals(RANK_PARAM))
			{
				if (value.equals(RANK_PARAM_RANK))
				{
					mgpp_src.setSortByRank(true);
				}
				else if (value.equals(RANK_PARAM_NONE))
				{
					mgpp_src.setSortByRank(false);
				}
			}
			else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
			{
				physical_sub_index_name = value;
			}
			else if (name.equals(INDEX_LANGUAGE_PARAM))
			{
				physical_index_language_name = value;
			} // ignore any others
		}

		// Build the index name afresh for this query. It is deliberately a
		// local: keeping it in a field froze the name chosen by the first
		// query and ignored the partition parameters of all later ones.
		String physical_index_name = PHYSICAL_INDEX_PREFIX;
		if (physical_sub_index_name != null)
		{
			physical_index_name += physical_sub_index_name;
		}
		if (physical_index_language_name != null)
		{
			physical_index_name += physical_index_language_name;
		}

		// set up mgpp_src
		String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, physical_index_name);
		mgpp_src.loadIndexData(indexdir);

		return true;
	}

	/**
	 * Runs the query on the shared wrapper.
	 *
	 * @param query the query string
	 * @return the wrapper's MGPPQueryResult for the query
	 */
	protected Object runQuery(String query)
	{
		mgpp_src.runQuery(query);
		return mgpp_src.getQueryResult();
	}

	/**
	 * @param query_result an MGPPQueryResult as returned by runQuery
	 * @return the total number of documents reported by the result
	 */
	protected long numDocsMatched(Object query_result)
	{
		return ((MGPPQueryResult) query_result).getTotalDocs();
	}

	/**
	 * @param query_result an MGPPQueryResult as returned by runQuery
	 * @return the document numbers of the result's documents, as strings, in
	 *         result order
	 */
	protected String[] getDocIDs(Object query_result)
	{
		Vector<?> docs = ((MGPPQueryResult) query_result).getDocs();
		int num_docs = docs.size();
		String[] doc_nums = new String[num_docs];
		for (int d = 0; d < num_docs; d++)
		{
			MGPPDocInfo doc_info = (MGPPDocInfo) docs.elementAt(d);
			doc_nums[d] = Long.toString(doc_info.num_);
		}
		return doc_nums;
	}

	/**
	 * @param query_result an MGPPQueryResult as returned by runQuery
	 * @return the ranks of the result's documents, as strings, in result order
	 */
	protected String[] getDocRanks(Object query_result)
	{
		Vector<?> docs = ((MGPPQueryResult) query_result).getDocs();
		int num_docs = docs.size();
		String[] doc_ranks = new String[num_docs];
		for (int d = 0; d < num_docs; d++)
		{
			MGPPDocInfo doc_info = (MGPPDocInfo) docs.elementAt(d);
			doc_ranks[d] = Float.toString(doc_info.rank_);
		}
		return doc_ranks;
	}

	/**
	 * Appends one term element per query term to the given term list.
	 *
	 * Each term element carries the term's name, stem method, frequency,
	 * number of matching documents and field, plus a child list holding one
	 * element per equivalent term. The field attribute is left empty when the
	 * term's tag equals the current query level.
	 *
	 * @param term_list the element the term elements are appended to
	 * @param params the query parameters; only LEVEL_PARAM is read
	 * @param query_result an MGPPQueryResult as returned by runQuery
	 * @return always true
	 */
	protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result)
	{
		String query_level = (String) params.get(LEVEL_PARAM); // the current query level

		Vector<?> terms = ((MGPPQueryResult) query_result).getTerms();
		for (int t = 0; t < terms.size(); t++)
		{
			MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);

			Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
			term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
			term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
			term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
			term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);

			String field = term_info.tag_;
			if (field.equals(query_level))
			{
				// the tag is just the query level, not a field: ignore
				field = "";
			}
			term_elem.setAttribute(FIELD_ATT, field);

			Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM + GSXML.LIST_MODIFIER);
			term_elem.appendChild(equiv_term_list);

			Vector<?> equiv_terms = term_info.equiv_terms_;
			for (int et = 0; et < equiv_terms.size(); et++)
			{
				String equiv_term = (String) equiv_terms.get(et);

				// per-equivalent-term counts are not filled in, only the name
				Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
				equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
				equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
				equiv_term_elem.setAttribute(FREQ_ATT, "");
				equiv_term_list.appendChild(equiv_term_elem);
			}

			term_list.appendChild(term_elem);
		}
		return true;
	}

	/**
	 * Restricts a query to a field.
	 *
	 * @param query the query text
	 * @param field the field name; "" and "ZZ" mean no restriction
	 * @return the query unchanged when there is no restriction, otherwise
	 *         "[query]:field"
	 */
	protected String addFieldInfo(String query, String field)
	{
		if (field.equals("") || field.equals("ZZ"))
		{
			return query;
		}
		return "[" + query + "]:" + field;
	}

	/**
	 * Appends a (possibly field-restricted) query to the query built so far.
	 * The combiner, surrounded by spaces, is inserted first unless the buffer
	 * is still empty.
	 *
	 * @param final_query the query built so far; appended to in place
	 * @param query the query text to add
	 * @param field the field to restrict it to, see addFieldInfo
	 * @param combine the combiner placed between the existing and new parts
	 */
	protected void addQueryElem(StringBuffer final_query, String query, String field, String combine)
	{
		if (final_query.length() > 0)
		{
			final_query.append(" " + combine + " ");
		}
		final_query.append(addFieldInfo(query, field));
	}

	/**
	 * Appends a modifier suffix to every term of a query.
	 *
	 * The suffix is "#" followed by one letter for each option that is not
	 * null: "i" or "c" for casef, "s" or "u" for stem, "f" or "a" for accent,
	 * the first letter of each pair being used when the option equals "1".
	 * The query is split on single spaces; empty tokens and the token "TX"
	 * are dropped.
	 *
	 * @param query the space separated query terms
	 * @param stem the stem option, or null to leave it out of the suffix
	 * @param casef the case option, or null to leave it out of the suffix
	 * @param accent the accent option, or null to leave it out of the suffix
	 * @return the kept terms, each preceded by a space and followed by the
	 *         suffix; "" if no term is kept
	 */
	protected String addStemOptions(String query, String stem, String casef, String accent)
	{
		String mods = "#";
		if (casef != null)
		{
			mods += casef.equals("1") ? "i" : "c";
		}
		if (stem != null)
		{
			mods += stem.equals("1") ? "s" : "u";
		}
		if (accent != null)
		{
			mods += accent.equals("1") ? "f" : "a";
		}

		StringBuilder temp = new StringBuilder();
		for (String term : query.split(" "))
		{
			String t = term.trim();
			// what is the TX bit about???
			if (!t.equals("") && !t.equals("TX"))
			{
				temp.append(" ").append(t).append(mods);
			}
		}
		return temp.toString();
	}

}
Note:
See TracChangeset
for help on using the changeset viewer.