Changeset 24857
- Timestamp: 2011-12-06T11:15:10+13:00 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
- main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java
r24394 r24857 18 18 package org.greenstone.gsdl3.service; 19 19 20 21 20 // Greenstone classes 22 21 import org.greenstone.gsdl3.util.OID; … … 28 27 // XML classes 29 28 import org.w3c.dom.Document; 30 import org.w3c.dom.Element; 29 import org.w3c.dom.Element; 31 30 import org.w3c.dom.NodeList; 32 31 … … 42 41 import org.apache.log4j.*; 43 42 44 public abstract class AbstractGS2TextSearch 45 extends AbstractTextSearch 43 public abstract class AbstractGS2TextSearch extends AbstractTextSearch 46 44 { 47 45 48 protected static final String EQUIV_TERM_ELEM = "equivTerm"; 49 50 protected static final String STEM_ATT = "stem"; 51 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch"; 52 protected static final String FREQ_ATT = "freq"; 53 54 // Elements used in the config file that are specific to this class 55 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex"; 56 protected static final String INDEX_STEM_ELEM = "indexStem"; 57 protected static final String INDEX_ELEM = "index"; 58 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection"; 59 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage"; 60 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection"; 61 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage"; 62 63 64 // Some indexing options 65 protected static final String STEMINDEX_OPTION = "stemIndexes"; 66 protected static final String MAXNUMERIC_OPTION = "maxnumeric"; 67 68 /** the stem used for the index files */ 69 protected String index_stem = null; 70 71 // stem indexes available 72 protected boolean does_case=true; 73 protected boolean does_stem=true; 74 protected boolean does_accent=false; 75 76 // maxnumeric - 77 protected int maxnumeric = 4; 78 79 SimpleDocumentDatabase gs_doc_db = null; 80 81 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName()); 82 83 84 /** constructor */ 85 
public AbstractGS2TextSearch() 86 { 87 88 } 89 public void cleanUp() { 90 super.cleanUp(); 91 this.gs_doc_db.cleanUp(); 92 } 93 94 /** configure this service */ 95 public boolean configure(Element info, Element extra_info) 96 { 97 if (!super.configure(info,extra_info)) { 98 return false; 99 } 100 101 // find out what kind of database we have 102 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM); 103 String database_type = null; 104 if (database_type_elem != null) { 105 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT); 106 } 107 if (database_type == null || database_type.equals("")) { 108 database_type = "gdbm"; // the default 109 } 110 111 // the index stem is either the collection name or is specified in the config file 112 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM); 113 if (index_stem_elem != null) { 114 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT); 115 } 116 if (this.index_stem == null || this.index_stem.equals("")) { 117 logger.warn("indexStem element not found, stem will default to collection name"); 118 this.index_stem = this.cluster_name; 119 } 120 121 // replaces default AbstractSearch version with one tied to database 122 gs_doc_db = new SimpleDocumentDatabase(this.doc, 123 database_type,this.site_home, 124 this.cluster_name, 125 this.index_stem); 126 if (!gs_doc_db.isValid()) { 127 logger.error("Failed to open Document Database."); 128 return false; 129 } 130 this.gs_doc = gs_doc_db; 131 132 // do we support any of the extended features? 
133 does_chunking = true; 134 135 // Get the default index out of <defaultIndex> (buildConfig.xml) 136 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM); 137 if (def != null) { 138 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT); 139 } // otherwise will be "", and the first one will be the default 140 141 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml) 142 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM); 143 if (defSub != null) { 144 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT); 145 } 146 147 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml) 148 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM); 149 if (defLang != null) { 150 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT); 151 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage 152 153 // get index options 154 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER); 155 if (index_option_list != null) { 156 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM); 157 for (int i=0; i<options.getLength(); i++) { 158 Element opt = (Element)options.item(i); 159 String name = opt.getAttribute(GSXML.NAME_ATT); 160 String value = opt.getAttribute(GSXML.VALUE_ATT); 161 if (name.equals(MAXNUMERIC_OPTION)) { 162 int maxnum = Integer.parseInt(value); 163 if (4 <= maxnum && maxnum < 512) { 164 maxnumeric = maxnum; 165 } 166 } 167 else if (name.equals(STEMINDEX_OPTION)) { 168 int stemindex = Integer.parseInt(value); 169 // stem and case are true by default, accent folding false by default 170 if ((stemindex & 1) == 0) { 171 does_case = false; 172 } 173 if ((stemindex & 2) == 0) { 174 does_stem = false; 175 } 176 if ((stemindex & 4) != 0) { 177 does_accent = true; 178 } 179 } 180 } 
181 } 182 183 // get display info from extra info 184 if (extra_info !=null) { 185 Document owner = info.getOwnerDocument(); 186 // so far we have index specific display elements, and global format elements 187 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM); 188 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM); 189 190 for (int i=0; i<indexes.getLength();i++) { 191 Element ind = (Element)indexes.item(i); 192 String name = ind.getAttribute(GSXML.NAME_ATT); 193 Element node_extra = GSXML.getNamedElement(config_search, 194 GSXML.INDEX_ELEM, 195 GSXML.NAME_ATT, 196 name); 197 if (node_extra == null) { 198 logger.error("haven't found extra info for index named "+name); 199 continue; 200 } 201 202 // get the display elements if any - displayName 203 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM); 204 if (display_names !=null) { 205 for (int j=0; j<display_names.getLength(); j++) { 206 Element e = (Element)display_names.item(j); 207 ind.appendChild(owner.importNode(e, true)); 208 } 209 } 210 } // for each index 211 } 212 return true; 213 } 214 215 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) 216 { 217 // the index info - 218 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER); 219 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 220 int len = indexes.getLength(); 221 // now add even if there is only one 222 for (int i=0; i<len; i++) { 223 Element index = (Element)indexes.item(i); 224 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT); 225 if (shortname.equals("")) { 226 continue; 227 } 228 index_ids.add(shortname); 229 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 230 if (display_name.equals("")) { 231 display_name = index.getAttribute(GSXML.NAME_ATT); 232 if (display_name.equals("")) { 233 display_name = shortname; 234 } 235 } 
236 index_names.add(display_name); 237 } 238 } 239 240 protected void getIndexSubcollectionData(ArrayList index_sub_ids, ArrayList index_sub_names, String lang) 241 { 242 // the index info - 243 Element index_sub_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM+GSXML.LIST_MODIFIER); 244 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); 245 int len = index_subs.getLength(); 246 // now add even if there is only one 247 for (int i=0; i<len; i++) { 248 Element indexsub = (Element)index_subs.item(i); 249 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT); 250 if (shortname.equals("")) { 251 continue; 252 } 253 index_sub_ids.add(shortname); 254 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 255 if (display_name.equals("")) { 256 display_name = indexsub.getAttribute(GSXML.NAME_ATT); 257 if (display_name.equals("")) { 258 display_name = shortname; 259 } 260 } 261 index_sub_names.add(display_name); 262 } 263 } 264 265 protected void getIndexLanguageData(ArrayList index_lang_ids, ArrayList index_lang_names, String lang) 266 { 267 // the index info - 268 Element index_lang_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM+GSXML.LIST_MODIFIER); 269 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM); 270 int len = index_langs.getLength(); 271 // now add even if there is only one 272 for (int i=0; i<len; i++) { 273 Element indexlang = (Element)index_langs.item(i); 274 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT); 275 if (shortname.equals("")) { 276 continue; 277 } 278 index_lang_ids.add(shortname); 279 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 280 if (display_name.equals("")) { 281 display_name = indexlang.getAttribute(GSXML.NAME_ATT); 282 if (display_name.equals("")) { 283 display_name = shortname; 284 } 285 } 286 
index_lang_names.add(display_name); 287 } 288 289 290 } 291 292 293 protected void addCustomQueryParams(Element param_list, String lang) 294 { 295 if (this.does_case){ 296 // gs2 has case on by default 297 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON); 298 } 299 if (this.does_stem){ 300 // but stem is off by default 301 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF); 302 } 303 if (this.does_accent){ 304 // and so is accent folding 305 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF); 306 } 307 createParameter(MATCH_PARAM, param_list, lang); 308 } 309 310 311 /** convert indexer internal id to Greenstone oid */ 312 protected String internalNum2OID(long docnum) 313 { 314 return this.gs_doc_db.internalNum2OID(docnum); 315 } 316 317 protected String internalNum2OID(String docnum) 318 { 319 return this.gs_doc_db.internalNum2OID(docnum); 320 } 46 protected static final String EQUIV_TERM_ELEM = "equivTerm"; 47 48 protected static final String STEM_ATT = "stem"; 49 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch"; 50 protected static final String FREQ_ATT = "freq"; 51 52 // Elements used in the config file that are specific to this class 53 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex"; 54 protected static final String INDEX_STEM_ELEM = "indexStem"; 55 protected static final String INDEX_ELEM = "index"; 56 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection"; 57 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage"; 58 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection"; 59 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage"; 60 61 // Some indexing options 62 protected static final String STEMINDEX_OPTION = "stemIndexes"; 63 protected static final String MAXNUMERIC_OPTION = "maxnumeric"; 64 65 /** the stem used for the index files */ 66 protected String index_stem = null; 67 
68 // stem indexes available 69 protected boolean does_case = true; 70 protected boolean does_stem = true; 71 protected boolean does_accent = false; 72 73 // maxnumeric - 74 protected int maxnumeric = 4; 75 76 SimpleDocumentDatabase gs_doc_db = null; 77 78 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName()); 79 80 /** constructor */ 81 public AbstractGS2TextSearch() 82 { 83 84 } 85 86 public void cleanUp() 87 { 88 super.cleanUp(); 89 this.gs_doc_db.cleanUp(); 90 } 91 92 /** configure this service */ 93 public boolean configure(Element info, Element extra_info) 94 { 95 if (!super.configure(info, extra_info)) 96 { 97 return false; 98 } 99 100 // find out what kind of database we have 101 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM); 102 String database_type = null; 103 if (database_type_elem != null) 104 { 105 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT); 106 } 107 if (database_type == null || database_type.equals("")) 108 { 109 database_type = "gdbm"; // the default 110 } 111 112 // the index stem is either the collection name or is specified in the config file 113 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM); 114 if (index_stem_elem != null) 115 { 116 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT); 117 } 118 if (this.index_stem == null || this.index_stem.equals("")) 119 { 120 logger.warn("indexStem element not found, stem will default to collection name"); 121 this.index_stem = this.cluster_name; 122 } 123 124 // replaces default AbstractSearch version with one tied to database 125 gs_doc_db = new SimpleDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem); 126 if (!gs_doc_db.isValid()) 127 { 128 logger.error("Failed to open Document Database."); 129 return false; 130 } 131 this.gs_doc = gs_doc_db; 132 133 // do we support any of the extended 
features? 134 does_chunking = true; 135 136 // Get the default index out of <defaultIndex> (buildConfig.xml) 137 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM); 138 if (def != null) 139 { 140 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT); 141 } // otherwise will be "", and the first one will be the default 142 143 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml) 144 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM); 145 if (defSub != null) 146 { 147 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT); 148 } 149 150 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml) 151 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM); 152 if (defLang != null) 153 { 154 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT); 155 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage 156 157 // get index options 158 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER); 159 if (index_option_list != null) 160 { 161 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM); 162 for (int i = 0; i < options.getLength(); i++) 163 { 164 Element opt = (Element) options.item(i); 165 String name = opt.getAttribute(GSXML.NAME_ATT); 166 String value = opt.getAttribute(GSXML.VALUE_ATT); 167 if (name.equals(MAXNUMERIC_OPTION)) 168 { 169 int maxnum = Integer.parseInt(value); 170 if (4 <= maxnum && maxnum < 512) 171 { 172 maxnumeric = maxnum; 173 } 174 } 175 else if (name.equals(STEMINDEX_OPTION)) 176 { 177 int stemindex = Integer.parseInt(value); 178 // stem and case are true by default, accent folding false by default 179 if ((stemindex & 1) == 0) 180 { 181 does_case = false; 182 } 183 if ((stemindex & 2) == 0) 184 { 185 does_stem = false; 186 } 187 if ((stemindex & 
4) != 0) 188 { 189 does_accent = true; 190 } 191 } 192 } 193 } 194 195 // get display info from extra info 196 if (extra_info != null) 197 { 198 Document owner = info.getOwnerDocument(); 199 // so far we have index specific display elements, and global format elements 200 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM); 201 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM); 202 203 for (int i = 0; i < indexes.getLength(); i++) 204 { 205 Element ind = (Element) indexes.item(i); 206 String name = ind.getAttribute(GSXML.NAME_ATT); 207 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name); 208 if (node_extra == null) 209 { 210 logger.error("haven't found extra info for index named " + name); 211 continue; 212 } 213 214 // get the display elements if any - displayName 215 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM); 216 if (display_names != null) 217 { 218 for (int j = 0; j < display_names.getLength(); j++) 219 { 220 Element e = (Element) display_names.item(j); 221 ind.appendChild(owner.importNode(e, true)); 222 } 223 } 224 } // for each index 225 } 226 return true; 227 } 228 229 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) 230 { 231 // the index info - 232 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER); 233 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM); 234 int len = indexes.getLength(); 235 // now add even if there is only one 236 for (int i = 0; i < len; i++) 237 { 238 Element index = (Element) indexes.item(i); 239 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT); 240 if (shortname.equals("")) 241 { 242 continue; 243 } 244 index_ids.add(shortname); 245 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 246 if (display_name.equals("")) 247 { 248 display_name = 
index.getAttribute(GSXML.NAME_ATT); 249 if (display_name.equals("")) 250 { 251 display_name = shortname; 252 } 253 } 254 index_names.add(display_name); 255 } 256 } 257 258 protected void getIndexSubcollectionData(ArrayList index_sub_ids, ArrayList index_sub_names, String lang) 259 { 260 // the index info - 261 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER); 262 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); 263 int len = index_subs.getLength(); 264 // now add even if there is only one 265 for (int i = 0; i < len; i++) 266 { 267 Element indexsub = (Element) index_subs.item(i); 268 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT); 269 if (shortname.equals("")) 270 { 271 continue; 272 } 273 index_sub_ids.add(shortname); 274 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 275 if (display_name.equals("")) 276 { 277 display_name = indexsub.getAttribute(GSXML.NAME_ATT); 278 if (display_name.equals("")) 279 { 280 display_name = shortname; 281 } 282 } 283 index_sub_names.add(display_name); 284 } 285 } 286 287 protected void getIndexLanguageData(ArrayList index_lang_ids, ArrayList index_lang_names, String lang) 288 { 289 // the index info - 290 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER); 291 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM); 292 int len = index_langs.getLength(); 293 // now add even if there is only one 294 for (int i = 0; i < len; i++) 295 { 296 Element indexlang = (Element) index_langs.item(i); 297 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT); 298 if (shortname.equals("")) 299 { 300 continue; 301 } 302 index_lang_ids.add(shortname); 303 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en"); 304 if (display_name.equals("")) 305 
{ 306 display_name = indexlang.getAttribute(GSXML.NAME_ATT); 307 if (display_name.equals("")) 308 { 309 display_name = shortname; 310 } 311 } 312 index_lang_names.add(display_name); 313 } 314 315 } 316 317 protected void addCustomQueryParams(Element param_list, String lang) 318 { 319 if (this.does_case) 320 { 321 // gs2 has case on by default 322 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON); 323 } 324 if (this.does_stem) 325 { 326 // but stem is off by default 327 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF); 328 } 329 if (this.does_accent) 330 { 331 // and so is accent folding 332 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF); 333 } 334 createParameter(MATCH_PARAM, param_list, lang); 335 } 336 337 /** convert indexer internal id to Greenstone oid */ 338 protected String internalNum2OID(long docnum) 339 { 340 return this.gs_doc_db.internalNum2OID(docnum); 341 } 342 343 protected String internalNum2OID(String docnum) 344 { 345 return this.gs_doc_db.internalNum2OID(docnum); 346 } 321 347 322 348 } 323 324
Note: See TracChangeset for help on using the changeset viewer.