source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 25852

Last change on this file since 25852 was 25851, checked in by sjm84, 12 years ago

Some minor tidying

File size: 10.7 KB
RevLine 
/*
 *    AbstractGS2TextSearch.java
 *    Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
[25851]21import java.util.ArrayList;
22
23import org.apache.log4j.Logger;
[24394]24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.SimpleDocumentDatabase;
26import org.w3c.dom.Document;
[24857]27import org.w3c.dom.Element;
[24394]28import org.w3c.dom.NodeList;
29
[24857]30public abstract class AbstractGS2TextSearch extends AbstractTextSearch
[24394]31{
[24857]32 protected static final String EQUIV_TERM_ELEM = "equivTerm";
[24394]33
[24857]34 protected static final String STEM_ATT = "stem";
35 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
36 protected static final String FREQ_ATT = "freq";
[24394]37
[24857]38 // Elements used in the config file that are specific to this class
39 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
40 protected static final String INDEX_STEM_ELEM = "indexStem";
41 protected static final String INDEX_ELEM = "index";
42 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
43 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
44 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
45 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
[24394]46
[24857]47 // Some indexing options
48 protected static final String STEMINDEX_OPTION = "stemIndexes";
49 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
[24394]50
[24857]51 /** the stem used for the index files */
52 protected String index_stem = null;
[24394]53
[24857]54 // stem indexes available
55 protected boolean does_case = true;
56 protected boolean does_stem = true;
57 protected boolean does_accent = false;
[24394]58
[24857]59 // maxnumeric -
60 protected int maxnumeric = 4;
[24394]61
[24857]62 SimpleDocumentDatabase gs_doc_db = null;
[24394]63
[24857]64 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
[24394]65
[24857]66 /** constructor */
67 public AbstractGS2TextSearch()
68 {
[24394]69
70 }
71
[24857]72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.gs_doc_db.cleanUp();
[24394]76 }
77
[24857]78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
[24394]85
[24857]86 // find out what kind of database we have
87 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
88 String database_type = null;
89 if (database_type_elem != null)
90 {
91 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
[24394]92 }
[24857]93 if (database_type == null || database_type.equals(""))
94 {
95 database_type = "gdbm"; // the default
[24394]96 }
97
[24857]98 // the index stem is either the collection name or is specified in the config file
99 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
100 if (index_stem_elem != null)
101 {
102 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
[24394]103 }
[24857]104 if (this.index_stem == null || this.index_stem.equals(""))
105 {
106 logger.warn("indexStem element not found, stem will default to collection name");
107 this.index_stem = this.cluster_name;
[24394]108 }
109
[24857]110 // replaces default AbstractSearch version with one tied to database
111 gs_doc_db = new SimpleDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem);
112 if (!gs_doc_db.isValid())
113 {
114 logger.error("Failed to open Document Database.");
115 return false;
[24394]116 }
[24857]117 this.gs_doc = gs_doc_db;
[24394]118
[24857]119 // do we support any of the extended features?
120 does_chunking = true;
121
122 // Get the default index out of <defaultIndex> (buildConfig.xml)
123 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
124 if (def != null)
125 {
126 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
127 } // otherwise will be "", and the first one will be the default
128
129 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
130 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
131 if (defSub != null)
132 {
133 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
[24394]134 }
[24857]135
136 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
137 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
138 if (defLang != null)
139 {
140 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
141 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
142
143 // get index options
144 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
145 if (index_option_list != null)
146 {
147 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
148 for (int i = 0; i < options.getLength(); i++)
149 {
150 Element opt = (Element) options.item(i);
151 String name = opt.getAttribute(GSXML.NAME_ATT);
152 String value = opt.getAttribute(GSXML.VALUE_ATT);
153 if (name.equals(MAXNUMERIC_OPTION))
154 {
155 int maxnum = Integer.parseInt(value);
156 if (4 <= maxnum && maxnum < 512)
157 {
158 maxnumeric = maxnum;
159 }
160 }
161 else if (name.equals(STEMINDEX_OPTION))
162 {
163 int stemindex = Integer.parseInt(value);
164 // stem and case are true by default, accent folding false by default
165 if ((stemindex & 1) == 0)
166 {
167 does_case = false;
168 }
169 if ((stemindex & 2) == 0)
170 {
171 does_stem = false;
172 }
173 if ((stemindex & 4) != 0)
174 {
175 does_accent = true;
176 }
177 }
178 }
[24394]179 }
180
[24857]181 // get display info from extra info
182 if (extra_info != null)
183 {
184 Document owner = info.getOwnerDocument();
185 // so far we have index specific display elements, and global format elements
186 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
187 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
[24394]188
[24857]189 for (int i = 0; i < indexes.getLength(); i++)
190 {
191 Element ind = (Element) indexes.item(i);
192 String name = ind.getAttribute(GSXML.NAME_ATT);
193 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
194 if (node_extra == null)
195 {
196 logger.error("haven't found extra info for index named " + name);
197 continue;
198 }
[24394]199
[24857]200 // get the display elements if any - displayName
201 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
202 if (display_names != null)
203 {
204 for (int j = 0; j < display_names.getLength(); j++)
205 {
206 Element e = (Element) display_names.item(j);
207 ind.appendChild(owner.importNode(e, true));
208 }
209 }
210 } // for each index
211 }
212 return true;
[24394]213 }
[24857]214
[25635]215 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
[24857]216 {
217 // the index info -
218 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
219 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
220 int len = indexes.getLength();
221 // now add even if there is only one
222 for (int i = 0; i < len; i++)
223 {
224 Element index = (Element) indexes.item(i);
225 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
226 if (shortname.equals(""))
227 {
228 continue;
229 }
230 index_ids.add(shortname);
231 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
232 if (display_name.equals(""))
233 {
234 display_name = index.getAttribute(GSXML.NAME_ATT);
235 if (display_name.equals(""))
236 {
237 display_name = shortname;
238 }
239 }
240 index_names.add(display_name);
241 }
[24394]242 }
[24857]243
[25635]244 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
[24857]245 {
246 // the index info -
247 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
248 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
249 int len = index_subs.getLength();
250 // now add even if there is only one
251 for (int i = 0; i < len; i++)
252 {
253 Element indexsub = (Element) index_subs.item(i);
254 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
255 if (shortname.equals(""))
256 {
257 continue;
258 }
259 index_sub_ids.add(shortname);
260 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
261 if (display_name.equals(""))
262 {
263 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
264 if (display_name.equals(""))
265 {
266 display_name = shortname;
267 }
268 }
269 index_sub_names.add(display_name);
270 }
[24394]271 }
272
[25635]273 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
[24857]274 {
275 // the index info -
276 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
277 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
278 int len = index_langs.getLength();
279 // now add even if there is only one
280 for (int i = 0; i < len; i++)
281 {
282 Element indexlang = (Element) index_langs.item(i);
283 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
284 if (shortname.equals(""))
285 {
286 continue;
287 }
288 index_lang_ids.add(shortname);
289 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
290 if (display_name.equals(""))
291 {
292 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
293 if (display_name.equals(""))
294 {
295 display_name = shortname;
296 }
297 }
298 index_lang_names.add(display_name);
299 }
[24394]300
[24857]301 }
[24394]302
[24857]303 protected void addCustomQueryParams(Element param_list, String lang)
304 {
305 if (this.does_case)
306 {
307 // gs2 has case on by default
308 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON);
309 }
310 if (this.does_stem)
311 {
312 // but stem is off by default
313 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
314 }
315 if (this.does_accent)
316 {
317 // and so is accent folding
318 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
319 }
320 createParameter(MATCH_PARAM, param_list, lang);
321 }
[24394]322
[24857]323 /** convert indexer internal id to Greenstone oid */
324 protected String internalNum2OID(long docnum)
325 {
326 return this.gs_doc_db.internalNum2OID(docnum);
327 }
[24394]328
[24857]329 protected String internalNum2OID(String docnum)
330 {
331 return this.gs_doc_db.internalNum2OID(docnum);
332 }
333
334}
Note: See TracBrowser for help on using the repository browser.