source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 25851

Last change on this file since 25851 was 25851, checked in by sjm84, 12 years ago

Some minor tidying

File size: 10.7 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import java.util.ArrayList;
22
23import org.apache.log4j.Logger;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.SimpleDocumentDatabase;
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30public abstract class AbstractGS2TextSearch extends AbstractTextSearch
31{
32 protected static final String EQUIV_TERM_ELEM = "equivTerm";
33
34 protected static final String STEM_ATT = "stem";
35 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
36 protected static final String FREQ_ATT = "freq";
37
38 // Elements used in the config file that are specific to this class
39 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
40 protected static final String INDEX_STEM_ELEM = "indexStem";
41 protected static final String INDEX_ELEM = "index";
42 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
43 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
44 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
45 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
46
47 // Some indexing options
48 protected static final String STEMINDEX_OPTION = "stemIndexes";
49 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
50
51 /** the stem used for the index files */
52 protected String index_stem = null;
53
54 // stem indexes available
55 protected boolean does_case = true;
56 protected boolean does_stem = true;
57 protected boolean does_accent = false;
58
59 // maxnumeric -
60 protected int maxnumeric = 4;
61
62 SimpleDocumentDatabase gs_doc_db = null;
63
64 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
65
66 /** constructor */
67 public AbstractGS2TextSearch()
68 {
69
70 }
71
72 public void cleanUp()
73 {
74 super.cleanUp();
75 this.gs_doc_db.cleanUp();
76 }
77
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info))
82 {
83 return false;
84 }
85
86 // find out what kind of database we have
87 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
88 String database_type = null;
89 if (database_type_elem != null)
90 {
91 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
92 }
93 if (database_type == null || database_type.equals(""))
94 {
95 database_type = "gdbm"; // the default
96 }
97
98 // the index stem is either the collection name or is specified in the config file
99 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
100 if (index_stem_elem != null)
101 {
102 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
103 }
104 if (this.index_stem == null || this.index_stem.equals(""))
105 {
106 logger.warn("indexStem element not found, stem will default to collection name");
107 this.index_stem = this.cluster_name;
108 }
109
110 // replaces default AbstractSearch version with one tied to database
111 gs_doc_db = new SimpleDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem);
112 if (!gs_doc_db.isValid())
113 {
114 logger.error("Failed to open Document Database.");
115 return false;
116 }
117 this.gs_doc = gs_doc_db;
118
119 // do we support any of the extended features?
120 does_chunking = true;
121
122 // Get the default index out of <defaultIndex> (buildConfig.xml)
123 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
124 if (def != null)
125 {
126 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
127 } // otherwise will be "", and the first one will be the default
128
129 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
130 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
131 if (defSub != null)
132 {
133 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
134 }
135
136 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
137 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
138 if (defLang != null)
139 {
140 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
141 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
142
143 // get index options
144 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
145 if (index_option_list != null)
146 {
147 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
148 for (int i = 0; i < options.getLength(); i++)
149 {
150 Element opt = (Element) options.item(i);
151 String name = opt.getAttribute(GSXML.NAME_ATT);
152 String value = opt.getAttribute(GSXML.VALUE_ATT);
153 if (name.equals(MAXNUMERIC_OPTION))
154 {
155 int maxnum = Integer.parseInt(value);
156 if (4 <= maxnum && maxnum < 512)
157 {
158 maxnumeric = maxnum;
159 }
160 }
161 else if (name.equals(STEMINDEX_OPTION))
162 {
163 int stemindex = Integer.parseInt(value);
164 // stem and case are true by default, accent folding false by default
165 if ((stemindex & 1) == 0)
166 {
167 does_case = false;
168 }
169 if ((stemindex & 2) == 0)
170 {
171 does_stem = false;
172 }
173 if ((stemindex & 4) != 0)
174 {
175 does_accent = true;
176 }
177 }
178 }
179 }
180
181 // get display info from extra info
182 if (extra_info != null)
183 {
184 Document owner = info.getOwnerDocument();
185 // so far we have index specific display elements, and global format elements
186 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
187 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
188
189 for (int i = 0; i < indexes.getLength(); i++)
190 {
191 Element ind = (Element) indexes.item(i);
192 String name = ind.getAttribute(GSXML.NAME_ATT);
193 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
194 if (node_extra == null)
195 {
196 logger.error("haven't found extra info for index named " + name);
197 continue;
198 }
199
200 // get the display elements if any - displayName
201 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
202 if (display_names != null)
203 {
204 for (int j = 0; j < display_names.getLength(); j++)
205 {
206 Element e = (Element) display_names.item(j);
207 ind.appendChild(owner.importNode(e, true));
208 }
209 }
210 } // for each index
211 }
212 return true;
213 }
214
215 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
216 {
217 // the index info -
218 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
219 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
220 int len = indexes.getLength();
221 // now add even if there is only one
222 for (int i = 0; i < len; i++)
223 {
224 Element index = (Element) indexes.item(i);
225 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
226 if (shortname.equals(""))
227 {
228 continue;
229 }
230 index_ids.add(shortname);
231 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
232 if (display_name.equals(""))
233 {
234 display_name = index.getAttribute(GSXML.NAME_ATT);
235 if (display_name.equals(""))
236 {
237 display_name = shortname;
238 }
239 }
240 index_names.add(display_name);
241 }
242 }
243
244 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
245 {
246 // the index info -
247 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
248 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
249 int len = index_subs.getLength();
250 // now add even if there is only one
251 for (int i = 0; i < len; i++)
252 {
253 Element indexsub = (Element) index_subs.item(i);
254 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
255 if (shortname.equals(""))
256 {
257 continue;
258 }
259 index_sub_ids.add(shortname);
260 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
261 if (display_name.equals(""))
262 {
263 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
264 if (display_name.equals(""))
265 {
266 display_name = shortname;
267 }
268 }
269 index_sub_names.add(display_name);
270 }
271 }
272
273 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
274 {
275 // the index info -
276 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
277 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
278 int len = index_langs.getLength();
279 // now add even if there is only one
280 for (int i = 0; i < len; i++)
281 {
282 Element indexlang = (Element) index_langs.item(i);
283 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
284 if (shortname.equals(""))
285 {
286 continue;
287 }
288 index_lang_ids.add(shortname);
289 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
290 if (display_name.equals(""))
291 {
292 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
293 if (display_name.equals(""))
294 {
295 display_name = shortname;
296 }
297 }
298 index_lang_names.add(display_name);
299 }
300
301 }
302
303 protected void addCustomQueryParams(Element param_list, String lang)
304 {
305 if (this.does_case)
306 {
307 // gs2 has case on by default
308 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON);
309 }
310 if (this.does_stem)
311 {
312 // but stem is off by default
313 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
314 }
315 if (this.does_accent)
316 {
317 // and so is accent folding
318 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
319 }
320 createParameter(MATCH_PARAM, param_list, lang);
321 }
322
323 /** convert indexer internal id to Greenstone oid */
324 protected String internalNum2OID(long docnum)
325 {
326 return this.gs_doc_db.internalNum2OID(docnum);
327 }
328
329 protected String internalNum2OID(String docnum)
330 {
331 return this.gs_doc_db.internalNum2OID(docnum);
332 }
333
334}
Note: See TracBrowser for help on using the repository browser.