source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

File size: 10.9 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import org.greenstone.gsdl3.util.OID;
22import org.greenstone.gsdl3.util.DBInfo;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.SimpleDocumentDatabase;
25import org.greenstone.gsdl3.util.GSFile;
26
27// XML classes
28import org.w3c.dom.Document;
29import org.w3c.dom.Element;
30import org.w3c.dom.NodeList;
31
32// java
33import java.util.Vector;
34import java.util.ArrayList;
35import java.util.HashMap;
36import java.util.Map;
37import java.util.Set;
38import java.util.Iterator;
39import java.io.File;
40
41import org.apache.log4j.*;
42
43public abstract class AbstractGS2TextSearch extends AbstractTextSearch
44{
45
46 protected static final String EQUIV_TERM_ELEM = "equivTerm";
47
48 protected static final String STEM_ATT = "stem";
49 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
50 protected static final String FREQ_ATT = "freq";
51
52 // Elements used in the config file that are specific to this class
53 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
54 protected static final String INDEX_STEM_ELEM = "indexStem";
55 protected static final String INDEX_ELEM = "index";
56 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
57 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
58 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
59 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
60
61 // Some indexing options
62 protected static final String STEMINDEX_OPTION = "stemIndexes";
63 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
64
65 /** the stem used for the index files */
66 protected String index_stem = null;
67
68 // stem indexes available
69 protected boolean does_case = true;
70 protected boolean does_stem = true;
71 protected boolean does_accent = false;
72
73 // maxnumeric -
74 protected int maxnumeric = 4;
75
76 SimpleDocumentDatabase gs_doc_db = null;
77
78 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
79
80 /** constructor */
81 public AbstractGS2TextSearch()
82 {
83
84 }
85
86 public void cleanUp()
87 {
88 super.cleanUp();
89 this.gs_doc_db.cleanUp();
90 }
91
92 /** configure this service */
93 public boolean configure(Element info, Element extra_info)
94 {
95 if (!super.configure(info, extra_info))
96 {
97 return false;
98 }
99
100 // find out what kind of database we have
101 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
102 String database_type = null;
103 if (database_type_elem != null)
104 {
105 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
106 }
107 if (database_type == null || database_type.equals(""))
108 {
109 database_type = "gdbm"; // the default
110 }
111
112 // the index stem is either the collection name or is specified in the config file
113 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
114 if (index_stem_elem != null)
115 {
116 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
117 }
118 if (this.index_stem == null || this.index_stem.equals(""))
119 {
120 logger.warn("indexStem element not found, stem will default to collection name");
121 this.index_stem = this.cluster_name;
122 }
123
124 // replaces default AbstractSearch version with one tied to database
125 gs_doc_db = new SimpleDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem);
126 if (!gs_doc_db.isValid())
127 {
128 logger.error("Failed to open Document Database.");
129 return false;
130 }
131 this.gs_doc = gs_doc_db;
132
133 // do we support any of the extended features?
134 does_chunking = true;
135
136 // Get the default index out of <defaultIndex> (buildConfig.xml)
137 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
138 if (def != null)
139 {
140 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
141 } // otherwise will be "", and the first one will be the default
142
143 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
144 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
145 if (defSub != null)
146 {
147 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
148 }
149
150 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
151 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
152 if (defLang != null)
153 {
154 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
155 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
156
157 // get index options
158 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
159 if (index_option_list != null)
160 {
161 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
162 for (int i = 0; i < options.getLength(); i++)
163 {
164 Element opt = (Element) options.item(i);
165 String name = opt.getAttribute(GSXML.NAME_ATT);
166 String value = opt.getAttribute(GSXML.VALUE_ATT);
167 if (name.equals(MAXNUMERIC_OPTION))
168 {
169 int maxnum = Integer.parseInt(value);
170 if (4 <= maxnum && maxnum < 512)
171 {
172 maxnumeric = maxnum;
173 }
174 }
175 else if (name.equals(STEMINDEX_OPTION))
176 {
177 int stemindex = Integer.parseInt(value);
178 // stem and case are true by default, accent folding false by default
179 if ((stemindex & 1) == 0)
180 {
181 does_case = false;
182 }
183 if ((stemindex & 2) == 0)
184 {
185 does_stem = false;
186 }
187 if ((stemindex & 4) != 0)
188 {
189 does_accent = true;
190 }
191 }
192 }
193 }
194
195 // get display info from extra info
196 if (extra_info != null)
197 {
198 Document owner = info.getOwnerDocument();
199 // so far we have index specific display elements, and global format elements
200 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
201 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
202
203 for (int i = 0; i < indexes.getLength(); i++)
204 {
205 Element ind = (Element) indexes.item(i);
206 String name = ind.getAttribute(GSXML.NAME_ATT);
207 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
208 if (node_extra == null)
209 {
210 logger.error("haven't found extra info for index named " + name);
211 continue;
212 }
213
214 // get the display elements if any - displayName
215 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
216 if (display_names != null)
217 {
218 for (int j = 0; j < display_names.getLength(); j++)
219 {
220 Element e = (Element) display_names.item(j);
221 ind.appendChild(owner.importNode(e, true));
222 }
223 }
224 } // for each index
225 }
226 return true;
227 }
228
229 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
230 {
231 // the index info -
232 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
233 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
234 int len = indexes.getLength();
235 // now add even if there is only one
236 for (int i = 0; i < len; i++)
237 {
238 Element index = (Element) indexes.item(i);
239 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
240 if (shortname.equals(""))
241 {
242 continue;
243 }
244 index_ids.add(shortname);
245 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
246 if (display_name.equals(""))
247 {
248 display_name = index.getAttribute(GSXML.NAME_ATT);
249 if (display_name.equals(""))
250 {
251 display_name = shortname;
252 }
253 }
254 index_names.add(display_name);
255 }
256 }
257
258 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
259 {
260 // the index info -
261 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
262 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
263 int len = index_subs.getLength();
264 // now add even if there is only one
265 for (int i = 0; i < len; i++)
266 {
267 Element indexsub = (Element) index_subs.item(i);
268 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
269 if (shortname.equals(""))
270 {
271 continue;
272 }
273 index_sub_ids.add(shortname);
274 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
275 if (display_name.equals(""))
276 {
277 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
278 if (display_name.equals(""))
279 {
280 display_name = shortname;
281 }
282 }
283 index_sub_names.add(display_name);
284 }
285 }
286
287 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
288 {
289 // the index info -
290 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
291 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
292 int len = index_langs.getLength();
293 // now add even if there is only one
294 for (int i = 0; i < len; i++)
295 {
296 Element indexlang = (Element) index_langs.item(i);
297 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
298 if (shortname.equals(""))
299 {
300 continue;
301 }
302 index_lang_ids.add(shortname);
303 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
304 if (display_name.equals(""))
305 {
306 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
307 if (display_name.equals(""))
308 {
309 display_name = shortname;
310 }
311 }
312 index_lang_names.add(display_name);
313 }
314
315 }
316
317 protected void addCustomQueryParams(Element param_list, String lang)
318 {
319 if (this.does_case)
320 {
321 // gs2 has case on by default
322 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON);
323 }
324 if (this.does_stem)
325 {
326 // but stem is off by default
327 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
328 }
329 if (this.does_accent)
330 {
331 // and so is accent folding
332 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
333 }
334 createParameter(MATCH_PARAM, param_list, lang);
335 }
336
337 /** convert indexer internal id to Greenstone oid */
338 protected String internalNum2OID(long docnum)
339 {
340 return this.gs_doc_db.internalNum2OID(docnum);
341 }
342
343 protected String internalNum2OID(String docnum)
344 {
345 return this.gs_doc_db.internalNum2OID(docnum);
346 }
347
348}
Note: See TracBrowser for help on using the repository browser.