source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 26046

Last change on this file since 26046 was 26046, checked in by kjdon, 12 years ago

moved a heap of duplicated code out of service racks and into BasicDocument classes

File size: 10.6 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20import java.util.ArrayList;
21
22import org.apache.log4j.Logger;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.BasicDocumentDatabase;
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29public abstract class AbstractGS2TextSearch extends AbstractTextSearch
30{
31 protected static final String EQUIV_TERM_ELEM = "equivTerm";
32
33 protected static final String STEM_ATT = "stem";
34 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
35 protected static final String FREQ_ATT = "freq";
36
37 // Elements used in the config file that are specific to this class
38 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
39 protected static final String INDEX_STEM_ELEM = "indexStem";
40 protected static final String INDEX_ELEM = "index";
41 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
42 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
43 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
44 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
45
46 // Some indexing options
47 protected static final String STEMINDEX_OPTION = "stemIndexes";
48 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
49
50 /** the stem used for the index files */
51 protected String index_stem = null;
52
53 // stem indexes available
54 protected boolean does_case = true;
55 protected boolean does_stem = true;
56 protected boolean does_accent = false;
57
58 // maxnumeric -
59 protected int maxnumeric = 4;
60
61 BasicDocumentDatabase gs_doc_db = null;
62
63 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
64
65 /** constructor */
66 public AbstractGS2TextSearch()
67 {
68
69 }
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.gs_doc_db.cleanUp();
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // find out what kind of database we have
86 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
87 String database_type = null;
88 if (database_type_elem != null)
89 {
90 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (database_type == null || database_type.equals(""))
93 {
94 database_type = "gdbm"; // the default
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null)
100 {
101 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (this.index_stem == null || this.index_stem.equals(""))
104 {
105 logger.warn("indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // replaces default AbstractSearch version with one tied to database
110 gs_doc_db = new BasicDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem);
111 if (!gs_doc_db.isValid())
112 {
113 logger.error("Failed to open Document Database.");
114 return false;
115 }
116 this.gs_doc = gs_doc_db;
117
118 // do we support any of the extended features?
119 does_chunking = true;
120
121 // Get the default index out of <defaultIndex> (buildConfig.xml)
122 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
123 if (def != null)
124 {
125 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
126 } // otherwise will be "", and the first one will be the default
127
128 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
129 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
130 if (defSub != null)
131 {
132 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
133 }
134
135 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
136 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
137 if (defLang != null)
138 {
139 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
140 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
141
142 // get index options
143 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
144 if (index_option_list != null)
145 {
146 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
147 for (int i = 0; i < options.getLength(); i++)
148 {
149 Element opt = (Element) options.item(i);
150 String name = opt.getAttribute(GSXML.NAME_ATT);
151 String value = opt.getAttribute(GSXML.VALUE_ATT);
152 if (name.equals(MAXNUMERIC_OPTION))
153 {
154 int maxnum = Integer.parseInt(value);
155 if (4 <= maxnum && maxnum < 512)
156 {
157 maxnumeric = maxnum;
158 }
159 }
160 else if (name.equals(STEMINDEX_OPTION))
161 {
162 int stemindex = Integer.parseInt(value);
163 // stem and case are true by default, accent folding false by default
164 if ((stemindex & 1) == 0)
165 {
166 does_case = false;
167 }
168 if ((stemindex & 2) == 0)
169 {
170 does_stem = false;
171 }
172 if ((stemindex & 4) != 0)
173 {
174 does_accent = true;
175 }
176 }
177 }
178 }
179
180 // get display info from extra info
181 if (extra_info != null)
182 {
183 Document owner = info.getOwnerDocument();
184 // so far we have index specific display elements, and global format elements
185 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
186 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
187
188 for (int i = 0; i < indexes.getLength(); i++)
189 {
190 Element ind = (Element) indexes.item(i);
191 String name = ind.getAttribute(GSXML.NAME_ATT);
192 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
193 if (node_extra == null)
194 {
195 logger.error("haven't found extra info for index named " + name);
196 continue;
197 }
198
199 // get the display elements if any - displayName
200 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
201 if (display_names != null)
202 {
203 for (int j = 0; j < display_names.getLength(); j++)
204 {
205 Element e = (Element) display_names.item(j);
206 ind.appendChild(owner.importNode(e, true));
207 }
208 }
209 } // for each index
210 }
211 return true;
212 }
213
214 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
215 {
216 // the index info -
217 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
218 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
219 int len = indexes.getLength();
220 // now add even if there is only one
221 for (int i = 0; i < len; i++)
222 {
223 Element index = (Element) indexes.item(i);
224 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
225 if (shortname.equals(""))
226 {
227 continue;
228 }
229 index_ids.add(shortname);
230 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
231 if (display_name.equals(""))
232 {
233 display_name = index.getAttribute(GSXML.NAME_ATT);
234 if (display_name.equals(""))
235 {
236 display_name = shortname;
237 }
238 }
239 index_names.add(display_name);
240 }
241 }
242
243 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
244 {
245 // the index info -
246 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
247 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
248 int len = index_subs.getLength();
249 // now add even if there is only one
250 for (int i = 0; i < len; i++)
251 {
252 Element indexsub = (Element) index_subs.item(i);
253 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
254 if (shortname.equals(""))
255 {
256 continue;
257 }
258 index_sub_ids.add(shortname);
259 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
260 if (display_name.equals(""))
261 {
262 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
263 if (display_name.equals(""))
264 {
265 display_name = shortname;
266 }
267 }
268 index_sub_names.add(display_name);
269 }
270 }
271
272 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
273 {
274 // the index info -
275 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
276 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
277 int len = index_langs.getLength();
278 // now add even if there is only one
279 for (int i = 0; i < len; i++)
280 {
281 Element indexlang = (Element) index_langs.item(i);
282 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
283 if (shortname.equals(""))
284 {
285 continue;
286 }
287 index_lang_ids.add(shortname);
288 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
289 if (display_name.equals(""))
290 {
291 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
292 if (display_name.equals(""))
293 {
294 display_name = shortname;
295 }
296 }
297 index_lang_names.add(display_name);
298 }
299
300 }
301
302 protected void addCustomQueryParams(Element param_list, String lang)
303 {
304 if (this.does_case)
305 {
306 // gs2 has case on by default
307 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON);
308 }
309 if (this.does_stem)
310 {
311 // but stem is off by default
312 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
313 }
314 if (this.does_accent)
315 {
316 // and so is accent folding
317 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
318 }
319 createParameter(MATCH_PARAM, param_list, lang);
320 }
321
322 /** convert indexer internal id to Greenstone oid */
323 protected String internalNum2OID(long docnum)
324 {
325 return this.gs_doc_db.internalNum2OID(docnum);
326 }
327
328 protected String internalNum2OID(String docnum)
329 {
330 return this.gs_doc_db.internalNum2OID(docnum);
331 }
332
333}
Note: See TracBrowser for help on using the repository browser.