source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 32453

Last change on this file since 32453 was 30668, checked in by kjdon, 8 years ago

removed some debug strings

File size: 12.2 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20import java.util.ArrayList;
21
22import org.apache.log4j.Logger;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.BasicDocumentDatabase;
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29public abstract class AbstractGS2TextSearch extends AbstractTextSearch
30{
31 protected static final String EQUIV_TERM_ELEM = "equivTerm";
32
33 protected static final String STEM_ATT = "stem";
34 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
35 protected static final String FREQ_ATT = "freq";
36
37 // Elements used in the config file that are specific to this class
38 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
39 protected static final String INDEX_STEM_ELEM = "indexStem";
40 protected static final String INDEX_ELEM = "index";
41 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
42 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
43 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
44 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
45
46 // Some indexing options
47 protected static final String STEMINDEX_OPTION = "stemIndexes";
48 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
49
50 /** the stem used for the index files */
51 protected String index_stem = null;
52
53 // stem indexes available
54 protected boolean does_case = false;
55 protected boolean does_stem = false;
56 protected boolean does_accent = false;
57
58 // maxnumeric -
59 protected int maxnumeric = 4;
60
61 BasicDocumentDatabase gs_doc_db = null;
62
63 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
64
65 /** constructor */
66 public AbstractGS2TextSearch()
67 {
68
69 }
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.gs_doc_db.cleanUp();
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // find out what kind of database we have
86 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
87 String database_type = null;
88 if (database_type_elem != null)
89 {
90 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (database_type == null || database_type.equals(""))
93 {
94 database_type = "gdbm"; // the default
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null)
100 {
101 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (this.index_stem == null || this.index_stem.equals(""))
104 {
105 logger.warn("indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // replaces default AbstractSearch version with one tied to database
110 gs_doc_db = new BasicDocumentDatabase(database_type, this.site_home, this.cluster_name, this.index_stem);
111 if (!gs_doc_db.isValid())
112 {
113 logger.error("Failed to open Document Database.");
114 return false;
115 }
116 this.gs_doc = gs_doc_db;
117
118 // do we support any of the extended features?
119 //does_chunking = true;
120
121 // Get the default index out of <defaultIndex> (buildConfig.xml)
122 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
123 if (def != null)
124 {
125 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
126 } // otherwise will be "", and the first one will be the default
127
128 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
129 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
130 if (defSub != null)
131 {
132 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
133 }
134
135 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
136 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
137 if (defLang != null)
138 {
139 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
140 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
141
142 // get index options
143 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
144 if (index_option_list != null)
145 {
146 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
147 for (int i = 0; i < options.getLength(); i++)
148 {
149 Element opt = (Element) options.item(i);
150 String name = opt.getAttribute(GSXML.NAME_ATT);
151 String value = opt.getAttribute(GSXML.VALUE_ATT);
152 if (name.equals(MAXNUMERIC_OPTION))
153 {
154 int maxnum = Integer.parseInt(value);
155 if (4 <= maxnum && maxnum < 512)
156 {
157 maxnumeric = maxnum;
158 }
159 }
160 else if (name.equals(STEMINDEX_OPTION))
161 {
162 int stemindex = Integer.parseInt(value);
163 if ((stemindex & 1) != 0)
164 {
165 does_case = true;
166 }
167 if ((stemindex & 2) != 0)
168 {
169 does_stem = true;
170 }
171 if ((stemindex & 4) != 0)
172 {
173 does_accent = true;
174 }
175 }
176 }
177 }
178
179 // get display info from extra info
180 if (extra_info != null)
181 {
182 Document owner = info.getOwnerDocument();
183 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
184
185 // so far we have index and indexSubcollection specific display elements, and global format elements
186
187 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
188 for (int i = 0; i < indexes.getLength(); i++)
189 {
190 Element ind = (Element) indexes.item(i);
191 String name = ind.getAttribute(GSXML.NAME_ATT);
192 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
193 if (node_extra == null)
194 {
195 logger.error("haven't found extra info for index named " + name);
196 continue;
197 }
198
199 // get the display elements if any - displayName
200 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
201 if (display_names != null)
202 {
203 for (int j = 0; j < display_names.getLength(); j++)
204 {
205 Element e = (Element) display_names.item(j);
206 ind.appendChild(owner.importNode(e, true));
207 }
208 }
209 } // for each index
210
211 NodeList indexSubcollections = info.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); // buildConfig.xml
212
213 for (int i = 0; i < indexSubcollections.getLength(); i++)
214 {
215 Element indexSubcollection = (Element) indexSubcollections.item(i);
216 String name = indexSubcollection.getAttribute(GSXML.NAME_ATT);
217 Element node_extra = GSXML.getNamedElement(config_search, INDEX_SUBCOLLECTION_ELEM, GSXML.NAME_ATT, name); // collectionConfig.xml
218 if (node_extra == null)
219 {
220 logger.error("haven't found extra info for indexSubCollection named " + name);
221 continue;
222 }
223
224 // get the display elements if any - displayName
225 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
226 if (display_names != null)
227 {
228 for (int j = 0; j < display_names.getLength(); j++)
229 {
230 Element e = (Element) display_names.item(j);
231 indexSubcollection.appendChild(owner.importNode(e, true));
232 }
233 }
234 } // for each indexSubCollection
235 }
236 return true;
237 }
238
239 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
240 {
241 // the index info -
242 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
243 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
244 int len = indexes.getLength();
245 // now add even if there is only one
246 for (int i = 0; i < len; i++)
247 {
248 Element index = (Element) indexes.item(i);
249 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
250 if (shortname.equals(""))
251 {
252 continue;
253 }
254 index_ids.add(shortname);
255 String display_name = getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
256 if (display_name.equals(""))
257 {
258 display_name = index.getAttribute(GSXML.NAME_ATT);
259 if (display_name.equals(""))
260 {
261 display_name = shortname;
262 }
263 String d_name = getTextString(display_name+".buttonname", lang, "metadata_names");
264 if (d_name == null) {
265 d_name = getTextString(cleanUpMetadata(display_name)+".buttonname", lang, "metadata_names");
266 }
267 if (d_name == null) {
268 d_name = getTextString(display_name, lang, "metadata_names");
269 }
270 if (d_name != null) {
271
272 display_name = d_name;
273 }
274 }
275 index_names.add(display_name);
276 }
277 }
278
279 protected String cleanUpMetadata(String meta) {
280 // remove namespace, and only take the first item if there is a list of them.
281 String[] parts = meta.split("[,;]");
282 String cleaned = parts[0];
283 if (cleaned.lastIndexOf('.') != -1) {
284 cleaned = cleaned.substring(cleaned.lastIndexOf('.')+1);
285 }
286 return cleaned;
287
288 }
289 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
290 {
291 // the index info -
292 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
293 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
294 int len = index_subs.getLength();
295 // now add even if there is only one
296 for (int i = 0; i < len; i++)
297 {
298 Element indexsub = (Element) index_subs.item(i);
299 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
300 if (shortname.equals(""))
301 {
302 continue;
303 }
304 index_sub_ids.add(shortname);
305 String display_name = getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
306 if (display_name.equals(""))
307 {
308 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
309 if (display_name.equals(""))
310 {
311 display_name = shortname;
312 }
313 }
314 index_sub_names.add(display_name);
315 }
316 }
317
318 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
319 {
320 // the index info -
321 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
322 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
323 int len = index_langs.getLength();
324 // now add even if there is only one
325 for (int i = 0; i < len; i++)
326 {
327 Element indexlang = (Element) index_langs.item(i);
328 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
329 if (shortname.equals(""))
330 {
331 continue;
332 }
333 index_lang_ids.add(shortname);
334 String display_name = getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
335 if (display_name.equals(""))
336 {
337 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
338 if (display_name.equals(""))
339 {
340 display_name = shortname;
341 }
342 }
343 index_lang_names.add(display_name);
344 }
345
346 }
347
348 protected void addCustomQueryParams(Element param_list, String lang)
349 {
350 if (this.does_case)
351 {
352 createParameter(CASE_PARAM, param_list, lang);//, case_default);
353 }
354 if (this.does_stem)
355 {
356 createParameter(STEM_PARAM, param_list, lang);//, stem_default);
357 }
358 if (this.does_accent)
359 {
360 createParameter(ACCENT_PARAM, param_list, lang);//, accent_default);
361 }
362 createParameter(MATCH_PARAM, param_list, lang);
363 }
364
365 /** convert indexer internal id to Greenstone oid */
366 protected String internalNum2OID(long docnum)
367 {
368 return this.gs_doc_db.internalNum2OID(docnum);
369 }
370
371 protected String internalNum2OID(String docnum)
372 {
373 return this.gs_doc_db.internalNum2OID(docnum);
374 }
375
376}
Note: See TracBrowser for help on using the repository browser.