source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 30634

Last change on this file since 30634 was 30634, checked in by kjdon, 8 years ago

Modified the way levels, indexes and classifier buttons are displayed: a displayItem with a specific lang takes priority, then a displayItem with a key for dictionary lookup; if there are no displayItems, the level/index name is looked up in the dictionary.

File size: 12.3 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20import java.util.ArrayList;
21
22import org.apache.log4j.Logger;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.BasicDocumentDatabase;
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29public abstract class AbstractGS2TextSearch extends AbstractTextSearch
30{
    // Element and attribute names for term information.
    // NOTE(review): none of these four are referenced in this file; presumably
    // subclasses use them when building query results - confirm.
    protected static final String EQUIV_TERM_ELEM = "equivTerm";

    protected static final String STEM_ATT = "stem";
    protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
    protected static final String FREQ_ATT = "freq";

    // Elements used in the config file that are specific to this class
    protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
    protected static final String INDEX_STEM_ELEM = "indexStem";
    protected static final String INDEX_ELEM = "index";
    protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
    protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
    protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
    protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";

    // Names of the indexing options that configure() looks for in the index option list
    protected static final String STEMINDEX_OPTION = "stemIndexes";
    protected static final String MAXNUMERIC_OPTION = "maxnumeric";

    /**
     * the stem used for the index files; configure() reads it from the
     * indexStem element and falls back to the cluster name when it is absent
     */
    protected String index_stem = null;

    // stem indexes available: configure() sets these from bits 1, 2 and 4
    // (case, stem, accent respectively) of the stemIndexes option value
    protected boolean does_case = false;
    protected boolean does_stem = false;
    protected boolean does_accent = false;

    // value of the maxnumeric index option; configure() only accepts values
    // in the range 4..511, otherwise this default of 4 is kept
    protected int maxnumeric = 4;

    // document database created in configure(); also used for cleanUp() and
    // for translating internal document numbers to OIDs
    BasicDocumentDatabase gs_doc_db = null;

    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
64
65 /** constructor */
66 public AbstractGS2TextSearch()
67 {
68
69 }
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.gs_doc_db.cleanUp();
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // find out what kind of database we have
86 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
87 String database_type = null;
88 if (database_type_elem != null)
89 {
90 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (database_type == null || database_type.equals(""))
93 {
94 database_type = "gdbm"; // the default
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null)
100 {
101 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (this.index_stem == null || this.index_stem.equals(""))
104 {
105 logger.warn("indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // replaces default AbstractSearch version with one tied to database
110 gs_doc_db = new BasicDocumentDatabase(database_type, this.site_home, this.cluster_name, this.index_stem);
111 if (!gs_doc_db.isValid())
112 {
113 logger.error("Failed to open Document Database.");
114 return false;
115 }
116 this.gs_doc = gs_doc_db;
117
118 // do we support any of the extended features?
119 //does_chunking = true;
120
121 // Get the default index out of <defaultIndex> (buildConfig.xml)
122 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
123 if (def != null)
124 {
125 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
126 } // otherwise will be "", and the first one will be the default
127
128 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
129 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
130 if (defSub != null)
131 {
132 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
133 }
134
135 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
136 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
137 if (defLang != null)
138 {
139 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
140 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
141
142 // get index options
143 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
144 if (index_option_list != null)
145 {
146 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
147 for (int i = 0; i < options.getLength(); i++)
148 {
149 Element opt = (Element) options.item(i);
150 String name = opt.getAttribute(GSXML.NAME_ATT);
151 String value = opt.getAttribute(GSXML.VALUE_ATT);
152 if (name.equals(MAXNUMERIC_OPTION))
153 {
154 int maxnum = Integer.parseInt(value);
155 if (4 <= maxnum && maxnum < 512)
156 {
157 maxnumeric = maxnum;
158 }
159 }
160 else if (name.equals(STEMINDEX_OPTION))
161 {
162 int stemindex = Integer.parseInt(value);
163 if ((stemindex & 1) != 0)
164 {
165 does_case = true;
166 }
167 if ((stemindex & 2) != 0)
168 {
169 does_stem = true;
170 }
171 if ((stemindex & 4) != 0)
172 {
173 does_accent = true;
174 }
175 }
176 }
177 }
178
179 // get display info from extra info
180 if (extra_info != null)
181 {
182 Document owner = info.getOwnerDocument();
183 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
184
185 // so far we have index and indexSubcollection specific display elements, and global format elements
186
187 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
188 for (int i = 0; i < indexes.getLength(); i++)
189 {
190 Element ind = (Element) indexes.item(i);
191 String name = ind.getAttribute(GSXML.NAME_ATT);
192 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
193 if (node_extra == null)
194 {
195 logger.error("haven't found extra info for index named " + name);
196 continue;
197 }
198
199 // get the display elements if any - displayName
200 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
201 if (display_names != null)
202 {
203 for (int j = 0; j < display_names.getLength(); j++)
204 {
205 Element e = (Element) display_names.item(j);
206 ind.appendChild(owner.importNode(e, true));
207 }
208 }
209 } // for each index
210
211 NodeList indexSubcollections = info.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); // buildConfig.xml
212
213 for (int i = 0; i < indexSubcollections.getLength(); i++)
214 {
215 Element indexSubcollection = (Element) indexSubcollections.item(i);
216 String name = indexSubcollection.getAttribute(GSXML.NAME_ATT);
217 Element node_extra = GSXML.getNamedElement(config_search, INDEX_SUBCOLLECTION_ELEM, GSXML.NAME_ATT, name); // collectionConfig.xml
218 if (node_extra == null)
219 {
220 logger.error("haven't found extra info for indexSubCollection named " + name);
221 continue;
222 }
223
224 // get the display elements if any - displayName
225 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
226 if (display_names != null)
227 {
228 for (int j = 0; j < display_names.getLength(); j++)
229 {
230 Element e = (Element) display_names.item(j);
231 indexSubcollection.appendChild(owner.importNode(e, true));
232 }
233 }
234 } // for each indexSubCollection
235 }
236 return true;
237 }
238
239 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
240 {
241 // the index info -
242 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
243 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
244 int len = indexes.getLength();
245 // now add even if there is only one
246 for (int i = 0; i < len; i++)
247 {
248 Element index = (Element) indexes.item(i);
249 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
250 if (shortname.equals(""))
251 {
252 continue;
253 }
254 index_ids.add(shortname);
255 String display_name = getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
256 if (display_name.equals(""))
257 {
258 display_name = index.getAttribute(GSXML.NAME_ATT);
259 if (display_name.equals(""))
260 {
261 display_name = shortname;
262 }
263 String d_name = getTextString(display_name+".buttonname", lang, "metadata_names");
264 if (d_name == null) {
265 d_name = getTextString(cleanUpMetadata(display_name)+".buttonname", lang, "metadata_names");
266 }
267 if (d_name == null) {
268 d_name = getTextString(display_name, lang, "metadata_names");
269 }
270 if (d_name != null) {
271
272 display_name = d_name;
273 }
274 }
275 index_names.add(display_name);
276 }
277 }
278
279 protected String cleanUpMetadata(String meta) {
280 // remove namespace, and only take the first item if there is a list of them.
281
282 logger.error("clean up "+meta);
283 String[] parts = meta.split("[,;]");
284 String cleaned = parts[0];
285 if (cleaned.lastIndexOf('.') != -1) {
286 cleaned = cleaned.substring(cleaned.lastIndexOf('.')+1);
287 }
288 logger.error("returning "+cleaned);
289 return cleaned;
290
291 }
292 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
293 {
294 // the index info -
295 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
296 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
297 int len = index_subs.getLength();
298 // now add even if there is only one
299 for (int i = 0; i < len; i++)
300 {
301 Element indexsub = (Element) index_subs.item(i);
302 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
303 if (shortname.equals(""))
304 {
305 continue;
306 }
307 index_sub_ids.add(shortname);
308 String display_name = getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
309 if (display_name.equals(""))
310 {
311 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
312 if (display_name.equals(""))
313 {
314 display_name = shortname;
315 }
316 }
317 index_sub_names.add(display_name);
318 }
319 }
320
321 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
322 {
323 // the index info -
324 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
325 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
326 int len = index_langs.getLength();
327 // now add even if there is only one
328 for (int i = 0; i < len; i++)
329 {
330 Element indexlang = (Element) index_langs.item(i);
331 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
332 if (shortname.equals(""))
333 {
334 continue;
335 }
336 index_lang_ids.add(shortname);
337 String display_name = getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
338 if (display_name.equals(""))
339 {
340 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
341 if (display_name.equals(""))
342 {
343 display_name = shortname;
344 }
345 }
346 index_lang_names.add(display_name);
347 }
348
349 }
350
351 protected void addCustomQueryParams(Element param_list, String lang)
352 {
353 if (this.does_case)
354 {
355 createParameter(CASE_PARAM, param_list, lang);//, case_default);
356 }
357 if (this.does_stem)
358 {
359 createParameter(STEM_PARAM, param_list, lang);//, stem_default);
360 }
361 if (this.does_accent)
362 {
363 createParameter(ACCENT_PARAM, param_list, lang);//, accent_default);
364 }
365 createParameter(MATCH_PARAM, param_list, lang);
366 }
367
368 /** convert indexer internal id to Greenstone oid */
369 protected String internalNum2OID(long docnum)
370 {
371 return this.gs_doc_db.internalNum2OID(docnum);
372 }
373
374 protected String internalNum2OID(String docnum)
375 {
376 return this.gs_doc_db.internalNum2OID(docnum);
377 }
378
379}
Note: See TracBrowser for help on using the repository browser.