source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 33308

Last change on this file since 33308 was 33308, checked in by kjdon, 5 years ago

You can now set hidden='true' on an index element in collectionConfig.xml. The index will still be built, but it won't be sent back as an option for the index param on a search page. Use this for, e.g., map data indexes that you don't need to display to the user.

File size: 12.4 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20import java.util.ArrayList;
21
22import org.apache.log4j.Logger;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.BasicDocumentDatabase;
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29public abstract class AbstractGS2TextSearch extends AbstractTextSearch
30{
31 protected static final String EQUIV_TERM_ELEM = "equivTerm";
32
33 protected static final String STEM_ATT = "stem";
34 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
35 protected static final String FREQ_ATT = "freq";
36
37 // Elements used in the config file that are specific to this class
38 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
39 protected static final String INDEX_STEM_ELEM = "indexStem";
40 protected static final String INDEX_ELEM = "index";
41 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
42 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
43 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
44 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
45
46 // Some indexing options
47 protected static final String STEMINDEX_OPTION = "stemIndexes";
48 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
49
50 /** the stem used for the index files */
51 protected String index_stem = null;
52
53 // stem indexes available
54 protected boolean does_case = false;
55 protected boolean does_stem = false;
56 protected boolean does_accent = false;
57
58 // maxnumeric -
59 protected int maxnumeric = 4;
60
61 BasicDocumentDatabase gs_doc_db = null;
62
63 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
64
65 /** constructor */
66 public AbstractGS2TextSearch()
67 {
68
69 }
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.gs_doc_db.cleanUp();
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // find out what kind of database we have
86 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
87 String database_type = null;
88 if (database_type_elem != null)
89 {
90 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (database_type == null || database_type.equals(""))
93 {
94 database_type = "gdbm"; // the default
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null)
100 {
101 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (this.index_stem == null || this.index_stem.equals(""))
104 {
105 logger.warn("indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // replaces default AbstractSearch version with one tied to database
110 gs_doc_db = new BasicDocumentDatabase(database_type, this.site_home, this.cluster_name, this.index_stem);
111 if (!gs_doc_db.isValid())
112 {
113 logger.error("Failed to open Document Database.");
114 return false;
115 }
116 this.gs_doc = gs_doc_db;
117
118 // do we support any of the extended features?
119 //does_chunking = true;
120
121 // Get the default index out of <defaultIndex> (buildConfig.xml)
122 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
123 if (def != null)
124 {
125 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
126 } // otherwise will be "", and the first one will be the default
127
128 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
129 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
130 if (defSub != null)
131 {
132 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
133 }
134
135 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
136 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
137 if (defLang != null)
138 {
139 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
140 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
141
142 // get index options
143 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
144 if (index_option_list != null)
145 {
146 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
147 for (int i = 0; i < options.getLength(); i++)
148 {
149 Element opt = (Element) options.item(i);
150 String name = opt.getAttribute(GSXML.NAME_ATT);
151 String value = opt.getAttribute(GSXML.VALUE_ATT);
152 if (name.equals(MAXNUMERIC_OPTION))
153 {
154 int maxnum = Integer.parseInt(value);
155 if (4 <= maxnum && maxnum < 512)
156 {
157 maxnumeric = maxnum;
158 }
159 }
160 else if (name.equals(STEMINDEX_OPTION))
161 {
162 int stemindex = Integer.parseInt(value);
163 if ((stemindex & 1) != 0)
164 {
165 does_case = true;
166 }
167 if ((stemindex & 2) != 0)
168 {
169 does_stem = true;
170 }
171 if ((stemindex & 4) != 0)
172 {
173 does_accent = true;
174 }
175 }
176 }
177 }
178
179 // get display info from extra info
180 if (extra_info != null)
181 {
182 Document owner = info.getOwnerDocument();
183 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
184
185 // so far we have index and indexSubcollection specific display elements, and global format elements
186
187 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
188 for (int i = 0; i < indexes.getLength(); i++)
189 {
190 Element ind = (Element) indexes.item(i);
191 String name = ind.getAttribute(GSXML.NAME_ATT);
192 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
193 if (node_extra == null)
194 {
195 logger.error("haven't found extra info for index named " + name);
196 continue;
197 }
198 if (node_extra.getAttribute("hidden").equals("true")) {
199 ind.setAttribute("hidden", "true");
200 } else {
201 // get the display elements if any - displayName
202 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
203 if (display_names != null)
204 {
205 for (int j = 0; j < display_names.getLength(); j++)
206 {
207 Element e = (Element) display_names.item(j);
208 ind.appendChild(owner.importNode(e, true));
209 }
210 }
211 }
212 } // for each index
213
214 NodeList indexSubcollections = info.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); // buildConfig.xml
215
216 for (int i = 0; i < indexSubcollections.getLength(); i++)
217 {
218 Element indexSubcollection = (Element) indexSubcollections.item(i);
219 String name = indexSubcollection.getAttribute(GSXML.NAME_ATT);
220 Element node_extra = GSXML.getNamedElement(config_search, INDEX_SUBCOLLECTION_ELEM, GSXML.NAME_ATT, name); // collectionConfig.xml
221 if (node_extra == null)
222 {
223 logger.error("haven't found extra info for indexSubCollection named " + name);
224 continue;
225 }
226
227 // get the display elements if any - displayName
228 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
229 if (display_names != null)
230 {
231 for (int j = 0; j < display_names.getLength(); j++)
232 {
233 Element e = (Element) display_names.item(j);
234 indexSubcollection.appendChild(owner.importNode(e, true));
235 }
236 }
237 } // for each indexSubCollection
238 }
239 return true;
240 }
241
242 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
243 {
244 // the index info -
245 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
246 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
247 int len = indexes.getLength();
248 // now add even if there is only one
249 for (int i = 0; i < len; i++)
250 {
251 Element index = (Element) indexes.item(i);
252 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
253 if (shortname.equals(""))
254 {
255 continue;
256 }
257 if (index.getAttribute("hidden").equals("true")) {
258 // don't add in hidden ones
259 continue;
260 }
261 index_ids.add(shortname);
262 String display_name = getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
263 if (display_name.equals(""))
264 {
265 display_name = index.getAttribute(GSXML.NAME_ATT);
266 if (display_name.equals(""))
267 {
268 display_name = shortname;
269 }
270 String d_name = getTextString(display_name+".buttonname", lang, "metadata_names");
271 if (d_name == null) {
272 d_name = getTextString(cleanUpMetadata(display_name)+".buttonname", lang, "metadata_names");
273 }
274 if (d_name == null) {
275 d_name = getTextString(display_name, lang, "metadata_names");
276 }
277 if (d_name != null) {
278
279 display_name = d_name;
280 }
281 }
282 index_names.add(display_name);
283 }
284 }
285
286 protected String cleanUpMetadata(String meta) {
287 // remove namespace, and only take the first item if there is a list of them.
288 String[] parts = meta.split("[,;]");
289 String cleaned = parts[0];
290 if (cleaned.lastIndexOf('.') != -1) {
291 cleaned = cleaned.substring(cleaned.lastIndexOf('.')+1);
292 }
293 return cleaned;
294
295 }
296 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
297 {
298 // the index info -
299 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
300 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
301 int len = index_subs.getLength();
302 // now add even if there is only one
303 for (int i = 0; i < len; i++)
304 {
305 Element indexsub = (Element) index_subs.item(i);
306 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
307 if (shortname.equals(""))
308 {
309 continue;
310 }
311 index_sub_ids.add(shortname);
312 String display_name = getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
313 if (display_name.equals(""))
314 {
315 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
316 if (display_name.equals(""))
317 {
318 display_name = shortname;
319 }
320 }
321 index_sub_names.add(display_name);
322 }
323 }
324
325 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
326 {
327 // the index info -
328 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
329 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
330 int len = index_langs.getLength();
331 // now add even if there is only one
332 for (int i = 0; i < len; i++)
333 {
334 Element indexlang = (Element) index_langs.item(i);
335 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
336 if (shortname.equals(""))
337 {
338 continue;
339 }
340 index_lang_ids.add(shortname);
341 String display_name = getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en", "metadata_names");
342 if (display_name.equals(""))
343 {
344 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
345 if (display_name.equals(""))
346 {
347 display_name = shortname;
348 }
349 }
350 index_lang_names.add(display_name);
351 }
352
353 }
354
355 protected void addCustomQueryParams(Element param_list, String lang)
356 {
357 if (this.does_case)
358 {
359 createParameter(CASE_PARAM, param_list, lang);//, case_default);
360 }
361 if (this.does_stem)
362 {
363 createParameter(STEM_PARAM, param_list, lang);//, stem_default);
364 }
365 if (this.does_accent)
366 {
367 createParameter(ACCENT_PARAM, param_list, lang);//, accent_default);
368 }
369 createParameter(MATCH_PARAM, param_list, lang);
370 }
371
372 /** convert indexer internal id to Greenstone oid */
373 protected String internalNum2OID(long docnum)
374 {
375 return this.gs_doc_db.internalNum2OID(docnum);
376 }
377
378 protected String internalNum2OID(String docnum)
379 {
380 return this.gs_doc_db.internalNum2OID(docnum);
381 }
382
383}
Note: See TracBrowser for help on using the repository browser.