source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractGS2TextSearch.java@ 26198

Last change on this file since 26198 was 26130, checked in by ak19, 12 years ago

custom indexSubcollection displayItems now show up in Form Search. This was also necessary in the enhanced Tudor tutorial, where search partitions are created and users are asked to provide custom displayNames for these.

File size: 11.6 KB
Line 
1/*
2 * AbstractGS2TextSearch.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20import java.util.ArrayList;
21
22import org.apache.log4j.Logger;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.BasicDocumentDatabase;
25import org.w3c.dom.Document;
26import org.w3c.dom.Element;
27import org.w3c.dom.NodeList;
28
29public abstract class AbstractGS2TextSearch extends AbstractTextSearch
30{
31 protected static final String EQUIV_TERM_ELEM = "equivTerm";
32
33 protected static final String STEM_ATT = "stem";
34 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
35 protected static final String FREQ_ATT = "freq";
36
37 // Elements used in the config file that are specific to this class
38 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
39 protected static final String INDEX_STEM_ELEM = "indexStem";
40 protected static final String INDEX_ELEM = "index";
41 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
42 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
43 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
44 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
45
46 // Some indexing options
47 protected static final String STEMINDEX_OPTION = "stemIndexes";
48 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
49
50 /** the stem used for the index files */
51 protected String index_stem = null;
52
53 // stem indexes available
54 protected boolean does_case = true;
55 protected boolean does_stem = true;
56 protected boolean does_accent = false;
57
58 // maxnumeric -
59 protected int maxnumeric = 4;
60
61 BasicDocumentDatabase gs_doc_db = null;
62
63 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2TextSearch.class.getName());
64
65 /** constructor */
66 public AbstractGS2TextSearch()
67 {
68
69 }
70
71 public void cleanUp()
72 {
73 super.cleanUp();
74 this.gs_doc_db.cleanUp();
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // find out what kind of database we have
86 Element database_type_elem = (Element) GSXML.getChildByTagName(info, GSXML.DATABASE_TYPE_ELEM);
87 String database_type = null;
88 if (database_type_elem != null)
89 {
90 database_type = database_type_elem.getAttribute(GSXML.NAME_ATT);
91 }
92 if (database_type == null || database_type.equals(""))
93 {
94 database_type = "gdbm"; // the default
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null)
100 {
101 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
102 }
103 if (this.index_stem == null || this.index_stem.equals(""))
104 {
105 logger.warn("indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // replaces default AbstractSearch version with one tied to database
110 gs_doc_db = new BasicDocumentDatabase(this.doc, database_type, this.site_home, this.cluster_name, this.index_stem);
111 if (!gs_doc_db.isValid())
112 {
113 logger.error("Failed to open Document Database.");
114 return false;
115 }
116 this.gs_doc = gs_doc_db;
117
118 // do we support any of the extended features?
119 does_chunking = true;
120
121 // Get the default index out of <defaultIndex> (buildConfig.xml)
122 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
123 if (def != null)
124 {
125 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
126 } // otherwise will be "", and the first one will be the default
127
128 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
129 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
130 if (defSub != null)
131 {
132 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
133 }
134
135 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
136 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
137 if (defLang != null)
138 {
139 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
140 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
141
142 // get index options
143 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
144 if (index_option_list != null)
145 {
146 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
147 for (int i = 0; i < options.getLength(); i++)
148 {
149 Element opt = (Element) options.item(i);
150 String name = opt.getAttribute(GSXML.NAME_ATT);
151 String value = opt.getAttribute(GSXML.VALUE_ATT);
152 if (name.equals(MAXNUMERIC_OPTION))
153 {
154 int maxnum = Integer.parseInt(value);
155 if (4 <= maxnum && maxnum < 512)
156 {
157 maxnumeric = maxnum;
158 }
159 }
160 else if (name.equals(STEMINDEX_OPTION))
161 {
162 int stemindex = Integer.parseInt(value);
163 // stem and case are true by default, accent folding false by default
164 if ((stemindex & 1) == 0)
165 {
166 does_case = false;
167 }
168 if ((stemindex & 2) == 0)
169 {
170 does_stem = false;
171 }
172 if ((stemindex & 4) != 0)
173 {
174 does_accent = true;
175 }
176 }
177 }
178 }
179
180 // get display info from extra info
181 if (extra_info != null)
182 {
183 Document owner = info.getOwnerDocument();
184 Element config_search = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
185
186 // so far we have index and indexSubcollection specific display elements, and global format elements
187
188 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
189 for (int i = 0; i < indexes.getLength(); i++)
190 {
191 Element ind = (Element) indexes.item(i);
192 String name = ind.getAttribute(GSXML.NAME_ATT);
193 Element node_extra = GSXML.getNamedElement(config_search, GSXML.INDEX_ELEM, GSXML.NAME_ATT, name);
194 if (node_extra == null)
195 {
196 logger.error("haven't found extra info for index named " + name);
197 continue;
198 }
199
200 // get the display elements if any - displayName
201 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
202 if (display_names != null)
203 {
204 for (int j = 0; j < display_names.getLength(); j++)
205 {
206 Element e = (Element) display_names.item(j);
207 ind.appendChild(owner.importNode(e, true));
208 }
209 }
210 } // for each index
211
212 NodeList indexSubcollections = info.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM); // buildConfig.xml
213
214 for (int i = 0; i < indexSubcollections.getLength(); i++)
215 {
216 Element indexSubcollection = (Element) indexSubcollections.item(i);
217 String name = indexSubcollection.getAttribute(GSXML.NAME_ATT);
218 Element node_extra = GSXML.getNamedElement(config_search, INDEX_SUBCOLLECTION_ELEM, GSXML.NAME_ATT, name); // collectionConfig.xml
219 if (node_extra == null)
220 {
221 logger.error("haven't found extra info for indexSubCollection named " + name);
222 continue;
223 }
224
225 // get the display elements if any - displayName
226 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
227 if (display_names != null)
228 {
229 for (int j = 0; j < display_names.getLength(); j++)
230 {
231 Element e = (Element) display_names.item(j);
232 indexSubcollection.appendChild(owner.importNode(e, true));
233 }
234 }
235 } // for each indexSubCollection
236 }
237 return true;
238 }
239
240 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
241 {
242 // the index info -
243 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
244 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
245 int len = indexes.getLength();
246 // now add even if there is only one
247 for (int i = 0; i < len; i++)
248 {
249 Element index = (Element) indexes.item(i);
250 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
251 if (shortname.equals(""))
252 {
253 continue;
254 }
255 index_ids.add(shortname);
256 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
257 if (display_name.equals(""))
258 {
259 display_name = index.getAttribute(GSXML.NAME_ATT);
260 if (display_name.equals(""))
261 {
262 display_name = shortname;
263 }
264 }
265 index_names.add(display_name);
266 }
267 }
268
269 protected void getIndexSubcollectionData(ArrayList<String> index_sub_ids, ArrayList<String> index_sub_names, String lang)
270 {
271 // the index info -
272 Element index_sub_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM + GSXML.LIST_MODIFIER);
273 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
274 int len = index_subs.getLength();
275 // now add even if there is only one
276 for (int i = 0; i < len; i++)
277 {
278 Element indexsub = (Element) index_subs.item(i);
279 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
280 if (shortname.equals(""))
281 {
282 continue;
283 }
284 index_sub_ids.add(shortname);
285 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
286 if (display_name.equals(""))
287 {
288 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
289 if (display_name.equals(""))
290 {
291 display_name = shortname;
292 }
293 }
294 index_sub_names.add(display_name);
295 }
296 }
297
298 protected void getIndexLanguageData(ArrayList<String> index_lang_ids, ArrayList<String> index_lang_names, String lang)
299 {
300 // the index info -
301 Element index_lang_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM + GSXML.LIST_MODIFIER);
302 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
303 int len = index_langs.getLength();
304 // now add even if there is only one
305 for (int i = 0; i < len; i++)
306 {
307 Element indexlang = (Element) index_langs.item(i);
308 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
309 if (shortname.equals(""))
310 {
311 continue;
312 }
313 index_lang_ids.add(shortname);
314 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
315 if (display_name.equals(""))
316 {
317 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
318 if (display_name.equals(""))
319 {
320 display_name = shortname;
321 }
322 }
323 index_lang_names.add(display_name);
324 }
325
326 }
327
328 protected void addCustomQueryParams(Element param_list, String lang)
329 {
330 if (this.does_case)
331 {
332 // gs2 has case on by default
333 createParameter(CASE_PARAM, param_list, lang, BOOLEAN_PARAM_ON);
334 }
335 if (this.does_stem)
336 {
337 // but stem is off by default
338 createParameter(STEM_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
339 }
340 if (this.does_accent)
341 {
342 // and so is accent folding
343 createParameter(ACCENT_PARAM, param_list, lang, BOOLEAN_PARAM_OFF);
344 }
345 createParameter(MATCH_PARAM, param_list, lang);
346 }
347
348 /** convert indexer internal id to Greenstone oid */
349 protected String internalNum2OID(long docnum)
350 {
351 return this.gs_doc_db.internalNum2OID(docnum);
352 }
353
354 protected String internalNum2OID(String docnum)
355 {
356 return this.gs_doc_db.internalNum2OID(docnum);
357 }
358
359}
Note: See TracBrowser for help on using the repository browser.