source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractGS2Search.java@ 13999

Last change on this file since 13999 was 13999, checked in by qq6, 17 years ago

add parameters:indexSubcollection, indexLanguage, defaultIndexSubcollection, defaultIndexLanguage, method: getIndexLanguageData(),getIndexSubcollectionData()

  • Property svn:keywords set to Author Date Id Revision
File size: 11.8 KB
Line 
/*
 *    AbstractGS2Search.java
 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41public abstract class AbstractGS2Search
42 extends AbstractSearch
43{
44
45 protected static final String EQUIV_TERM_ELEM = "equivTerm";
46
47 protected static final String STEM_ATT = "stem";
48 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
49 protected static final String FREQ_ATT = "freq";
50
51 // Elements used in the config file that are specific to this class
52 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
53 protected static final String INDEX_STEM_ELEM = "indexStem";
54 protected static final String INDEX_ELEM = "index";
55 protected static final String DEFAULT_INDEX_SUBCOLLECTION_ELEM = "defaultIndexSubcollection";
56 protected static final String DEFAULT_INDEX_LANGUAGE_ELEM = "defaultIndexLanguage";
57 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
58 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
59
60
61 // Some indexing options
62 protected static final String STEMINDEX_OPTION = "stemIndexes";
63 protected static final String MAXNUMERIC_OPTION = "maxnumeric";
64
65 /** the stem used for the index files */
66 protected String index_stem = null;
67
68 // stem indexes available
69 protected boolean does_case=true;
70 protected boolean does_stem=true;
71 protected boolean does_accent=false;
72
73 // maxnumeric -
74 protected int maxnumeric = 4;
75
76 protected GDBMWrapper gdbm_src = null;
77
78 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGSearch.class.getName());
79
80
/**
 * Creates the service and allocates the GDBM wrapper.
 * The database itself is opened later, in configure().
 */
public AbstractGS2Search()
{
    gdbm_src = new GDBMWrapper();
}
86 public void cleanUp() {
87 super.cleanUp();
88 this.gdbm_src.closeDatabase();
89 }
90
91 /** configure this service */
92 public boolean configure(Element info, Element extra_info)
93 {
94 if (!super.configure(info, extra_info)){
95 return false;
96 }
97
98 // do we support any of the extended features?
99 does_chunking = true;
100
101 // Get the default index out of <defaultIndex> (buildConfig.xml)
102 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
103 if (def != null) {
104 this.default_index = def.getAttribute(GSXML.SHORTNAME_ATT);
105 } // otherwise will be "", and the first one will be the default
106
107 //get the default indexSubcollection out of <defaultIndexSubcollection> (buildConfig.xml)
108 Element defSub = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_SUBCOLLECTION_ELEM);
109 if (defSub != null) {
110 this.default_index_subcollection = defSub.getAttribute(GSXML.SHORTNAME_ATT);
111 }
112
113 //get the default indexLanguage out of <defaultIndexLanguage> (buildConfig.xml)
114 Element defLang = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_LANGUAGE_ELEM);
115 if (defLang != null) {
116 this.default_index_language = defLang.getAttribute(GSXML.SHORTNAME_ATT);
117 } //concate defaultIndex + defaultIndexSubcollection + defaultIndexLanguage
118
119
120 // the index stem is either the collection name or is specified in the config file
121 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
122 if (index_stem_elem != null) {
123 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
124 }
125 if (this.index_stem == null || this.index_stem.equals("")) {
126 logger.warn("indexStem element not found, stem will default to collection name");
127 this.index_stem = this.cluster_name;
128 }
129
130 // get index options
131 Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER);
132 if (index_option_list != null) {
133 NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM);
134 for (int i=0; i<options.getLength(); i++) {
135 Element opt = (Element)options.item(i);
136 String name = opt.getAttribute(GSXML.NAME_ATT);
137 String value = opt.getAttribute(GSXML.VALUE_ATT);
138 if (name.equals(MAXNUMERIC_OPTION)) {
139 int maxnum = Integer.parseInt(value);
140 if (4 <= maxnum && maxnum < 512) {
141 maxnumeric = maxnum;
142 }
143 }
144 else if (name.equals(STEMINDEX_OPTION)) {
145 int stemindex = Integer.parseInt(value);
146 // stem and case are true by default, accent folding false by default
147 if ((stemindex & 1) == 0) {
148 does_case = false;
149 }
150 if ((stemindex & 2) == 0) {
151 does_stem = false;
152 }
153 if ((stemindex & 4) != 0) {
154 does_accent = true;
155 }
156 }
157 }
158 }
159
160 // get display info from extra info
161 if (extra_info !=null) {
162 Document owner = info.getOwnerDocument();
163 // so far we have index specific display elements, and global format elements
164 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
165 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
166
167 for (int i=0; i<indexes.getLength();i++) {
168 Element ind = (Element)indexes.item(i);
169 String name = ind.getAttribute(GSXML.NAME_ATT);
170 Element node_extra = GSXML.getNamedElement(config_search,
171 GSXML.INDEX_ELEM,
172 GSXML.NAME_ATT,
173 name);
174 if (node_extra == null) {
175 logger.error("haven't found extra info for index named "+name);
176 continue;
177 }
178
179 // get the display elements if any - displayName
180 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
181 if (display_names !=null) {
182 for (int j=0; j<display_names.getLength(); j++) {
183 Element e = (Element)display_names.item(j);
184 ind.appendChild(owner.importNode(e, true));
185 }
186 }
187 } // for each index
188 }
189 // Open GDBM database for querying
190 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem);
191 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
192 logger.error(" Could not open GDBM database!");
193 return false;
194 }
195 return true;
196 }
197
198 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
199 // the index info -
200 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
201 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
202 int len = indexes.getLength();
203 // now add even if there is only one
204 for (int i=0; i<len; i++) {
205 Element index = (Element)indexes.item(i);
206 String shortname = index.getAttribute(GSXML.SHORTNAME_ATT);
207 if (shortname.equals("")) {
208 continue;
209 }
210 index_ids.add(shortname);
211 String display_name = GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en");
212 if (display_name.equals("")) {
213 display_name = index.getAttribute(GSXML.NAME_ATT);
214 if (display_name.equals("")) {
215 display_name = shortname;
216 }
217 }
218 index_names.add(display_name);
219 }
220 }
221
222 protected void getIndexSubcollectionData(ArrayList index_sub_ids, ArrayList index_sub_names, String lang){
223 // the index info -
224 Element index_sub_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM+GSXML.LIST_MODIFIER);
225 NodeList index_subs = index_sub_list.getElementsByTagName(INDEX_SUBCOLLECTION_ELEM);
226 int len = index_subs.getLength();
227 // now add even if there is only one
228 for (int i=0; i<len; i++) {
229 Element indexsub = (Element)index_subs.item(i);
230 String shortname = indexsub.getAttribute(GSXML.SHORTNAME_ATT);
231 if (shortname.equals("")) {
232 continue;
233 }
234 index_sub_ids.add(shortname);
235 String display_name = GSXML.getDisplayText(indexsub, GSXML.DISPLAY_TEXT_NAME, lang, "en");
236 if (display_name.equals("")) {
237 display_name = indexsub.getAttribute(GSXML.NAME_ATT);
238 if (display_name.equals("")) {
239 display_name = shortname;
240 }
241 }
242 index_sub_names.add(display_name);
243 }
244 }
245
246 protected void getIndexLanguageData(ArrayList index_lang_ids, ArrayList index_lang_names, String lang){
247 // the index info -
248 Element index_lang_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM+GSXML.LIST_MODIFIER);
249 NodeList index_langs = index_lang_list.getElementsByTagName(INDEX_LANGUAGE_ELEM);
250 int len = index_langs.getLength();
251 // now add even if there is only one
252 for (int i=0; i<len; i++) {
253 Element indexlang = (Element)index_langs.item(i);
254 String shortname = indexlang.getAttribute(GSXML.SHORTNAME_ATT);
255 if (shortname.equals("")) {
256 continue;
257 }
258 index_lang_ids.add(shortname);
259 String display_name = GSXML.getDisplayText(indexlang, GSXML.DISPLAY_TEXT_NAME, lang, "en");
260 if (display_name.equals("")) {
261 display_name = indexlang.getAttribute(GSXML.NAME_ATT);
262 if (display_name.equals("")) {
263 display_name = shortname;
264 }
265 }
266 index_lang_names.add(display_name);
267 }
268
269
270 }
271
272
273 protected void addCustomQueryParams(Element param_list, String lang)
274 {
275 if (this.does_case){
276 createParameter(CASE_PARAM, param_list, lang);
277 }
278 if (this.does_stem){
279 createParameter(STEM_PARAM, param_list, lang);
280 }
281 if (this.does_accent){
282 createParameter(ACCENT_PARAM, param_list, lang);
283 }
284 createParameter(MATCH_PARAM, param_list, lang);
285 }
286
287
288
289 /** returns the document type of the doc that the specified node
290 belongs to. should be one of
291 GSXML.DOC_TYPE_SIMPLE,
292 GSXML.DOC_TYPE_PAGED,
293 GSXML.DOC_TYPE_HIERARCHY
294 */
295 protected String getDocType(String node_id){
296 DBInfo info = this.gdbm_src.getInfo(node_id);
297 if (info == null) {
298 return GSXML.DOC_TYPE_SIMPLE;
299 }
300 String doc_type = info.getInfo("doctype");
301 if (!doc_type.equals("")&&!doc_type.equals("doc")) {
302 return doc_type;
303 }
304
305 String top_id = OID.getTop(node_id);
306 boolean is_top = (top_id.equals(node_id) ? true : false);
307
308 String children = info.getInfo("contains");
309 boolean is_leaf = (children.equals("") ? true : false);
310
311 if (is_top && is_leaf) { // a single section document
312 return GSXML.DOC_TYPE_SIMPLE;
313 }
314
315 // now we just check the top node
316 if (!is_top) { // we need to look at the top info
317 info = this.gdbm_src.getInfo(top_id);
318 }
319 if (info == null) {
320 return GSXML.DOC_TYPE_HIERARCHY;
321 }
322
323 String childtype = info.getInfo("childtype");
324 if (childtype.equals("Paged")) {
325 return GSXML.DOC_TYPE_PAGED;
326 }
327 return GSXML.DOC_TYPE_HIERARCHY;
328
329 }
330
331 /** returns true if the node has child nodes */
332 protected boolean hasChildren(String node_id){
333 DBInfo info = this.gdbm_src.getInfo(node_id);
334 if (info == null) {
335 return false;
336 }
337 String contains = info.getInfo("contains");
338 if (contains.equals("")) {
339 return false;
340 }
341 return true;
342 }
343
344 /** returns true if the node has a parent */
345 protected boolean hasParent(String node_id){
346 String parent = OID.getParent(node_id);
347 if (parent.equals(node_id)) {
348 return false;
349 }
350 return true;
351 }
352
353 /** convert MG internal id to Greenstone oid */
354 protected String internalNum2OID(long docnum)
355 {
356 return this.gdbm_src.docnum2OID(docnum);
357
358 }
359 protected String internalNum2OID(String docnum)
360 {
361 return this.gdbm_src.docnum2OID(docnum);
362
363 }
364
365}
366
367
Note: See TracBrowser for help on using the repository browser.