source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractSearch.java@ 14183

Last change on this file since 14183 was 14183, checked in by xiao, 17 years ago

instead of simply retrieving the first child of format element in the search element, modify to find the format element which has at least one gsf:template child element

  • Property svn:keywords set to Author Date Id Revision
File size: 16.2 KB
Line 
1/*
2 * AbstractSearch.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.util.GSXML;
23import org.greenstone.gsdl3.util.GSPath;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java classes
31import java.util.ArrayList;
32import java.util.HashMap;
33
34import org.apache.log4j.*;
35
36/** Partially implements a generic search service
37 *
38 * @author <a href="mailto:[email protected]">Katherine Don</a>
39 */
40
41public abstract class AbstractSearch
42 extends ServiceRack
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractSearch.class.getName());
46
47
48 // the search service
49 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
50
51 // compulsory params
52 protected static final String INDEX_PARAM = "index";
53 protected static final String QUERY_PARAM = "query";
54
55 // optional standard params - some of these have to be implemented
56 protected static final String INDEX_SUBCOLLECTION_PARAM = "indexSubcollection";
57 protected static final String INDEX_LANGUAGE_PARAM = "indexLanguage";
58 protected static final String MAXDOCS_PARAM = "maxDocs";
59 protected static final String HITS_PER_PAGE_PARAM = "hitsPerPage";
60 protected static final String START_PAGE_PARAM = "startPage";
61
62 protected static final String INDEX_SUBCOLLECTION_ELEM = "indexSubcollection";
63 protected static final String INDEX_LANGUAGE_ELEM = "indexLanguage";
64
65
66 // some other common params that may be used
67 protected static final String CASE_PARAM = "case";
68 protected static final String STEM_PARAM = "stem";
69 protected static final String ACCENT_PARAM="accent";
70
71 protected static final String BOOLEAN_PARAM_ON = "1";
72 protected static final String BOOLEAN_PARAM_OFF = "0";
73 protected static final String MATCH_PARAM = "matchMode";
74 protected static final String MATCH_PARAM_ALL = "all";
75 protected static final String MATCH_PARAM_SOME = "some";
76
77 /** can more than one index be searched at the smae time? */
78 protected boolean does_multi_index_search = false;
79 /** does this service support paging of results? */
80 protected boolean does_paging = false;
81 /** does this service support asking for a subset of results? */
82 protected boolean does_chunking = false;
83 /** the default document type - use if all documents are the same type
84 */
85 protected String default_document_type = null;
86 /** the default index, or comma separated list if more than one is
87 * the default (with start and end commas, eg ,TI,SU,).
88 * Should be set by configure()
89 */
90 protected String default_index = "";
91
92 protected String default_index_subcollection = "";
93
94 protected String default_index_language = "";
95
96 public AbstractSearch()
97 {
98 }
99
100 /** sets up the short service info for TextQuery. If other services
101 * will be provided, should be added in the subclass configure
102 * also looks for search format info, and document format info
103 */
104 public boolean configure(Element info, Element extra_info)
105 {
106 if (!super.configure(info, extra_info)){
107 return false;
108 }
109
110 logger.info("Configuring AbstractSearch...");
111
112 this.config_info = info;
113
114 // set up short_service_info_ - for now just has id and type. the name (lang dependent) will be added in if the list is requested.
115 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
116 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
117 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
118 this.short_service_info.appendChild(tq_service);
119
120 // add some format info to service map if there is any - look in extra info
121 // first look in buildConfig
122 Element format = (Element)GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
123
124 if (format==null) {
125 String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM);
126
127 //note by xiao: instead of retrieving the first 'format' element inside the 'search'
128 // element, we are trying to find the real format element which has at least one
129 // 'gsf:template' child element. (extra_info is collectionConfig.xml)
130 //format = (Element) GSXML.getNodeByPath(extra_info, path);
131 Element search_elem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
132 NodeList format_elems = null;
133 if (search_elem != null) {
134 format_elems = search_elem.getElementsByTagName(GSXML.FORMAT_ELEM);
135 }
136 for(int i=0; i<format_elems.getLength(); i++) {
137 format = (Element)format_elems.item(i);
138 if (format.getElementsByTagName("gsf:template").getLength() != 0) {
139 break;
140 }
141 }
142 }//end of if(format==null)
143 //
144 if (format != null) {
145 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format, true));
146 }
147
148 // look for document display format - for documentType
149 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
150 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
151 if (display_format != null) {
152 // check for docType option.
153 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
154 if (doc_type_opt != null) {
155 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
156 if (!value.equals("")) {
157 this.default_document_type = value;
158 }
159 }
160 }
161
162 return true;
163 }
164
165 /** returns the description of the TextQuery service. If a subclass
166 * provides other services they need to provides their own descriptions */
167 protected Element getServiceDescription(String service, String lang, String subset)
168 {
169 if (!service.equals(TEXT_QUERY_SERVICE)) {
170 return null;
171 }
172
173 Element tq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
174 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
175 tq_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
176 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) {
177 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getServiceName(TEXT_QUERY_SERVICE, lang) ));
178 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getServiceSubmit(TEXT_QUERY_SERVICE, lang) ));
179 tq_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getServiceDescription(TEXT_QUERY_SERVICE, lang)));
180 }
181 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
182 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
183 addCustomQueryParams(param_list, lang);
184 addStandardQueryParams(param_list, lang);
185 tq_service.appendChild(param_list);
186 }
187 return tq_service;
188
189 }
190
191 // perhaps these should be changed to search down the class hierarchy for
192 // values - do can just put the info in the resource bundle to use it
193 /** returns the default name for the TextQuery service */
194 protected String getServiceName(String service_id, String lang) {
195 return getTextString(service_id+".name", lang);
196 }
197
198 /** returns the default description for the TextQuery service */
199 protected String getServiceDescription(String service_id, String lang) {
200 return getTextString(service_id+".description", lang);
201 }
202
203 /** returns the default submit button text for the TextQuery service */
204 protected String getServiceSubmit(String service_id, String lang) {
205 return getTextString(service_id+".submit", lang);
206
207 }
208 /** adds the standard query params into the service description */
209 protected void addStandardQueryParams(Element param_list, String lang)
210 {
211 if (!default_index.equals("")){
212 createParameter(INDEX_PARAM, param_list, lang);
213 }
214 if (!default_index_subcollection.equals("")){
215 createParameter(INDEX_SUBCOLLECTION_PARAM,param_list, lang);
216 }
217 if (!default_index_language.equals("")){
218 createParameter(INDEX_LANGUAGE_PARAM,param_list, lang);
219 }
220 if (does_chunking) {
221 createParameter(MAXDOCS_PARAM, param_list, lang);
222 }
223 if (does_paging) {
224 createParameter(HITS_PER_PAGE_PARAM, param_list, lang);
225 createParameter(START_PAGE_PARAM, param_list, lang);
226 }
227 createParameter(QUERY_PARAM, param_list, lang);
228 }
229
230 /** adds any service specific query params into the service
231 * default implementation: add nothing. subclasses may need to
232 * override this to add in their specific parameters
233 */
234 protected void addCustomQueryParams(Element param_list, String lang)
235 {
236 // default behaviour, do nothing
237 }
238
239 /** default implementations for the standard parameters plus some
240 * other common ones
241 * index, maxDocs, hitsPerPage, startPage, query, case, stem,
242 */
243 protected void createParameter(String name, Element param_list, String lang) {
244 Element param = null;
245 if (name.equals(QUERY_PARAM)) {
246 param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
247 param_list.appendChild(param);
248 } else if (name.equals(INDEX_PARAM)) {
249
250 // should we make these class fields?
251 ArrayList index_ids = new ArrayList();
252 ArrayList index_names = new ArrayList();
253 getIndexData(index_ids, index_names, lang);
254 String param_type = GSXML.PARAM_TYPE_ENUM_SINGLE;
255 if (does_multi_index_search) {
256 param_type = GSXML.PARAM_TYPE_ENUM_MULTI;
257 }
258 param = GSXML.createParameterDescription2(this.doc, INDEX_PARAM, getTextString("param."+INDEX_PARAM, lang), param_type, this.default_index, index_ids, index_names);
259 param_list.appendChild(param);
260 }
261 else if (name.equals(INDEX_SUBCOLLECTION_PARAM)){
262 Element index_sub_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_SUBCOLLECTION_ELEM+GSXML.LIST_MODIFIER);
263 if (index_sub_list == null) return;
264 ArrayList index_sub_ids = new ArrayList();
265 ArrayList index_sub_names = new ArrayList();
266 getIndexSubcollectionData(index_sub_ids, index_sub_names, lang);
267 String param_type = GSXML.PARAM_TYPE_ENUM_SINGLE;
268 if (does_multi_index_search) {
269 param_type = GSXML.PARAM_TYPE_ENUM_MULTI;
270 }
271 param = GSXML.createParameterDescription2(this.doc, INDEX_SUBCOLLECTION_PARAM, getTextString("param."+INDEX_SUBCOLLECTION_PARAM, lang), param_type, this.default_index_subcollection, index_sub_ids, index_sub_names);
272 param_list.appendChild(param);
273 }
274 else if(name.equals(INDEX_LANGUAGE_PARAM)){
275 Element index_lang_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_LANGUAGE_ELEM+GSXML.LIST_MODIFIER);
276 if (index_lang_list == null) return;
277 ArrayList index_lang_ids = new ArrayList();
278 ArrayList index_lang_names = new ArrayList();
279 getIndexLanguageData(index_lang_ids, index_lang_names, lang);
280 String param_type = GSXML.PARAM_TYPE_ENUM_SINGLE;
281 if (does_multi_index_search) {
282 param_type = GSXML.PARAM_TYPE_ENUM_MULTI;
283 }
284 param = GSXML.createParameterDescription2(this.doc, INDEX_LANGUAGE_PARAM, getTextString("param."+INDEX_LANGUAGE_PARAM, lang), param_type, this.default_index_language, index_lang_ids, index_lang_names);
285 param_list.appendChild(param);
286 }
287 else if (name.equals(MAXDOCS_PARAM) || name.equals(HITS_PER_PAGE_PARAM)) {
288 String default_val = "100";
289 if (name.equals(HITS_PER_PAGE_PARAM)) {
290 default_val = "10";
291 }
292 param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_INTEGER, default_val, null, null);
293 param_list.appendChild(param);
294 } else if (name.equals(CASE_PARAM) || name.equals(STEM_PARAM) || name.equals(ACCENT_PARAM)) {
295 String[] bool_ops = {"0", "1"};
296 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
297 param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, bool_ops, bool_texts);
298 param_list.appendChild(param);
299 } else if (name.equals(MATCH_PARAM)) {
300 String[] vals = {MATCH_PARAM_SOME, MATCH_PARAM_ALL };
301 String[] val_texts = {getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_SOME, lang), getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_ALL, lang)};
302 param = GSXML.createParameterDescription(this.doc, MATCH_PARAM, getTextString("param."+MATCH_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, MATCH_PARAM_SOME, vals, val_texts);
303 param_list.appendChild(param);
304 } else if (name.equals(START_PAGE_PARAM)) {
305 // start page - set to 1 for the search page
306 param = GSXML.createParameterDescription(this.doc, START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, "1", null, null);
307 param_list.appendChild(param);
308 }
309
310
311 }
312 /** create an element to go into the search results list. A node element
313 * has the form
314 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy' rank='0.23'/>
315 */
316 protected Element createDocNode(String node_id, String rank) {
317 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
318 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
319 node.setAttribute(GSXML.NODE_RANK_ATT, rank);
320 String doc_type = null;
321 if (default_document_type != null) {
322 doc_type = default_document_type;
323 } else {
324 doc_type = getDocType(node_id);
325 }
326 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
327 String node_type = getNodeType(node_id, doc_type);
328 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
329 return node;
330 }
331
332 /** returns the node type of the specified node.
333 should be one of
334 GSXML.NODE_TYPE_LEAF,
335 GSXML.NODE_TYPE_INTERNAL,
336 GSXML.NODE_TYPE_ROOT
337 */
338 protected String getNodeType(String node_id, String doc_type) {
339 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
340 return GSXML.NODE_TYPE_LEAF;
341 }
342
343 if (!hasParent(node_id)) {
344 return GSXML.NODE_TYPE_ROOT;
345 }
346 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
347 return GSXML.NODE_TYPE_LEAF;
348 }
349 if (!hasChildren(node_id)) {
350 return GSXML.NODE_TYPE_LEAF;
351 }
352 return GSXML.NODE_TYPE_INTERNAL;
353
354 }
355
356
357 /** returns the document type of the doc that the specified node
358 belongs to. should be one of
359 GSXML.DOC_TYPE_SIMPLE,
360 GSXML.DOC_TYPE_PAGED,
361 GSXML.DOC_TYPE_HIERARCHY
362 default implementation returns GSXML.DOC_TYPE_SIMPLE, over ride
363 if documents can be hierarchical
364 */
365 protected String getDocType(String node_id) {
366 return GSXML.DOC_TYPE_SIMPLE;
367 }
368
369 /** returns true if the node has child nodes
370 * default implementation returns false, over ride if documents can be
371 * hierarchical
372 */
373 protected boolean hasChildren(String node_id) {
374 return false;
375 }
376 /** returns true if the node has a parent
377 * default implementation returns false, over ride if documents can be
378 * hierarchical*/
379 protected boolean hasParent(String node_id) {
380 return false;
381 }
382
383 /** do the actual query
384 * must be implemented by subclass */
385 abstract protected Element processTextQuery(Element request);
386
387 /** get the details about the indexes available
388 * must be implemented by subclass
389 * there must be at least one index */
390 abstract protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang);
391
392 /** get the details about the indexexSubcollections available
393 * might be implemented by subclass
394 */
395 protected void getIndexSubcollectionData(ArrayList index_ids, ArrayList index_names, String lang){}
396
397 /** get the details about the indexes available
398 * might be implemented by subclass
399 */
400 protected void getIndexLanguageData(ArrayList index_ids, ArrayList index_names, String lang){}
401
402
403}
404
Note: See TracBrowser for help on using the repository browser.