source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractSearch.java@ 29558

Last change on this file since 29558 was 29558, checked in by kjdon, 9 years ago

Work around does_paging and does_chunking: only add in the maxdocs and hitsperpage params if the service actually uses them. lucene/solr don't use maxdocs any more. I haven't had a chance to clean up the changes, but I need to commit, so there may be extraneous debug statements still here.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.7 KB
Line 
1/*
2 * AbstractSearch.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import java.util.ArrayList;
23import java.util.HashMap;
24
25import org.apache.log4j.Logger;
26import org.greenstone.gsdl3.util.AbstractBasicDocument;
27import org.greenstone.gsdl3.util.BasicDocument;
28import org.greenstone.gsdl3.util.GSPath;
29import org.greenstone.gsdl3.util.GSXML;
30import org.greenstone.gsdl3.util.XMLConverter;
31import org.w3c.dom.Document;
32import org.w3c.dom.Element;
33import org.w3c.dom.NodeList;
34
35/**
36 * Partially implements a generic search service
37 *
38 */
39
40public abstract class AbstractSearch extends ServiceRack
41{
42
43 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractSearch.class.getName());
44
45 // the search service
46 protected String QUERY_SERVICE = null; // set by subclass
47
48 // compulsory params
49 protected static final String INDEX_PARAM = "index";
50 protected static final String QUERY_PARAM = "query";
51 protected static final String RAW_PARAM = "rawquery";
52
53 // optional standard params - some of these have to be implemented
54 protected static final String MAXDOCS_PARAM = "maxDocs";
55 protected static final String HITS_PER_PAGE_PARAM = "hitsPerPage";
56 protected static final String START_PAGE_PARAM = "startPage";
57
58 protected AbstractBasicDocument gs_doc = null;
59
60 /** can more than one index be searched at the same time? */
61 protected boolean does_multi_index_search = false;
62 /** does this service support paging of results? */
63 protected boolean does_paging = false;
64 /** does this service support asking for a subset of results? */
65 protected boolean does_chunking = false;
66 /** does this service support faceting search results */
67 protected boolean does_faceting = false;
68 /**
69 * the default document type - use if all documents are the same type
70 */
71 protected String default_document_type = null;
72 /**
73 * the default index, or comma separated list if more than one is the
74 * default (with start and end commas, eg ,TI,SU,). Should be set by
75 * configure()
76 */
77 protected String default_index = "";
78
79 protected Element service_metadata_list = null;
80 protected HashMap<String, String> paramDefaults = null;
81
82 public AbstractSearch()
83 {
84 paramDefaults = new HashMap<String, String>();
85 }
86
87 /**
88 * Sets up the short service info for service by QUERY_SERVICE (e.g.
89 * TextQuery or AudioQuery) If other services will be provided, should be
90 * added in the subclass configure also looks for search format info, and
91 * document format info
92 */
93 public boolean configure(Element info, Element extra_info)
94 {
95 if (!super.configure(info, extra_info))
96 {
97 return false;
98 }
99
100 logger.info("Configuring AbstractSearch...");
101
102 this.config_info = info;
103
104 // set up short_service_info_
105 // => for now just has id and type. the name (lang dependent)
106 // will be added in if the list is requested.
107
108 Element tq_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
109 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
110 tq_service.setAttribute(GSXML.NAME_ATT, QUERY_SERVICE);
111 this.short_service_info.appendChild(tq_service);
112
113 // load up paging defaults
114 if (does_chunking) {
115 paramDefaults.put(MAXDOCS_PARAM, "100");
116 }
117 if (does_paging) {
118 paramDefaults.put(HITS_PER_PAGE_PARAM, "20");
119 paramDefaults.put(START_PAGE_PARAM, "1");
120 }
121
122 // load up any search param defaults
123 Element search_elem = (Element) GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
124 if (search_elem != null) {
125 getSearchParamDefaults(search_elem);
126 }
127 // add some format info to service map if there is any
128 // => lookin extra info first look in buildConfig
129
130 Element format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
131
132 if (format == null)
133 {
134 // try to find a format element inside <search> that contains a gsf:template. Note what if we have only xsl:templates??
135
136 NodeList format_elems = null;
137 if (search_elem != null)
138 {
139 format_elems = search_elem.getElementsByTagName(GSXML.FORMAT_ELEM);
140 }
141 for (int i = 0; i < format_elems.getLength(); i++)
142 {
143 format = (Element) format_elems.item(i);
144 if (format.getElementsByTagName("gsf:template").getLength() != 0)
145 {
146 break;
147 }
148 }
149 }//end of if(format==null)
150 //
151 if (format != null)
152 {
153 this.format_info_map.put(QUERY_SERVICE, this.desc_doc.importNode(format, true));
154 }
155
156 // look for document display format - for documentType
157 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
158 Element display_format = (Element) GSXML.getNodeByPath(extra_info, path);
159 if (display_format != null)
160 {
161 // check for docType option.
162 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
163 if (doc_type_opt != null)
164 {
165 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
166 if (!value.equals(""))
167 {
168 this.default_document_type = value;
169 }
170 }
171 }
172
173 // Base line for document (might be overriden by sub-classes)
174 gs_doc = new BasicDocument(this.default_document_type);
175
176 return true;
177 }
178
179 protected void getSearchParamDefaults(Element search_elem) {
180
181 NodeList param_defaults_list = GSXML.getChildrenByTagName(search_elem, GSXML.PARAM_DEFAULT_ELEM);
182 for (int i=0; i<param_defaults_list.getLength(); i++) {
183 Element paramdef = (Element)param_defaults_list.item(i);
184 String name = paramdef.getAttribute(GSXML.NAME_ATT);
185 String val = paramdef.getAttribute(GSXML.VALUE_ATT);
186 if (!name.equals("") && !val.equals("")) {
187 paramDefaults.put(name, val);
188 }
189 }
190 }
191 /**
192 * returns a basic description for QUERY_SERVICE. If a subclass provides
193 * other services they need to provide their own descriptions
194 */
195 protected Element getServiceDescription(Document doc, String service, String lang, String subset)
196 {
197 if (!service.equals(QUERY_SERVICE))
198 {
199 return null;
200 }
201
202 Element tq_service = doc.createElement(GSXML.SERVICE_ELEM);
203 tq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
204 tq_service.setAttribute(GSXML.NAME_ATT, QUERY_SERVICE);
205 if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER))
206 {
207 tq_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getServiceName(QUERY_SERVICE, lang)));
208 tq_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_SUBMIT, getServiceSubmit(QUERY_SERVICE, lang)));
209 tq_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getServiceDescription(QUERY_SERVICE, lang)));
210 }
211 if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER))
212 {
213 Element param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
214 addCustomQueryParams(param_list, lang);
215 addStandardQueryParams(param_list, lang);
216 tq_service.appendChild(param_list);
217 }
218 if (subset == null || subset.equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER)) {
219
220 if (service_metadata_list == null) {
221 Document ml_doc = XMLConverter.newDOM();
222 service_metadata_list = ml_doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
223 if (does_paging) {
224 service_metadata_list.appendChild(GSXML.createMetadataElement(ml_doc, "does_paging", "true"));
225 }
226 }
227 tq_service.appendChild(doc.importNode(service_metadata_list, true));
228 }
229 return tq_service;
230
231 }
232
233 // perhaps these should be changed to search down the class hierarchy for
234 // values - do can just put the info in the resource bundle to use it
235 /** returns the default name for the TextQuery service */
236 protected String getServiceName(String service_id, String lang)
237 {
238 return getTextString(service_id + ".name", lang);
239 }
240
241 /** returns the default description for the TextQuery service */
242 protected String getServiceDescription(String service_id, String lang)
243 {
244 return getTextString(service_id + ".description", lang);
245 }
246
247 /** returns the default submit button text for the TextQuery service */
248 protected String getServiceSubmit(String service_id, String lang)
249 {
250 return getTextString(service_id + ".submit", lang);
251
252 }
253
254 /** adds the standard query params into the service description */
255 protected void addStandardQueryParams(Element param_list, String lang)
256 {
257 // this test is not so good. here we are using absence of default index
258 // to determine whether we have indexes or not. But in other places,
259 // absence of default index just means to use the first one as default.
260 if (!default_index.equals(""))
261 {
262 createParameter(INDEX_PARAM, param_list, lang);
263 }
264 if (does_chunking)
265 {
266 createParameter(MAXDOCS_PARAM, param_list, lang);
267 }
268 if (does_paging)
269 {
270 createParameter(HITS_PER_PAGE_PARAM, param_list, lang);
271 createParameter(START_PAGE_PARAM, param_list, lang);
272 }
273 createParameter(QUERY_PARAM, param_list, lang);
274 }
275
276 /**
277 * adds any service specific query params into the service default
278 * implementation: add nothing. subclasses may need to override this to add
279 * in their specific parameters
280 */
281 protected void addCustomQueryParams(Element param_list, String lang)
282 {
283 // default behaviour, do nothing
284 }
285
286 protected void createParameter(String name, Element param_list, String lang)
287 {
288 createParameter(name, param_list, lang, null);
289 }
290
291 protected void createParameter(String name, Element param_list, String lang, String default_value)
292 {
293 // at this level, not interested in boolean return type
294 createParameterChain(name, param_list, lang, default_value);
295 }
296
297 /**
298 * default implementations for the standard parameters plus some other
299 * common ones index, maxDocs, hitsPerPage, startPage
300 */
301
302 protected boolean createParameterChain(String name, Element param_list, String lang, String default_value)
303 {
304 Document doc = param_list.getOwnerDocument();
305 Element param = null;
306 String param_default = default_value;
307 if (default_value == null) {
308 // have we got a stored up default? will be null if not there
309 param_default = paramDefaults.get(name);
310 }
311 if (name.equals(QUERY_PARAM) || name.equals(RAW_PARAM))
312 {
313 param = GSXML.createParameterDescription(doc, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_STRING, param_default, null, null);
314 param_list.appendChild(param);
315 return true;
316 }
317 else if (name.equals(INDEX_PARAM))
318 {
319 // should we make these class fields?
320 ArrayList<String> index_ids = new ArrayList<String>();
321 ArrayList<String> index_names = new ArrayList<String>();
322 getIndexData(index_ids, index_names, lang);
323 String param_type = GSXML.PARAM_TYPE_ENUM_SINGLE;
324 if (does_multi_index_search)
325 {
326 param_type = GSXML.PARAM_TYPE_ENUM_MULTI;
327 }
328 if (param_default == null)
329 {
330 param_default = this.default_index;
331 }
332 param = GSXML.createParameterDescription2(doc, INDEX_PARAM, getTextString("param." + INDEX_PARAM, lang), param_type, param_default, index_ids, index_names);
333 param_list.appendChild(param);
334 return true;
335 }
336 else if (name.equals(MAXDOCS_PARAM))
337 {
338 param = GSXML.createParameterDescription(doc, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_INTEGER, param_default, null, null);
339 param_list.appendChild(param);
340 return true;
341 }
342 else if (name.equals(HITS_PER_PAGE_PARAM))
343 {
344 param = GSXML.createParameterDescription(doc, name, getTextString("param." + name, lang), GSXML.PARAM_TYPE_INTEGER, param_default, null, null);
345 param_list.appendChild(param);
346 return true;
347 }
348 else if (name.equals(START_PAGE_PARAM))
349 {
350 // start page - set to 1 for the search page
351 param = GSXML.createParameterDescription(doc, START_PAGE_PARAM, "", GSXML.PARAM_TYPE_INVISIBLE, param_default, null, null);
352 param_list.appendChild(param);
353 return true;
354 }
355
356 // Get to there then none of the above params matched
357 // => return false so the chain can continue
358 return false;
359 }
360
361 /**
362 * create an element to go into the search results list. A node element has
363 * the form <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'
364 * rank='0.23'/>
365 */
366 protected Element createDocNode(Document doc, String node_id, String rank)
367 {
368 return this.gs_doc.createDocNode(doc, node_id, rank);
369 }
370
371 /**
372 * returns the document type of the doc that the specified node belongs to.
373 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
374 * GSXML.DOC_TYPE_HIERARCHY
375 */
376 protected String getDocType(String node_id)
377 {
378 return this.gs_doc.getDocType(node_id);
379 }
380
381 /**
382 * returns the node type of the specified node. should be one of
383 * GSXML.NODE_TYPE_LEAF, GSXML.NODE_TYPE_INTERNAL, GSXML.NODE_TYPE_ROOT
384 */
385 protected String getNodeType(String node_id, String doc_type)
386 {
387 return this.gs_doc.getNodeType(node_id, doc_type);
388 }
389
390 /** returns true if the node has child nodes */
391 protected boolean hasChildren(String node_id)
392 {
393 return this.gs_doc.hasChildren(node_id);
394 }
395
396 /** returns true if the node has a parent */
397 protected boolean hasParent(String node_id)
398 {
399 return this.gs_doc.hasParent(node_id);
400 }
401
402 /**
403 * get the details about the indexes available must be implemented by
404 * subclass there must be at least one index
405 */
406 abstract protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang);
407
408}
Note: See TracBrowser for help on using the repository browser.