/*
 *    AbstractGS2FieldSearch.java
 *    Copyright (C) 2006 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.greenstone.gsdl3.service;

// Greenstone classes
import org.greenstone.mgpp.*;
import org.greenstone.gsdl3.util.*;

// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

// java classes
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.Vector;
import java.io.File;

import org.apache.log4j.*;

/**
 * Abstract base for Greenstone 2 style search services that can offer fielded
 * (form) searching in addition to the text query service set up by
 * {@link AbstractGS2Search}.
 *
 * <p>{@code configure} decides which services are advertised and
 * {@code getServiceDescription} / {@code createParameter} describe the
 * {@code FieldQuery} and {@code AdvancedFieldQuery} services. Running the
 * query itself is left to subclasses through the abstract hook methods
 * declared further down the class.
 */
abstract public class AbstractGS2FieldSearch extends AbstractGS2Search {

    // ---- extra services offered by mgpp collections ----

    /** Name of the simple form service (query text plus field, repeated). */
    protected static final String FIELD_QUERY_SERVICE = "FieldQuery";
    /** Name of the advanced form service (adds combine/case/stem/accent per row). */
    protected static final String ADVANCED_FIELD_QUERY_SERVICE = "AdvancedFieldQuery";

    // ---- extra parameters used ----

    /** Parameter selecting the search level. */
    protected static final String LEVEL_PARAM = "level";
    /** Parameter selecting result ordering; its values are the two RANK_PARAM_* constants. */
    protected static final String RANK_PARAM = "sortBy";
    /** RANK_PARAM value listed first and used as the default in createParameter. */
    protected static final String RANK_PARAM_RANK = "1";
    /** The other RANK_PARAM value. */
    protected static final String RANK_PARAM_NONE = "0";
    /** Name of the multi-parameter that groups the rows of the simple form. */
    protected static final String SIMPLE_FIELD_PARAM = "simpleField";
    /** Name of the multi-parameter that groups the rows of the advanced form. */
    protected static final String ADVANCED_FIELD_PARAM = "complexField";

    // ---- more params for field query (the components of one form row) ----

    /** Query text for a row. */
    protected static final String FIELD_QUERY_PARAM = "fqv";
    /** Per-row stemming switch (described as a "0"/"1" boolean parameter). */
    protected static final String FIELD_STEM_PARAM = "fqs";
    /** Per-row case switch (described as a "0"/"1" boolean parameter). */
    protected static final String FIELD_CASE_PARAM = "fqc";
    /** Per-row accent switch (described as a "0"/"1" boolean parameter). */
    protected static final String FIELD_ACCENT_PARAM="fqa";
    /** Field that a row searches in. */
    protected static final String FIELD_FIELD_PARAM = "fqf";
    /** How a row is combined; its values are the three FIELD_COMBINE_PARAM_* constants. */
    protected static final String FIELD_COMBINE_PARAM = "fqk";
    /** FIELD_COMBINE_PARAM value for AND; the default in createParameter. */
    protected static final String FIELD_COMBINE_PARAM_AND = "0";
    /** FIELD_COMBINE_PARAM value for OR. */
    protected static final String FIELD_COMBINE_PARAM_OR = "1";
    /** FIELD_COMBINE_PARAM value for NOT. */
    protected static final String FIELD_COMBINE_PARAM_NOT = "2";

    // ---- some stuff for config files (element / attribute names and values) ----

    /** Element name of one search type entry; the list element appends GSXML.LIST_MODIFIER. */
    protected static final String SEARCH_TYPE_ELEM = "searchType";
    // NOTE(review): the four SEARCH_TYPE_* values below are presumably matched
    // against the search type entries inside configure(); the code that would do
    // so is missing from the copy of the file reviewed here -- confirm.
    protected static final String SEARCH_TYPE_PLAIN = "plain";
    protected static final String SEARCH_TYPE_FORM = "form";
    protected static final String SEARCH_TYPE_FORM_SIMPLE = "simple";
    protected static final String SEARCH_TYPE_FORM_ADVANCED = "advanced";
    /** Element whose name attribute supplies default_level in configure(). */
    protected static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
    /** Element name of one level entry; the list element appends GSXML.LIST_MODIFIER. */
    protected static final String LEVEL_ELEM = "level";
    protected static final String FIELD_ATT = "field";

    // NOTE(review): presumably codes identifying which kind of query is being
    // processed; no use of these three is visible in the reviewed text -- confirm.
    protected static final int TEXT_QUERY = 0;
    protected static final int SIMPLE_QUERY = 1;
    protected static final int ADVANCED_QUERY = 2;

    // Operator strings. These are instance fields and not final.
    // NOTE(review): presumably left mutable so that a subclass can substitute
    // its own query syntax -- confirm before making them constants.
    protected String AND_OPERATOR = "&";
    protected String OR_OPERATOR = "|";
    protected String NOT_OPERATOR = "!";

    // the default level for retrieval - and we'll use it for searching too
    // (configure() fails if this ends up null or empty)
    protected String default_level=null;
    // the default field for searching
    // (configure() sets it to the shortname of the first field in the field list)
    protected String default_field = null;

    // which search services will we offer??
protected boolean plain_search = false;
    // The three flags (plain_search above, and the two below) are all switched on
    // by configure() when the collection has no search type list. The form flags
    // decide whether FieldQuery / AdvancedFieldQuery get advertised.
    protected boolean simple_form_search = false;
    protected boolean advanced_form_search = false;

    /** log4j logger named after this class. */
    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractGS2FieldSearch.class.getName());

    /** constructor */
    public AbstractGS2FieldSearch() {
    }

    /** Delegates to the superclass; this class adds no clean-up work of its own. */
    public void cleanUp() {
        super.cleanUp();
    }

    /**
     * configure this service
     *
     * <p>Visible steps, in order:
     * <ol>
     *   <li>run the superclass configuration and give up if it fails;</li>
     *   <li>read the search type list from {@code info}; if there is none,
     *       enable plain, simple-form and advanced-form searching;</li>
     *   <li>read the default level from the {@code defaultLevel} element and
     *       fail if it is missing or empty;</li>
     *   <li>if {@code info} has a field list, take the first field's shortname
     *       as the default field and add a service element to
     *       {@code short_service_info} for each enabled form search, reusing the
     *       text query service's format info.</li>
     * </ol>
     *
     * @param info       configuration element for this service
     * @param extra_info passed through to the superclass; not otherwise read here
     * @return {@code false} if the superclass configuration fails or no default
     *         level is specified, {@code true} otherwise
     */
    public boolean configure(Element info, Element extra_info) {
        if (!super.configure(info, extra_info)){
            return false;
        }

        // the generic config has set up the text query service, but we may not want it
        Element search_type_list = (Element) GSXML.getChildByTagName(info, SEARCH_TYPE_ELEM + GSXML.LIST_MODIFIER);
        if (search_type_list == null) {
            // assume form and plain
            this.plain_search = true;
            this.simple_form_search = true;
            this.advanced_form_search = true;
        } else {
            NodeList types = search_type_list.getElementsByTagName(SEARCH_TYPE_ELEM);
            // NOTE(review): this copy of the file is damaged from here. The rest of
            // the loop header, the loop body, the closing braces of the loop and of
            // this else-branch, and the beginning of the next comment are missing;
            // only that comment's tail survives. Restore the span from version
            // control rather than reconstructing it by hand.
            for (int i=0; i
        // [...text missing...] (buildConfig.xml)
        Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
        if (def != null) {
            this.default_level = def.getAttribute(GSXML.NAME_ATT);
        }
        if (this.default_level == null || this.default_level.equals("")) {
            logger.error("default level not specified!");
            return false;
        }

        // set up the extra services which are available for this collection
        // check the config info - if there is no field list, then there is no fielded searching
        Element field_list = (Element) GSXML.getChildByTagName(info, GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER);
        if (field_list==null) {
            // nothing more to do
            return true;
        }

        // the format info is the same for all services
        Element format_info = (Element)format_info_map.get(TEXT_QUERY_SERVICE);

        // find the default field - use the first one
        // NOTE(review): first_field is dereferenced without a check. The other
        // getChildByTagName results in this method are null-checked, which suggests
        // it can return null; a field list with no field child would then throw
        // NullPointerException here -- confirm.
        Element first_field = (Element)GSXML.getChildByTagName(field_list, GSXML.FIELD_ELEM);
        default_field = first_field.getAttribute(GSXML.SHORTNAME_ATT);

        // else set up the fielded query services
        if (this.simple_form_search) {
            // set up short_service_info_ - for now just has id and type - name will be added in on the fly
            Element fq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
            fq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
            fq_service.setAttribute(GSXML.NAME_ATT, FIELD_QUERY_SERVICE);
            this.short_service_info.appendChild(fq_service);
            if (format_info != null) {
                this.format_info_map.put(FIELD_QUERY_SERVICE, format_info);
            }
        }
        if (this.advanced_form_search) {
            Element afq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
            afq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
            afq_service.setAttribute(GSXML.NAME_ATT, ADVANCED_FIELD_QUERY_SERVICE);
            this.short_service_info.appendChild(afq_service);
            if (format_info != null) {
                this.format_info_map.put(ADVANCED_FIELD_QUERY_SERVICE, format_info);
            }
        }
        return true;
    }

    /**
     * Builds the description element for the FieldQuery and AdvancedFieldQuery
     * services; any other service id is handed to the superclass.
     *
     * <p>With {@code subset == null} both the display text and the parameter
     * list are included; otherwise only the part whose list element name equals
     * {@code subset} is.
     *
     * <p>FieldQuery parameters: the custom query params, MAXDOCS_PARAM, then a
     * multi-parameter (occurs = 4) holding query text and field.
     * AdvancedFieldQuery parameters: level, rank, MAXDOCS_PARAM, then a
     * multi-parameter (occurs = 4) holding combine, query text, the case / stem /
     * accent switches that the collection supports, and field.
     *
     * @param service_id name of the service to describe
     * @param lang       language passed to getTextString for display strings
     * @param subset     name of the single part wanted, or {@code null} for all
     * @return the service description element
     */
    protected Element getServiceDescription(String service_id, String lang, String subset) {
        // should we check that the service is actually on offer? presumably we won't get asked for services that we haven't advertised previously.
        if (!service_id.equals(FIELD_QUERY_SERVICE) && !service_id.equals(ADVANCED_FIELD_QUERY_SERVICE)) {
            return super.getServiceDescription(service_id, lang, subset);
        }

        Element service = this.doc.createElement(GSXML.SERVICE_ELEM);
        service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
        service.setAttribute(GSXML.NAME_ATT, service_id);
        if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) {
            // display strings are looked up under "<service_id>.name" etc.
            service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(service_id+".name", lang)));
            service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service_id+".submit", lang)));
            service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(service_id+".description", lang)));
        }
        if (subset == null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
            Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
            service.appendChild(param_list);
            if (service_id.equals(FIELD_QUERY_SERVICE)) {

                addCustomQueryParams(param_list, lang);
                createParameter(MAXDOCS_PARAM, param_list, lang);
                // create a multi param for the fields etc
                // text box, field
                Element multiparam = null;
                Element param=null; // NOTE(review): never used in this branch
                multiparam = GSXML.createParameterDescription(this.doc, SIMPLE_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null);
                multiparam.setAttribute("occurs", "4");
                param_list.appendChild(multiparam);

                // the components
                createParameter(FIELD_QUERY_PARAM, multiparam, lang);
                createParameter(FIELD_FIELD_PARAM, multiparam, lang);

            } else {
                createParameter(LEVEL_PARAM, param_list, lang);
                createParameter(RANK_PARAM, param_list, lang);
                createParameter(MAXDOCS_PARAM, param_list, lang);

                // create a multi param for the fields etc
                // text box, stem, case, field
                Element multiparam = null;
                Element param=null; // NOTE(review): never used in this branch
                multiparam = GSXML.createParameterDescription(this.doc, ADVANCED_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null);
                multiparam.setAttribute("occurs", "4");
                param_list.appendChild(multiparam);

                createParameter(FIELD_COMBINE_PARAM, multiparam, lang);
                createParameter(FIELD_QUERY_PARAM, multiparam, lang);
                // only offer the switches this collection supports
                if (this.does_case) {
                    createParameter(FIELD_CASE_PARAM, multiparam, lang);
                }
                if (this.does_stem) {
                    createParameter(FIELD_STEM_PARAM, multiparam, lang);
                }
                if (this.does_accent) {
                    createParameter(FIELD_ACCENT_PARAM, multiparam, lang);
                }
                createParameter(FIELD_FIELD_PARAM, multiparam, lang);
            }
        }
        return service;
    }

    /**
     * add in the level params to TextQuery
     *
     * <p>The level parameter is added first, then whatever the superclass adds.
     */
    protected void addCustomQueryParams(Element param_list, String lang) {
        createParameter(LEVEL_PARAM, param_list, lang);
        super.addCustomQueryParams(param_list, lang);
    }

    /**
     * create a param and add to the list
     *
     * <p>Handles the parameters this class introduces (level, rank, field query
     * text, case / stem / accent, field, combine). Any other name leaves
     * {@code param} null and is passed on to the superclass.
     *
     * @param name       name of the parameter to describe
     * @param param_list element the description is appended to
     * @param lang       language passed to getTextString for display strings
     */
    protected void createParameter(String name, Element param_list, String lang) {
        Element param = null;
        if (name.equals(LEVEL_PARAM)) {
            ArrayList level_ids = new ArrayList();
            ArrayList level_names = new ArrayList();
            getLevelData(level_ids, level_names, lang);
            if (level_ids.size()>1) {
                // the first one is the default
                param = GSXML.createParameterDescription2(this.doc, LEVEL_PARAM,
                    getTextString("param."+LEVEL_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE,
                    (String)level_ids.get(0), level_ids, level_names);
            } else {
                // we need to set the level, but hidden, in case there is an invalid level saved
                // NOTE(review): this branch also runs when level_ids is empty, and
                // get(0) on an empty ArrayList throws IndexOutOfBoundsException.
                param = GSXML.createParameterDescription(this.doc, LEVEL_PARAM, "",
                    GSXML.PARAM_TYPE_INVISIBLE, (String)level_ids.get(0), null, null);
            }
        } else if (name.equals(RANK_PARAM)) {
            String [] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE };
            String [] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang), getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang)};

            param = GSXML.createParameterDescription(this.doc, RANK_PARAM,
                getTextString("param."+RANK_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE,
                RANK_PARAM_RANK, vals1, vals1_texts );

        } else if (name.equals(FIELD_QUERY_PARAM)) {
            param = GSXML.createParameterDescription(this.doc, FIELD_QUERY_PARAM,
                getTextString("param."+FIELD_QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING,
                null, null, null);

        } else if (name.equals(FIELD_CASE_PARAM) || name.equals(FIELD_STEM_PARAM) || name.equals(FIELD_ACCENT_PARAM)) {
            // the on/off labels are looked up with the extra argument "AbstractSearch"
            String[] bool_ops = {"0", "1"};
            String[] bool_texts = {getTextString("param.boolean.off", lang, "AbstractSearch"),getTextString("param.boolean.on", lang, "AbstractSearch")};
            param = GSXML.createParameterDescription(this.doc, name,
                getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN,
                BOOLEAN_PARAM_ON, bool_ops, bool_texts);

        } else if (name.equals(FIELD_FIELD_PARAM)) {
            ArrayList fields = new ArrayList();
            ArrayList field_names = new ArrayList();
            getIndexData(fields, field_names, lang);
            // the field list - read from config file
            // NOTE(review): fields.get(0) throws IndexOutOfBoundsException if
            // getIndexData leaves the list empty; whether it can is not visible here.
            param = GSXML.createParameterDescription2(this.doc, name,
                getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE,
                (String)fields.get(0), fields, field_names );

        } else if (name.equals(FIELD_COMBINE_PARAM)) {

            String []vals = {FIELD_COMBINE_PARAM_AND, FIELD_COMBINE_PARAM_OR, FIELD_COMBINE_PARAM_NOT};
            String []val_texts = {getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_AND, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_OR, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_NOT, lang)};

            param = GSXML.createParameterDescription(this.doc, FIELD_COMBINE_PARAM, "",
                GSXML.PARAM_TYPE_ENUM_SINGLE, FIELD_COMBINE_PARAM_AND, vals, val_texts);
            param.setAttribute(GSXML.PARAM_IGNORE_POS_ATT, "0");
        }

        if (param != null) {
            param_list.appendChild(param);
        } else {
            // not one of ours - let the superclass describe it
            super.createParameter(name, param_list, lang);
        }
    }

    // should cache some of this
    /**
     * Called by createParameter to fill {@code level_ids} and
     * {@code level_names}. It begins by locating the level list in
     * {@code config_info}; the rest of the method is missing from this copy of
     * the file (see the note in the body).
     */
    protected void getLevelData(ArrayList level_ids, ArrayList level_names, String lang) {
        // NOTE(review): level_list is dereferenced without a null check.
        Element level_list = (Element)GSXML.getChildByTagName(this.config_info, LEVEL_ELEM+GSXML.LIST_MODIFIER);
        NodeList levels = level_list.getElementsByTagName(LEVEL_ELEM);
        // NOTE(review): this copy of the file is damaged from here. Everything
        // between the start of this loop header and the tail of a later condition
        // ("0) {") is missing: the rest of getLevelData, any members that followed
        // it, and the header and opening statements of the method that builds
        // `result` below (the declarations of result, docs, doc_ranks, params and
        // query_result are not visible). Restore the span from version control
        // rather than reconstructing it by hand.
        for (int i=0; i0) {
            // what remains of the lost method: one doc node per matching document ...
            Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
            result.appendChild(document_list);
            for (int d = 0; d < docs.length; d++) {
                String doc_id = internalNum2OID(docs[d]);
                Element doc_node = createDocNode(doc_id, doc_ranks[d]);
                document_list.appendChild(doc_node);
            }
        }

        // Create a term list to store the term information, and add it
        Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
        result.appendChild(term_list);
        addTermInfo(term_list, params, query_result);

        return result;
    }

    // ---- methods to handle actually doing the query ----
    // Subclasses supply these.

    /** do any initialisation of the query object */
    abstract protected boolean setUpQueryer(HashMap params);

    /** do the query */
    abstract protected Object runQuery(String query);

    /** get the total number of docs that match */
    abstract protected long numDocsMatched(Object query_result);

    /** get the list of doc ids */
    abstract protected String [] getDocIDs(Object query_result);

    /** get the list of doc ranks */
    abstract protected String [] getDocRanks(Object query_result);

    /** add in term info if available */
    abstract protected boolean addTermInfo(Element term_list, HashMap params, Object query_result);

    /**
     * combines all the field params into a single query
     * - for simple field query
     *
     * <p>We assume the combination (AND/OR) is done by the match param.
     *
     * <p>The query text and field values are each read from {@code params} as
     * one comma-separated string and split on "," with limit -1, so trailing
     * empty entries are kept.
     *
     * <p>NOTE(review): both values are split without a null check, so a
     * missing FIELD_QUERY_PARAM or FIELD_FIELD_PARAM entry would throw
     * NullPointerException; whether callers guarantee both are present is not
     * visible here. The text reviewed also stops partway through the loop
     * header below, so the rest of the method could not be checked.
     */
    protected String parseFieldQueryParams(HashMap params) {

        StringBuffer final_query = new StringBuffer(256);
        String text_line = (String)params.get(FIELD_QUERY_PARAM);
        String[] texts = text_line.split(",", -1);
        String field_line = (String)params.get(FIELD_FIELD_PARAM);
        String[] fields = field_line.split(",", -1);

        for (int i=0; i