/* * AbstractMGPPSearch.java * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.service; // greenstone classes import org.greenstone.mgpp.*; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.GSFile; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; // java classes import java.util.Iterator; import java.util.Set; import java.util.HashMap; import java.util.Map; import java.util.ArrayList; import java.util.Vector; import java.io.File; /** Partially implements a generic MGPP search service * * @author Katherine Don */ abstract public class AbstractMGPPSearch extends AbstractSearch { // extra services offered by mgpp collections private static final String FIELD_QUERY_SERVICE = "FieldQuery"; private static final String ADVANCED_FIELD_QUERY_SERVICE = "AdvancedFieldQuery"; // extra parameters used // private static final String INDEX_FIELD_PARAM = "index"; private static final String LEVEL_PARAM = "level"; private static final String RANK_PARAM = "sortBy"; private static final String RANK_PARAM_RANK = "1"; private static final String RANK_PARAM_NONE = "0"; private static final String SIMPLE_FIELD_PARAM = "simpleField"; private static final String ADVANCED_FIELD_PARAM = "complexField"; // more params for field query private static final String FIELD_QUERY_PARAM = "fqv"; private static final String FIELD_STEM_PARAM = "fqs"; private static final String FIELD_CASE_PARAM = "fqc"; private static final String FIELD_FIELD_PARAM = "fqf"; private static final String FIELD_COMBINE_PARAM = "fqk"; private static final String FIELD_COMBINE_PARAM_AND = "0"; private static final String FIELD_COMBINE_PARAM_OR = "1"; private static final String FIELD_COMBINE_PARAM_NOT = "2"; // some stuff for config files private static final String SEARCH_TYPE_ELEM = "searchType"; private static final String SEARCH_TYPE_PLAIN = "plain"; private static final String SEARCH_TYPE_FORM = "form"; private static final String SEARCH_TYPE_FORM_SIMPLE = "simple"; private static final String SEARCH_TYPE_FORM_ADVANCED = "advanced"; protected static final String DEFAULT_INDEX_ELEM = "defaultIndex"; protected static final String DEFAULT_LEVEL_ELEM = "defaultLevel"; protected static final String LEVEL_ELEM = "level"; protected static final String EQUIV_TERM_ELEM = "equivTerm"; protected static final String STEM_ATT = "stem"; protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch"; protected static final String FREQ_ATT = "freq"; private static final int TEXT_QUERY = 0; private static final int SIMPLE_QUERY = 1; private static final int ADVANCED_QUERY = 2; protected static final String INDEX_STEM_ELEM = "indexStem"; protected static final String FIELD_ATT = "field"; private MGPPWrapper mgpp_src=null; /** the default index */ protected String default_index = null; // the default level for retrieval - and we'll use it for searching too private String default_level=null; // the default field for searching private String default_field = null; // which search services will we offer?? private boolean plain_search = false; private boolean simple_form_search = false; private boolean advanced_form_search = false; /** the stem used for the index files */ protected String index_stem = null; public AbstractMGPPSearch() { this.mgpp_src = new MGPPWrapper(); } public boolean configure(Element info, Element extra_info) { // Do generic configuration if (super.configure(info, extra_info) == false) return false; // Do specific configuration System.out.println("Configuring AbstractMGPPSearch..."); // do we support any of the extended features? does_chunking = true; // Get the default index out of (buildConfig.xml) Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM); if (def != null) { this.default_index = def.getAttribute(GSXML.NAME_ATT); } if (this.default_index == null || this.default_index.equals("")) { System.err.println("Error: default index not specified!"); return false; } // the index stem is either specified in the config file or is "index" Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM); if (index_stem_elem != null) { this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT); } if (this.index_stem == null || this.index_stem.equals("")) { System.err.println("AbstractMGPPSearch.configure(): indexStem element not found, stem will default to collection name"); this.index_stem = this.cluster_name; } // the generic config has set up the text query service, but we may not want it Element search_type_list = (Element) GSXML.getChildByTagName(info, SEARCH_TYPE_ELEM + GSXML.LIST_MODIFIER); if (search_type_list == null) { // assume form and plain this.plain_search = true; this.simple_form_search = true; this.advanced_form_search = true; } else { NodeList types = search_type_list.getElementsByTagName(SEARCH_TYPE_ELEM); for (int i=0; i (buildConfig.xml) def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM); if (def != null) { this.default_level = def.getAttribute(GSXML.NAME_ATT); } if (this.default_level == null || this.default_level.equals("")) { System.err.println("Error: default level not specified!"); return false; } // the default level is also the level which gdbm is expecting // this must not be overwritten this.mgpp_src.setReturnLevel(this.default_level); // return term info this.mgpp_src.setReturnTerms(true); // set the default - this may be overwritten by query params this.mgpp_src.setQueryLevel(this.default_level); // set up the extra services which are available for this collection // check the config info - if there is no field list, then there is no fielded searching Element field_list = (Element) GSXML.getChildByTagName(info, GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER); if (field_list==null) { // nothing more to do return true; } // the format info is the same for all services Element format_info = (Element)format_info_map.get(TEXT_QUERY_SERVICE); // find the default field - use the first one Element first_field = (Element)GSXML.getChildByTagName(field_list, GSXML.FIELD_ELEM); default_field = first_field.getAttribute(GSXML.SHORTNAME_ATT); // else set up the fielded query services if (this.simple_form_search) { // set up short_service_info_ - for now just has id and type - name will be added in on teh fly Element fq_service = this.doc.createElement(GSXML.SERVICE_ELEM); fq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); fq_service.setAttribute(GSXML.NAME_ATT, FIELD_QUERY_SERVICE); this.short_service_info.appendChild(fq_service); if (format_info != null) { this.format_info_map.put(FIELD_QUERY_SERVICE, format_info); } } if (this.advanced_form_search) { Element afq_service = this.doc.createElement(GSXML.SERVICE_ELEM); afq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); afq_service.setAttribute(GSXML.NAME_ATT, ADVANCED_FIELD_QUERY_SERVICE); this.short_service_info.appendChild(afq_service); if (format_info != null) { this.format_info_map.put(ADVANCED_FIELD_QUERY_SERVICE, format_info); } } return true; } protected Element getServiceDescription(String service_id, String lang, String subset) { // should we check that the service is actually on offer? presumably we wont get asked for services that we haven't advertised previously. if (!service_id.equals(FIELD_QUERY_SERVICE) && !service_id.equals(ADVANCED_FIELD_QUERY_SERVICE)) { return super.getServiceDescription(service_id, lang, subset); } Element service = this.doc.createElement(GSXML.SERVICE_ELEM); service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); service.setAttribute(GSXML.NAME_ATT, service_id); if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) { service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(service_id+".name", lang))); service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service_id+".submit", lang))); service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(service_id+".description", lang))); } if (subset == null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) { Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); service.appendChild(param_list); if (service_id.equals(FIELD_QUERY_SERVICE)) { addCustomQueryParams(param_list, lang); createParameter(MAXDOCS_PARAM, param_list, lang); // create a multi param for the fields etc // text box, field Element multiparam = null; Element param=null; multiparam = GSXML.createParameterDescription(this.doc, SIMPLE_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null); multiparam.setAttribute("occurs", "4"); param_list.appendChild(multiparam); // the components createParameter(FIELD_QUERY_PARAM, multiparam, lang); createParameter(FIELD_FIELD_PARAM, multiparam, lang); } else { createParameter(LEVEL_PARAM, param_list, lang); createParameter(RANK_PARAM, param_list, lang); createParameter(MAXDOCS_PARAM, param_list, lang); // create a multi param for the fields etc // text box, stem, case, field Element multiparam = null; Element param=null; multiparam = GSXML.createParameterDescription(this.doc, ADVANCED_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null); multiparam.setAttribute("occurs", "4"); param_list.appendChild(multiparam); createParameter(FIELD_COMBINE_PARAM, multiparam, lang); createParameter(FIELD_QUERY_PARAM, multiparam, lang); createParameter(FIELD_CASE_PARAM, multiparam, lang); createParameter(FIELD_STEM_PARAM, multiparam, lang); createParameter(FIELD_FIELD_PARAM, multiparam, lang); } } return service; } /** add in the mgpp specific params to TextQuery */ protected void addCustomQueryParams(Element param_list, String lang) { createParameter(LEVEL_PARAM, param_list, lang); createParameter(CASE_PARAM, param_list, lang); createParameter(STEM_PARAM, param_list, lang); createParameter(MATCH_PARAM, param_list, lang); createParameter(RANK_PARAM, param_list, lang); } /** create a param and add to the list */ protected void createParameter(String name, Element param_list, String lang) { Element param = null; if (name.equals(LEVEL_PARAM)) { ArrayList level_ids = new ArrayList(); ArrayList level_names = new ArrayList(); getLevelData(level_ids, level_names, lang); if (level_ids.size()>1) { // the first one is the default param = GSXML.createParameterDescription2(this.doc, LEVEL_PARAM, getTextString("param."+LEVEL_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)level_ids.get(0), level_ids, level_names); } } else if (name.equals(RANK_PARAM)) { String [] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE }; String [] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang), getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang, "MGPPSearch")}; param = GSXML.createParameterDescription(this.doc, RANK_PARAM, getTextString("param."+RANK_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, RANK_PARAM_RANK, vals1, vals1_texts ); } else if (name.equals(FIELD_QUERY_PARAM)) { param = GSXML.createParameterDescription(this.doc, FIELD_QUERY_PARAM, getTextString("param."+FIELD_QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); } else if (name.equals(FIELD_CASE_PARAM) || name.equals(FIELD_STEM_PARAM)) { String[] bool_ops = {"0", "1"}; String[] bool_texts = {getTextString("param.boolean.off", lang, "AbstractSearch"),getTextString("param.boolean.on", lang, "AbstractSearch")}; param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, bool_ops, bool_texts); } else if (name.equals(FIELD_FIELD_PARAM)) { ArrayList fields = new ArrayList(); ArrayList field_names = new ArrayList(); getIndexData(fields, field_names, lang); // the field list - read from config file param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names ); } else if (name.equals(FIELD_COMBINE_PARAM)) { String []vals = {FIELD_COMBINE_PARAM_AND, FIELD_COMBINE_PARAM_OR, FIELD_COMBINE_PARAM_NOT}; String []val_texts = {getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_AND, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_OR, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_NOT, lang)}; param = GSXML.createParameterDescription(this.doc, FIELD_COMBINE_PARAM, "", GSXML.PARAM_TYPE_ENUM_SINGLE, FIELD_COMBINE_PARAM_AND, vals, val_texts); param.setAttribute(GSXML.PARAM_IGNORE_POS_ATT, "0"); } if (param != null) { param_list.appendChild(param); } else { super.createParameter(name, param_list, lang); } } // should cache some of this protected void getLevelData(ArrayList level_ids, ArrayList level_names, String lang) { Element level_list = (Element)GSXML.getChildByTagName(this.config_info, LEVEL_ELEM+GSXML.LIST_MODIFIER); NodeList levels = level_list.getElementsByTagName(LEVEL_ELEM); for (int i=0; i0) { combine = " "+c+" "; } if (f.equals("")||f.equals("ZZ")) { s.append(combine+q); } else { s.append(combine+"["+q+"]:"+f); } } protected String addStemAndCase(String q, String s, String c) { String mods = "#"; if (c.equals("1")) { mods += "i"; } else { mods += "c"; } if (s.equals("1")) { mods += "s"; } else { mods+= "u"; } StringBuffer temp = new StringBuffer(); String [] terms = q.split(" "); for (int i=0; i