/* * AbstractMGPPSearch.java * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.service; // greenstone classes import org.greenstone.mgpp.*; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.GSFile; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; // java classes import java.util.Iterator; import java.util.Set; import java.util.HashMap; import java.util.Map; import java.util.ArrayList; import java.util.Vector; import java.io.File; import org.apache.log4j.*; /** Partially implements a generic MGPP search service * * @author Katherine Don */ abstract public class AbstractMGPPSearch extends AbstractSearch { static Category logger = Category.getInstance(org.greenstone.gsdl3.service.AbstractMGPPSearch.class.getName()); // extra services offered by mgpp collections private static final String FIELD_QUERY_SERVICE = "FieldQuery"; private static final String ADVANCED_FIELD_QUERY_SERVICE = "AdvancedFieldQuery"; // extra parameters used // private static final String INDEX_FIELD_PARAM = "index"; private static final String LEVEL_PARAM = "level"; private static final String RANK_PARAM = "sortBy"; private static final String RANK_PARAM_RANK = "1"; private static final String RANK_PARAM_NONE = "0"; private static final String SIMPLE_FIELD_PARAM = "simpleField"; private static final String ADVANCED_FIELD_PARAM = "complexField"; // more params for field query private static final String FIELD_QUERY_PARAM = "fqv"; private static final String FIELD_STEM_PARAM = "fqs"; private static final String FIELD_CASE_PARAM = "fqc"; private static final String FIELD_ACCENT_PARAM="fqa"; private static final String FIELD_FIELD_PARAM = "fqf"; private static final String FIELD_COMBINE_PARAM = "fqk"; private static final String FIELD_COMBINE_PARAM_AND = "0"; private static final String FIELD_COMBINE_PARAM_OR = "1"; private static final String FIELD_COMBINE_PARAM_NOT = "2"; private static final String ACCENT_PARAM="accent"; // some stuff for config files private static final String SEARCH_TYPE_ELEM = "searchType"; private static final String SEARCH_TYPE_PLAIN = "plain"; private static final String SEARCH_TYPE_FORM = "form"; private static final String SEARCH_TYPE_FORM_SIMPLE = "simple"; private static final String SEARCH_TYPE_FORM_ADVANCED = "advanced"; protected static final String DEFAULT_INDEX_ELEM = "defaultIndex"; protected static final String DEFAULT_LEVEL_ELEM = "defaultLevel"; protected static final String LEVEL_ELEM = "level"; protected static final String STEMINDEX_OPTION = "stemIndexes"; protected static final String MAXNUMERIC_OPTION = "maxnumeric"; protected static final String EQUIV_TERM_ELEM = "equivTerm"; protected static final String STEM_ATT = "stem"; protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch"; protected static final String FREQ_ATT = "freq"; private static final int TEXT_QUERY = 0; private static final int SIMPLE_QUERY = 1; private static final int ADVANCED_QUERY = 2; protected static final String FIELD_ATT = "field"; private MGPPWrapper mgpp_src=null; // the default level for retrieval - and we'll use it for searching too private String default_level=null; // the default field for searching private String default_field = null; // which search services will we offer?? private boolean plain_search = false; private boolean simple_form_search = false; private boolean advanced_form_search = false; // stem indexes available private boolean does_case=true; private boolean does_stem=true; private boolean does_accent=false; // maxnumeric - not used yet. needs to be passed to MGPPWrapper. private int maxnumeric = 4; /** the stem used for the index files */ protected String index_stem = null; public AbstractMGPPSearch() { this.mgpp_src = new MGPPWrapper(); } public void cleanUp() { super.cleanUp(); this.mgpp_src.unloadIndexData(); } public boolean configure(Element info, Element extra_info) { // Do generic configuration if (super.configure(info, extra_info) == false) return false; // Do specific configuration logger.info("Configuring AbstractMGPPSearch..."); // do we support any of the extended features? does_chunking = true; // Get the default index out of (buildConfig.xml) Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM); if (def != null) { this.default_index = def.getAttribute(GSXML.NAME_ATT); } // otherwise its "" and will be the first one in the list // the index stem is either specified in the config file or is "index" Element index_stem_elem = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_STEM_ELEM); if (index_stem_elem != null) { this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT); } if (this.index_stem == null || this.index_stem.equals("")) { logger.warn("AbstractMGPPSearch.configure(): indexStem element not found, stem will default to collection name"); this.index_stem = this.cluster_name; } // get index options Element index_option_list = (Element) GSXML.getChildByTagName(info, GSXML.INDEX_OPTION_ELEM + GSXML.LIST_MODIFIER); if (index_option_list != null) { NodeList options = index_option_list.getElementsByTagName(GSXML.INDEX_OPTION_ELEM); for (int i=0; i (buildConfig.xml) def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM); if (def != null) { this.default_level = def.getAttribute(GSXML.NAME_ATT); } if (this.default_level == null || this.default_level.equals("")) { logger.error("default level not specified!"); return false; } // the default level is also the level which gdbm is expecting // this must not be overwritten this.mgpp_src.setReturnLevel(this.default_level); // return term info this.mgpp_src.setReturnTerms(true); // set the default - this may be overwritten by query params this.mgpp_src.setQueryLevel(this.default_level); // set up the extra services which are available for this collection // check the config info - if there is no field list, then there is no fielded searching Element field_list = (Element) GSXML.getChildByTagName(info, GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER); if (field_list==null) { // nothing more to do return true; } // the format info is the same for all services Element format_info = (Element)format_info_map.get(TEXT_QUERY_SERVICE); // find the default field - use the first one Element first_field = (Element)GSXML.getChildByTagName(field_list, GSXML.FIELD_ELEM); default_field = first_field.getAttribute(GSXML.SHORTNAME_ATT); // else set up the fielded query services if (this.simple_form_search) { // set up short_service_info_ - for now just has id and type - name will be added in on teh fly Element fq_service = this.doc.createElement(GSXML.SERVICE_ELEM); fq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); fq_service.setAttribute(GSXML.NAME_ATT, FIELD_QUERY_SERVICE); this.short_service_info.appendChild(fq_service); if (format_info != null) { this.format_info_map.put(FIELD_QUERY_SERVICE, format_info); } } if (this.advanced_form_search) { Element afq_service = this.doc.createElement(GSXML.SERVICE_ELEM); afq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); afq_service.setAttribute(GSXML.NAME_ATT, ADVANCED_FIELD_QUERY_SERVICE); this.short_service_info.appendChild(afq_service); if (format_info != null) { this.format_info_map.put(ADVANCED_FIELD_QUERY_SERVICE, format_info); } } return true; } protected Element getServiceDescription(String service_id, String lang, String subset) { // should we check that the service is actually on offer? presumably we wont get asked for services that we haven't advertised previously. if (!service_id.equals(FIELD_QUERY_SERVICE) && !service_id.equals(ADVANCED_FIELD_QUERY_SERVICE)) { return super.getServiceDescription(service_id, lang, subset); } Element service = this.doc.createElement(GSXML.SERVICE_ELEM); service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY); service.setAttribute(GSXML.NAME_ATT, service_id); if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) { service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(service_id+".name", lang))); service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(service_id+".submit", lang))); service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(service_id+".description", lang))); } if (subset == null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) { Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); service.appendChild(param_list); if (service_id.equals(FIELD_QUERY_SERVICE)) { addCustomQueryParams(param_list, lang); createParameter(MAXDOCS_PARAM, param_list, lang); // create a multi param for the fields etc // text box, field Element multiparam = null; Element param=null; multiparam = GSXML.createParameterDescription(this.doc, SIMPLE_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null); multiparam.setAttribute("occurs", "4"); param_list.appendChild(multiparam); // the components createParameter(FIELD_QUERY_PARAM, multiparam, lang); createParameter(FIELD_FIELD_PARAM, multiparam, lang); } else { createParameter(LEVEL_PARAM, param_list, lang); createParameter(RANK_PARAM, param_list, lang); createParameter(MAXDOCS_PARAM, param_list, lang); // create a multi param for the fields etc // text box, stem, case, field Element multiparam = null; Element param=null; multiparam = GSXML.createParameterDescription(this.doc, ADVANCED_FIELD_PARAM, "", GSXML.PARAM_TYPE_MULTI, null, null, null); multiparam.setAttribute("occurs", "4"); param_list.appendChild(multiparam); createParameter(FIELD_COMBINE_PARAM, multiparam, lang); createParameter(FIELD_QUERY_PARAM, multiparam, lang); if (this.does_case) { createParameter(FIELD_CASE_PARAM, multiparam, lang); } if (this.does_stem) { createParameter(FIELD_STEM_PARAM, multiparam, lang); } if (this.does_accent) { createParameter(FIELD_ACCENT_PARAM, multiparam, lang); } createParameter(FIELD_FIELD_PARAM, multiparam, lang); } } return service; } /** add in the mgpp specific params to TextQuery */ protected void addCustomQueryParams(Element param_list, String lang) { createParameter(LEVEL_PARAM, param_list, lang); if (this.does_case){ createParameter(CASE_PARAM, param_list, lang); } if (this.does_stem){ createParameter(STEM_PARAM, param_list, lang); } if (this.does_accent){ createParameter(ACCENT_PARAM, param_list, lang); } createParameter(MATCH_PARAM, param_list, lang); createParameter(RANK_PARAM, param_list, lang); } /** create a param and add to the list */ protected void createParameter(String name, Element param_list, String lang) { Element param = null; if (name.equals(LEVEL_PARAM)) { ArrayList level_ids = new ArrayList(); ArrayList level_names = new ArrayList(); getLevelData(level_ids, level_names, lang); if (level_ids.size()>1) { // the first one is the default param = GSXML.createParameterDescription2(this.doc, LEVEL_PARAM, getTextString("param."+LEVEL_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)level_ids.get(0), level_ids, level_names); } } else if (name.equals(RANK_PARAM)) { String [] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE }; String [] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang), getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang)}; param = GSXML.createParameterDescription(this.doc, RANK_PARAM, getTextString("param."+RANK_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, RANK_PARAM_RANK, vals1, vals1_texts ); } else if (name.equals(FIELD_QUERY_PARAM)) { param = GSXML.createParameterDescription(this.doc, FIELD_QUERY_PARAM, getTextString("param."+FIELD_QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null); } else if (name.equals(FIELD_CASE_PARAM) || name.equals(FIELD_STEM_PARAM) || name.equals(FIELD_ACCENT_PARAM)) { String[] bool_ops = {"0", "1"}; String[] bool_texts = {getTextString("param.boolean.off", lang, "AbstractSearch"),getTextString("param.boolean.on", lang, "AbstractSearch")}; param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, bool_ops, bool_texts); } else if (name.equals(FIELD_FIELD_PARAM)) { ArrayList fields = new ArrayList(); ArrayList field_names = new ArrayList(); getIndexData(fields, field_names, lang); // the field list - read from config file param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names ); } else if (name.equals(FIELD_COMBINE_PARAM)) { String []vals = {FIELD_COMBINE_PARAM_AND, FIELD_COMBINE_PARAM_OR, FIELD_COMBINE_PARAM_NOT}; String []val_texts = {getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_AND, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_OR, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_NOT, lang)}; param = GSXML.createParameterDescription(this.doc, FIELD_COMBINE_PARAM, "", GSXML.PARAM_TYPE_ENUM_SINGLE, FIELD_COMBINE_PARAM_AND, vals, val_texts); param.setAttribute(GSXML.PARAM_IGNORE_POS_ATT, "0"); } if (param != null) { param_list.appendChild(param); } else { super.createParameter(name, param_list, lang); } } // should cache some of this protected void getLevelData(ArrayList level_ids, ArrayList level_names, String lang) { Element level_list = (Element)GSXML.getChildByTagName(this.config_info, LEVEL_ELEM+GSXML.LIST_MODIFIER); NodeList levels = level_list.getElementsByTagName(LEVEL_ELEM); for (int i=0; i0) { Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); result.appendChild(document_list); for (int d = 0; d < docs.size(); d++) { long docnum = ((MGPPDocInfo) docs.elementAt(d)).num_; float rank = ((MGPPDocInfo) docs.elementAt(d)).rank_; String doc_id = MGPPNum2OID(docnum); Element doc_node = createDocNode(doc_id, Float.toString(rank)); //doc_node.setAttribute("rank", Float.toString(rank)); document_list.appendChild(doc_node); } } // Create a term list to store the term information, and add it String query_level = (String)params.get(LEVEL_PARAM); // the current query level Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER); result.appendChild(term_list); Vector terms = mqr.getTerms(); for (int t = 0; t < terms.size(); t++) { MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t); Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_); term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); field = term_info.tag_; if (field.equals(query_level)) { // ignore field = ""; } term_elem.setAttribute(FIELD_ATT, field); Vector equiv_terms = term_info.equiv_terms_; Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER); term_elem.appendChild(equiv_term_list); for (int et = 0; et < equiv_terms.size(); et++) { String equiv_term = (String) equiv_terms.get(et); Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM); equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term); equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, ""); equiv_term_elem.setAttribute(FREQ_ATT, ""); equiv_term_list.appendChild(equiv_term_elem); } term_list.appendChild(term_elem); } return result; } // should probably use a list rather than map protected boolean setStandardQueryParams(HashMap params) { Set entries = params.entrySet(); Iterator i = entries.iterator(); while (i.hasNext()) { Map.Entry m = (Map.Entry)i.next(); String name = (String)m.getKey(); String value = (String)m.getValue(); if (name.equals(CASE_PARAM)) { boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); this.mgpp_src.setCase(val); } else if (name.equals(STEM_PARAM)) { boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); this.mgpp_src.setStem(val); } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) { int docs = Integer.parseInt(value); this.mgpp_src.setMaxDocs(docs); } else if (name.equals(LEVEL_PARAM)) { this.mgpp_src.setQueryLevel(value); } else if (name.equals(MATCH_PARAM)) { int mode; if (value.equals(MATCH_PARAM_ALL)) mode=1; else mode=0; this.mgpp_src.setMatchMode(mode); } else if (name.equals(RANK_PARAM)) { if (value.equals(RANK_PARAM_RANK)) { this.mgpp_src.setSortByRank(true); } else if (value.equals(RANK_PARAM_NONE)) { this.mgpp_src.setSortByRank(false); } } // ignore any others } return true; } protected String addFieldInfo(String query, String field) { if (field.equals("") || field.equals("ZZ")) { return query; } return "["+query+"]:"+field; } /** combines all the field params into a single query * - for simple field query */ protected String parseFieldQueryParams(HashMap params) { StringBuffer final_query = new StringBuffer(256); String text_line = (String)params.get(FIELD_QUERY_PARAM); String[] texts = text_line.split(",", -1); String field_line = (String)params.get(FIELD_FIELD_PARAM); String[] fields = field_line.split(",", -1); String combine="&"; String match = (String)params.get(MATCH_PARAM); if (match.equals(MATCH_PARAM_SOME)) { combine = "|"; } for (int i=0; i0) { combine = " "+c+" "; } if (f.equals("")||f.equals("ZZ")) { s.append(combine+q); } else { s.append(combine+"["+q+"]:"+f); } } protected String addStemAndCase(String q, String s, String c) { String mods = "#"; if (c.equals("1")) { mods += "i"; } else { mods += "c"; } if (s.equals("1")) { mods += "s"; } else { mods+= "u"; } StringBuffer temp = new StringBuffer(); String [] terms = q.split(" "); for (int i=0; i