/*
* AbstractMGPPSearch.java
* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.greenstone.gsdl3.service;
// greenstone classes
import org.greenstone.mgpp.*;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.GSFile;
// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
// java classes
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.Vector;
import java.io.File;
/** Partially implements a generic MGPP search service
*
* @author Katherine Don
*/
abstract public class AbstractMGPPSearch
extends AbstractSearch
{
// extra services offered by mgpp collections; configure() advertises them
// when the collection config contains a field list
private static final String FIELD_QUERY_SERVICE = "FieldQuery";
private static final String ADVANCED_FIELD_QUERY_SERVICE = "AdvancedFieldQuery";
// extra parameters used
// private static final String INDEX_FIELD_PARAM = "index";
private static final String LEVEL_PARAM = "level";
// ranking parameter and its two enumerated values; createParameter() uses
// RANK_PARAM_RANK as the default value
private static final String RANK_PARAM = "sortBy";
private static final String RANK_PARAM_RANK = "1";
private static final String RANK_PARAM_NONE = "0";
// names of the multi-valued parameters that group the per-field inputs of
// the FieldQuery and AdvancedFieldQuery services respectively
private static final String SIMPLE_FIELD_PARAM = "simpleField";
private static final String ADVANCED_FIELD_PARAM = "complexField";
// more params for field query - these are the components placed inside the
// multi-valued parameters above (query text, stem, case, field, combine)
private static final String FIELD_QUERY_PARAM = "fqv";
private static final String FIELD_STEM_PARAM = "fqs";
private static final String FIELD_CASE_PARAM = "fqc";
private static final String FIELD_FIELD_PARAM = "fqf";
private static final String FIELD_COMBINE_PARAM = "fqk";
// enumerated values of FIELD_COMBINE_PARAM; createParameter() uses AND as the default
private static final String FIELD_COMBINE_PARAM_AND = "0";
private static final String FIELD_COMBINE_PARAM_OR = "1";
private static final String FIELD_COMBINE_PARAM_NOT = "2";
// some stuff for config files - element names, attribute names and values
// looked up in the configuration passed to configure()
private static final String SEARCH_TYPE_ELEM = "searchType";
private static final String SEARCH_TYPE_PLAIN = "plain";
private static final String SEARCH_TYPE_FORM = "form";
private static final String SEARCH_TYPE_FORM_SIMPLE = "simple";
private static final String SEARCH_TYPE_FORM_ADVANCED = "advanced";
protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
protected static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
protected static final String LEVEL_ELEM = "level";
protected static final String EQUIV_TERM_ELEM = "equivTerm";
protected static final String STEM_ATT = "stem";
protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
protected static final String FREQ_ATT = "freq";
// codes for the three kinds of query
// NOTE(review): their use is not visible in this part of the file - presumably
// they select how the query string is built; confirm against the process methods
private static final int TEXT_QUERY = 0;
private static final int SIMPLE_QUERY = 1;
private static final int ADVANCED_QUERY = 2;
protected static final String INDEX_STEM_ELEM = "indexStem";
protected static final String FIELD_ATT = "field";
// the wrapper around mgpp; created in the constructor and given its return
// level, query level and term reporting settings in configure()
private MGPPWrapper mgpp_src=null;
/** the default index */
protected String default_index = null;
// the default level for retrieval - and we'll use it for searching too
private String default_level=null;
// the default field for searching - configure() takes it from the first
// field element of the field list
private String default_field = null;
// which search services will we offer??
// all three are switched on in configure() when the config has no searchType list
private boolean plain_search = false;
private boolean simple_form_search = false;
private boolean advanced_form_search = false;
/** the stem used for the index files */
protected String index_stem = null;
/**
 * Creates the service with a new, not yet configured, MGPP wrapper.
 * The wrapper's levels and term reporting are set later by configure().
 */
public AbstractMGPPSearch()
{
    mgpp_src = new MGPPWrapper();
}
/**
 * Configures the MGPP search service from the collection configuration.
 *
 * Runs the generic superclass configuration first, then reads the default
 * index, the index stem, the search type list and the default level from
 * <code>info</code>, passes the default level to the MGPP wrapper and, when
 * the configuration contains a field list, advertises the fielded query
 * services that are enabled.
 *
 * NOTE(review): the line beginning "for (int i=0; i" in the search type
 * handling is cut off in this copy of the file; the loop body and whatever
 * followed it up to the default level lookup are missing and must be
 * restored from a pristine copy before this method can compile.
 *
 * @param info the configuration element for this service (the comments
 *        below refer to it as buildConfig.xml)
 * @param extra_info extra configuration; in the visible code it is only
 *        handed on to the superclass
 * @return false if the superclass configuration fails or if the default
 *         index or the default level is not specified; true otherwise, as
 *         far as the visible code shows
 */
public boolean configure(Element info, Element extra_info)
{
// Do generic configuration
if (super.configure(info, extra_info) == false)
return false;
// Do specific configuration
System.out.println("Configuring AbstractMGPPSearch...");
// do we support any of the extended features?
does_chunking = true;
// Get the default index out of (buildConfig.xml)
Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
if (def != null) {
this.default_index = def.getAttribute(GSXML.NAME_ATT);
}
// a missing or empty default index is fatal for this service
if (this.default_index == null || this.default_index.equals("")) {
System.err.println("Error: default index not specified!");
return false;
}
// the index stem is either specified in the config file or falls back to
// the cluster (collection) name
Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
if (index_stem_elem != null) {
this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
}
if (this.index_stem == null || this.index_stem.equals("")) {
System.err.println("AbstractMGPPSearch.configure(): indexStem element not found, stem will default to collection name");
this.index_stem = this.cluster_name;
}
// the generic config has set up the text query service, but we may not want it
Element search_type_list = (Element) GSXML.getChildByTagName(info, SEARCH_TYPE_ELEM + GSXML.LIST_MODIFIER);
if (search_type_list == null) {
// assume form and plain
this.plain_search = true;
this.simple_form_search = true;
this.advanced_form_search = true;
} else {
NodeList types = search_type_list.getElementsByTagName(SEARCH_TYPE_ELEM);
// NOTE(review): the next line is truncated - the rest of the loop over the
// search types, the end of this else branch and the start of the comment
// introducing the default level lookup have been lost; restore from the
// original source
for (int i=0; i (buildConfig.xml)
def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
if (def != null) {
this.default_level = def.getAttribute(GSXML.NAME_ATT);
}
// a missing or empty default level is fatal for this service
if (this.default_level == null || this.default_level.equals("")) {
System.err.println("Error: default level not specified!");
return false;
}
// the default level is also the level which gdbm is expecting
// this must not be overwritten
this.mgpp_src.setReturnLevel(this.default_level);
// return term info
this.mgpp_src.setReturnTerms(true);
// set the default - this may be overwritten by query params
this.mgpp_src.setQueryLevel(this.default_level);
// set up the extra services which are available for this collection
// check the config info - if there is no field list, then there is no fielded searching
Element field_list = (Element) GSXML.getChildByTagName(info, GSXML.FIELD_ELEM+GSXML.LIST_MODIFIER);
if (field_list==null) {
// nothing more to do
return true;
}
// the format info is the same for all services, so reuse the one stored
// for the text query service
Element format_info = (Element)format_info_map.get(TEXT_QUERY_SERVICE);
// find the default field - use the first one
// NOTE(review): if getChildByTagName returns null for a field list with no
// field children, the getAttribute call below would throw a
// NullPointerException - confirm and guard if so
Element first_field = (Element)GSXML.getChildByTagName(field_list, GSXML.FIELD_ELEM);
default_field = first_field.getAttribute(GSXML.SHORTNAME_ATT);
// else set up the fielded query services
if (this.simple_form_search) {
// set up short_service_info - for now just has id and type - name will be added in on the fly
Element fq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
fq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
fq_service.setAttribute(GSXML.NAME_ATT, FIELD_QUERY_SERVICE);
this.short_service_info.appendChild(fq_service);
if (format_info != null) {
this.format_info_map.put(FIELD_QUERY_SERVICE, format_info);
}
}
if (this.advanced_form_search) {
// same short description and format sharing as above, for the advanced form service
Element afq_service = this.doc.createElement(GSXML.SERVICE_ELEM);
afq_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
afq_service.setAttribute(GSXML.NAME_ATT, ADVANCED_FIELD_QUERY_SERVICE);
this.short_service_info.appendChild(afq_service);
if (format_info != null) {
this.format_info_map.put(ADVANCED_FIELD_QUERY_SERVICE, format_info);
}
}
return true;
}
/**
 * Builds the description of one of the fielded query services.
 *
 * Any service other than FieldQuery and AdvancedFieldQuery is handed to the
 * superclass. For the two fielded services the result is a service element
 * carrying the query type and the service name, plus - depending on
 * <code>subset</code> - the display texts and/or the parameter list.
 *
 * @param service_id name of the service to describe
 * @param lang language passed on to the text lookups
 * @param subset null for the full description, or the name of the one list
 *        (display text list or parameter list) that is wanted
 * @return the service description element
 */
protected Element getServiceDescription(String service_id, String lang, String subset) {
    // should we check that the service is actually on offer? presumably we
    // won't get asked for services that we haven't advertised previously.
    boolean simple_form = service_id.equals(FIELD_QUERY_SERVICE);
    if (!simple_form && !service_id.equals(ADVANCED_FIELD_QUERY_SERVICE)) {
        return super.getServiceDescription(service_id, lang, subset);
    }

    Element description = this.doc.createElement(GSXML.SERVICE_ELEM);
    description.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
    description.setAttribute(GSXML.NAME_ATT, service_id);

    boolean want_all = (subset == null);

    // display texts: name, submit button text, description - in that order
    if (want_all || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) {
        String name_text = getTextString(service_id+".name", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, name_text));
        String submit_text = getTextString(service_id+".submit", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, submit_text));
        String description_text = getTextString(service_id+".description", lang);
        description.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, description_text));
    }

    if (want_all || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
        Element params = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
        description.appendChild(params);

        // the parameters that come before the per-field group differ between
        // the two services, as does the name of the group itself
        String group_name;
        if (simple_form) {
            addCustomQueryParams(params, lang);
            group_name = SIMPLE_FIELD_PARAM;
        } else {
            createParameter(LEVEL_PARAM, params, lang);
            createParameter(RANK_PARAM, params, lang);
            group_name = ADVANCED_FIELD_PARAM;
        }
        createParameter(MAXDOCS_PARAM, params, lang);

        // a multi param grouping the per-field inputs, offered four times
        Element field_group = GSXML.createParameterDescription(this.doc, group_name, "", GSXML.PARAM_TYPE_MULTI, null, null, null);
        field_group.setAttribute("occurs", "4");
        params.appendChild(field_group);

        if (simple_form) {
            // text box, field
            createParameter(FIELD_QUERY_PARAM, field_group, lang);
            createParameter(FIELD_FIELD_PARAM, field_group, lang);
        } else {
            // combine, text box, case, stem, field
            createParameter(FIELD_COMBINE_PARAM, field_group, lang);
            createParameter(FIELD_QUERY_PARAM, field_group, lang);
            createParameter(FIELD_CASE_PARAM, field_group, lang);
            createParameter(FIELD_STEM_PARAM, field_group, lang);
            createParameter(FIELD_FIELD_PARAM, field_group, lang);
        }
    }
    return description;
}
/**
 * Appends the MGPP-specific query parameters to <code>param_list</code>,
 * in the order level, case, stem, match, rank.
 *
 * @param param_list the element the parameter descriptions are added to
 * @param lang language passed on to createParameter
 */
protected void addCustomQueryParams(Element param_list, String lang)
{
    String[] mgpp_params = { LEVEL_PARAM, CASE_PARAM, STEM_PARAM, MATCH_PARAM, RANK_PARAM };
    for (int p = 0; p < mgpp_params.length; p++) {
        createParameter(mgpp_params[p], param_list, lang);
    }
}
/**
 * Creates the description of the parameter called <code>name</code> and
 * appends it to <code>param_list</code>.
 *
 * Handles the level, rank and field query parameters itself. Any other
 * name - and the level parameter when there is at most one level, so there
 * is nothing to choose between - is passed on to the superclass.
 *
 * If no fields are available for the field parameter, an error is written
 * to System.err and no parameter is added (previously this case failed
 * with an IndexOutOfBoundsException while picking the default field).
 *
 * @param name the parameter to create
 * @param param_list the element the new parameter description is added to
 * @param lang language used for the display text lookups
 */
protected void createParameter(String name, Element param_list, String lang)
{
    Element param = null;
    if (name.equals(LEVEL_PARAM)) {
        ArrayList level_ids = new ArrayList();
        ArrayList level_names = new ArrayList();
        getLevelData(level_ids, level_names, lang);
        if (level_ids.size()>1) {
            // the first one is the default
            param = GSXML.createParameterDescription2(this.doc, LEVEL_PARAM, getTextString("param."+LEVEL_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)level_ids.get(0), level_ids, level_names);
        }
    } else if (name.equals(RANK_PARAM)) {
        String [] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE };
        // NOTE(review): only the second text lookup names the "MGPPSearch"
        // dictionary explicitly - confirm whether that difference is intended
        String [] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang), getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang, "MGPPSearch")};
        param = GSXML.createParameterDescription(this.doc, RANK_PARAM, getTextString("param."+RANK_PARAM, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, RANK_PARAM_RANK, vals1, vals1_texts );
    } else if (name.equals(FIELD_QUERY_PARAM)) {
        param = GSXML.createParameterDescription(this.doc, FIELD_QUERY_PARAM, getTextString("param."+FIELD_QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
    } else if (name.equals(FIELD_CASE_PARAM) || name.equals(FIELD_STEM_PARAM)) {
        String[] bool_ops = {"0", "1"};
        String[] bool_texts = {getTextString("param.boolean.off", lang, "AbstractSearch"),getTextString("param.boolean.on", lang, "AbstractSearch")};
        param = GSXML.createParameterDescription(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, bool_ops, bool_texts);
    } else if (name.equals(FIELD_FIELD_PARAM)) {
        ArrayList fields = new ArrayList();
        ArrayList field_names = new ArrayList();
        // the field list - read from config file
        getIndexData(fields, field_names, lang);
        if (fields.isEmpty()) {
            // without any field there is no default to pick and nothing to
            // offer; report it and leave the parameter out rather than
            // failing on fields.get(0)
            System.err.println("AbstractMGPPSearch.createParameter(): no fields found, parameter "+name+" not created");
            return;
        }
        // the first field is the default
        param = GSXML.createParameterDescription2(this.doc, name, getTextString("param."+name, lang), GSXML.PARAM_TYPE_ENUM_SINGLE, (String)fields.get(0), fields, field_names );
    } else if (name.equals(FIELD_COMBINE_PARAM)) {
        String []vals = {FIELD_COMBINE_PARAM_AND, FIELD_COMBINE_PARAM_OR, FIELD_COMBINE_PARAM_NOT};
        String []val_texts = {getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_AND, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_OR, lang), getTextString("param."+FIELD_COMBINE_PARAM+"."+FIELD_COMBINE_PARAM_NOT, lang)};
        param = GSXML.createParameterDescription(this.doc, FIELD_COMBINE_PARAM, "", GSXML.PARAM_TYPE_ENUM_SINGLE, FIELD_COMBINE_PARAM_AND, vals, val_texts);
        param.setAttribute(GSXML.PARAM_IGNORE_POS_ATT, "0");
    }
    if (param != null) {
        param_list.appendChild(param);
    } else {
        // not one of ours (or a level parameter with nothing to choose from)
        super.createParameter(name, param_list, lang);
    }
}
// should cache some of this
/**
 * Fills <code>level_ids</code> and <code>level_names</code> from the level
 * elements found in the level list of the config info.
 *
 * NOTE(review): this method is damaged in this copy of the file. The loop
 * header below is cut off and runs straight into statements that use
 * combine, c, f, s and q, none of which are declared in the visible text -
 * presumably the tail of a different method whose beginning has been lost.
 * Restore the missing text from a pristine copy before relying on anything
 * documented here.
 */
protected void getLevelData(ArrayList level_ids, ArrayList level_names, String lang)
{
// find the level list in the config info and collect its level elements
Element level_list = (Element)GSXML.getChildByTagName(this.config_info, LEVEL_ELEM+GSXML.LIST_MODIFIER);
NodeList levels = level_list.getElementsByTagName(LEVEL_ELEM);
// NOTE(review): truncated line - the loop condition, the loop body and the
// start of the following code are missing
for (int i=0; i0) {
combine = " "+c+" ";
}
// append the query text to s: bare when the field f is empty or "ZZ",
// otherwise in the form [q]:f - in both cases prefixed with combine
if (f.equals("")||f.equals("ZZ")) {
s.append(combine+q);
} else {
s.append(combine+"["+q+"]:"+f);
}
}
protected String addStemAndCase(String q, String s, String c) {
String mods = "#";
if (c.equals("1")) {
mods += "i";
} else {
mods += "c";
}
if (s.equals("1")) {
mods += "s";
} else {
mods+= "u";
}
StringBuffer temp = new StringBuffer();
String [] terms = q.split(" ");
for (int i=0; i