/* * GS2MGPPSearch.java * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.service; // Greenstone classes import org.greenstone.mgpp.*; import org.greenstone.gsdl3.util.*; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; // java classes import java.util.Iterator; import java.util.Set; import java.util.HashMap; import java.util.Map; import java.util.ArrayList; import java.util.Vector; import java.io.File; import org.apache.log4j.*; /** * * @author Katherine Don * @author Michael Dewsnip */ public class GS2MGPPSearch extends AbstractGS2FieldSearch { private MGPPWrapper mgpp_src=null; private String physical_index_name = "idx"; static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName()); /** constructor */ public GS2MGPPSearch() { this.mgpp_src = new MGPPWrapper(); } public void cleanUp() { super.cleanUp(); this.mgpp_src.unloadIndexData(); } /** configure this service */ public boolean configure(Element info, Element extra_info) { if (!super.configure(info, extra_info)){ return false; } // the default level is also the level which gdbm is expecting // this must not be overwritten this.mgpp_src.setReturnLevel(this.default_gdbm_level); // return term info this.mgpp_src.setReturnTerms(true); // set the default - this may be overwritten by query params this.mgpp_src.setQueryLevel(this.default_level); this.mgpp_src.setMaxNumeric(this.maxnumeric); return true; } /** add in the mgpp specific params to TextQuery */ protected void addCustomQueryParams(Element param_list, String lang) { super.addCustomQueryParams(param_list, lang); createParameter(RANK_PARAM, param_list, lang); } protected boolean setUpQueryer(HashMap params) { // set up the query params Set entries = params.entrySet(); Iterator i = entries.iterator(); String physical_sub_index_name=null; String physical_index_language_name=null; while (i.hasNext()) { Map.Entry m = (Map.Entry)i.next(); String name = (String)m.getKey(); String value = (String)m.getValue(); if (name.equals(CASE_PARAM)) { boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); this.mgpp_src.setCase(val); } else if (name.equals(STEM_PARAM)) { boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); this.mgpp_src.setStem(val); } else if (name.equals(ACCENT_PARAM)) { boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false); this.mgpp_src.setAccentFold(val); } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) { int docs = Integer.parseInt(value); this.mgpp_src.setMaxDocs(docs); } else if (name.equals(LEVEL_PARAM)) { this.mgpp_src.setQueryLevel(value); } else if (name.equals(MATCH_PARAM)) { int mode; if (value.equals(MATCH_PARAM_ALL)) mode=1; else mode=0; this.mgpp_src.setMatchMode(mode); } else if (name.equals(RANK_PARAM)) { if (value.equals(RANK_PARAM_RANK)) { this.mgpp_src.setSortByRank(true); } else if (value.equals(RANK_PARAM_NONE)) { this.mgpp_src.setSortByRank(false); } } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) { physical_sub_index_name=value; }else if (name.equals(INDEX_LANGUAGE_PARAM)){ physical_index_language_name=value; } // ignore any others } if (physical_index_name.equals("idx")){ if (physical_sub_index_name!=null) { physical_index_name+=physical_sub_index_name; } if (physical_index_language_name!=null){ physical_index_name+=physical_index_language_name; } } // set up mgpp_src String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, physical_index_name); this.mgpp_src.loadIndexData(indexdir); physical_index_name="idx"; return true; } protected Object runQuery(String query) { this.mgpp_src.runQuery(query); MGPPQueryResult mqr= this.mgpp_src.getQueryResult(); return mqr; } protected long numDocsMatched(Object query_result) { return ((MGPPQueryResult)query_result).getTotalDocs(); } protected String [] getDocIDs(Object query_result) { Vector docs = ((MGPPQueryResult)query_result).getDocs(); String [] doc_nums = new String [docs.size()]; for (int d = 0; d < docs.size(); d++) { doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_)); } return doc_nums; } protected String [] getDocRanks(Object query_result) { Vector docs = ((MGPPQueryResult)query_result).getDocs(); String [] doc_ranks = new String [docs.size()]; for (int d = 0; d < docs.size(); d++) { doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_); } return doc_ranks; } protected boolean addTermInfo(Element term_list, HashMap params, Object query_result) { String query_level = (String)params.get(LEVEL_PARAM); // the current query level Vector terms = ((MGPPQueryResult)query_result).getTerms(); for (int t = 0; t < terms.size(); t++) { MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t); Element term_elem = this.doc.createElement(GSXML.TERM_ELEM); term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_); term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_); term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_); term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_); String field = term_info.tag_; if (field.equals(query_level)) { // ignore field = ""; } term_elem.setAttribute(FIELD_ATT, field); Vector equiv_terms = term_info.equiv_terms_; Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER); term_elem.appendChild(equiv_term_list); for (int et = 0; et < equiv_terms.size(); et++) { String equiv_term = (String) equiv_terms.get(et); Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM); equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term); equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, ""); equiv_term_elem.setAttribute(FREQ_ATT, ""); equiv_term_list.appendChild(equiv_term_elem); } term_list.appendChild(term_elem); } return true; } protected String addFieldInfo(String query, String field) { if (field.equals("") || field.equals("ZZ")) { return query; } return "["+query+"]:"+field; } protected void addQueryElem(StringBuffer final_query, String query, String field, String combine) { String comb=""; if (final_query.length()>0) { comb = " "+combine+" "; } final_query.append(comb+addFieldInfo(query,field)); } protected String addStemOptions(String query, String stem, String casef, String accent) { String mods = "#"; if (casef != null) { if (casef.equals("1")) { mods += "i"; } else { mods += "c"; } } if (stem != null) { if (stem.equals("1")) { mods += "s"; } else { mods+= "u"; } } if (accent != null) { if (accent.equals("1")) { mods += "f"; } else { mods += "a"; } } StringBuffer temp = new StringBuffer(); String [] terms = query.split(" "); for (int i=0; i