source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 13567

Last change on this file since 13567 was 13483, checked in by shaoqun, 18 years ago

added the set accentfolding method

  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java classes
31import java.util.Iterator;
32import java.util.Set;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.ArrayList;
36import java.util.Vector;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
45 */
46
47public class GS2MGPPSearch
48 extends AbstractGS2FieldSearch
49{
50 private MGPPWrapper mgpp_src=null;
51
52 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
53
54 /** constructor */
55 public GS2MGPPSearch()
56 {
57 this.mgpp_src = new MGPPWrapper();
58 }
59
60 public void cleanUp() {
61 super.cleanUp();
62 this.mgpp_src.unloadIndexData();
63 }
64
65 /** configure this service */
66 public boolean configure(Element info, Element extra_info)
67 {
68 if (!super.configure(info, extra_info)){
69 return false;
70 }
71
72 // the default level is also the level which gdbm is expecting
73 // this must not be overwritten
74 this.mgpp_src.setReturnLevel(this.default_level);
75 // return term info
76 this.mgpp_src.setReturnTerms(true);
77 // set the default - this may be overwritten by query params
78 this.mgpp_src.setQueryLevel(this.default_level);
79 this.mgpp_src.setMaxNumeric(this.maxnumeric);
80
81 return true;
82 }
83
84 /** add in the mgpp specific params to TextQuery */
85 protected void addCustomQueryParams(Element param_list, String lang)
86 {
87 super.addCustomQueryParams(param_list, lang);
88 createParameter(RANK_PARAM, param_list, lang);
89 }
90
91 protected boolean setUpQueryer(HashMap params) {
92 // set up mgpp_src
93 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, this.default_index);
94 this.mgpp_src.loadIndexData(indexdir);
95
96 // set up the query params
97 Set entries = params.entrySet();
98 Iterator i = entries.iterator();
99 while (i.hasNext()) {
100 Map.Entry m = (Map.Entry)i.next();
101 String name = (String)m.getKey();
102 String value = (String)m.getValue();
103
104 if (name.equals(CASE_PARAM)) {
105 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
106 this.mgpp_src.setCase(val);
107 } else if (name.equals(STEM_PARAM)) {
108 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
109 this.mgpp_src.setStem(val);
110 } else if (name.equals(ACCENT_PARAM)) {
111 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
112 this.mgpp_src.setAccentFold(val);
113 } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
114 int docs = Integer.parseInt(value);
115 this.mgpp_src.setMaxDocs(docs);
116 } else if (name.equals(LEVEL_PARAM)) {
117 this.mgpp_src.setQueryLevel(value);
118 } else if (name.equals(MATCH_PARAM)) {
119 int mode;
120 if (value.equals(MATCH_PARAM_ALL)) mode=1;
121 else mode=0;
122 this.mgpp_src.setMatchMode(mode);
123 } else if (name.equals(RANK_PARAM)) {
124 if (value.equals(RANK_PARAM_RANK)) {
125 this.mgpp_src.setSortByRank(true);
126 } else if (value.equals(RANK_PARAM_NONE)) {
127 this.mgpp_src.setSortByRank(false);
128 }
129 } // ignore any others
130 }
131
132 return true;
133 }
134
135 protected Object runQuery(String query) {
136 this.mgpp_src.runQuery(query);
137 MGPPQueryResult mqr= this.mgpp_src.getQueryResult();
138 return mqr;
139
140 }
141
142 protected long numDocsMatched(Object query_result) {
143 return ((MGPPQueryResult)query_result).getTotalDocs();
144 }
145
146 protected String [] getDocIDs(Object query_result) {
147
148 Vector docs = ((MGPPQueryResult)query_result).getDocs();
149 String [] doc_nums = new String [docs.size()];
150 for (int d = 0; d < docs.size(); d++) {
151 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
152 }
153 return doc_nums;
154 }
155
156 protected String [] getDocRanks(Object query_result) {
157
158 Vector docs = ((MGPPQueryResult)query_result).getDocs();
159 String [] doc_ranks = new String [docs.size()];
160 for (int d = 0; d < docs.size(); d++) {
161 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
162 }
163 return doc_ranks;
164 }
165
166 protected boolean addTermInfo(Element term_list, HashMap params,
167 Object query_result) {
168
169 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
170
171 Vector terms = ((MGPPQueryResult)query_result).getTerms();
172 for (int t = 0; t < terms.size(); t++) {
173 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
174
175 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
176 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
177 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
178 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
179 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
180 String field = term_info.tag_;
181 if (field.equals(query_level)) {
182 // ignore
183 field = "";
184 }
185 term_elem.setAttribute(FIELD_ATT, field);
186
187 Vector equiv_terms = term_info.equiv_terms_;
188 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
189 term_elem.appendChild(equiv_term_list);
190
191 for (int et = 0; et < equiv_terms.size(); et++) {
192 String equiv_term = (String) equiv_terms.get(et);
193
194 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
195 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
196 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
197 equiv_term_elem.setAttribute(FREQ_ATT, "");
198 equiv_term_list.appendChild(equiv_term_elem);
199 }
200
201 term_list.appendChild(term_elem);
202 }
203 return true;
204 }
205
206
207 protected String addFieldInfo(String query, String field) {
208 if (field.equals("") || field.equals("ZZ")) {
209 return query;
210 }
211 return "["+query+"]:"+field;
212 }
213 protected void addQueryElem(StringBuffer final_query, String query,
214 String field, String combine) {
215
216 String comb="";
217 if (final_query.length()>0) {
218 comb = " "+combine+" ";
219 }
220 final_query.append(comb+addFieldInfo(query,field));
221 }
222
223 protected String addStemOptions(String query, String stem,
224 String casef, String accent) {
225 String mods = "#";
226 if (casef != null) {
227 if (casef.equals("1")) {
228 mods += "i";
229 } else {
230 mods += "c";
231 }
232 }
233 if (stem != null) {
234 if (stem.equals("1")) {
235 mods += "s";
236 } else {
237 mods+= "u";
238 }
239 }
240 if (accent != null) {
241 if (accent.equals("1")) {
242 mods += "f";
243 } else {
244 mods += "a";
245 }
246 }
247
248 StringBuffer temp = new StringBuffer();
249 String [] terms = query.split(" ");
250 for (int i=0; i<terms.length; i++) {
251 String t = terms[i].trim();
252 // what is the TX bit about???
253 if (!t.equals("") && !t.equals("TX")) {
254 temp.append(" "+t+mods);
255 }
256 }
257 return temp.toString();
258 }
259
260}
261
262
Note: See TracBrowser for help on using the repository browser.