source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 29409

Last change on this file since 29409 was 29409, checked in by kjdon, 9 years ago

adding a reset method to MGPPSearchWrapper. When we rebuild a collection, we are unloading then reloading the index data, however the default settings (stem method etc) were not being reset. This led to a problem: If a collection was built with accent folding on, and a query was done with accent folding on, then this was stored in defaultStemMethod. If the collection was then built without accent folding on, the accent folding setting wouldn't be unset and forever after, the stemMethod would be wrong and therefore no stem/casefold could be done. Now we can call reset after unloading the indexdata to get all the settings back to defaults.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.3 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import java.io.File;
22import java.io.Serializable;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.Map;
27import java.util.Set;
28import java.util.Vector;
29
30import org.apache.log4j.Logger;
31import org.greenstone.gsdl3.util.FacetWrapper;
32import org.greenstone.gsdl3.util.GSFile;
33import org.greenstone.gsdl3.util.GSXML;
34import org.greenstone.gsdl3.util.XMLConverter;
35
36import org.greenstone.mgpp.MGPPDocInfo;
37import org.greenstone.mgpp.MGPPQueryResult;
38import org.greenstone.mgpp.MGPPSearchWrapper;
39import org.greenstone.mgpp.MGPPTermInfo;
40
41import org.w3c.dom.Document;
42import org.w3c.dom.Element;
43
44public class GS2MGPPSearch extends AbstractGS2FieldSearch
45{
46 private static MGPPSearchWrapper mgpp_src = null;
47
48 private String physical_index_name = "idx";
49
50 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
51
52 /** constructor */
53 public GS2MGPPSearch()
54 {
55 if (mgpp_src == null)
56 {
57 mgpp_src = new MGPPSearchWrapper();
58 }
59 }
60
61 public void cleanUp()
62 {
63 super.cleanUp();
64 mgpp_src.unloadIndexData();
65 mgpp_src.reset(); // reset stored settings to defaults
66 }
67
68 /** process a query */
69 protected Element processAnyQuery(Element request, int query_type)
70 {
71 synchronized (mgpp_src)
72 {
73 return super.processAnyQuery(request, query_type);
74 }
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // set up the defaults which are not dependent on query parameters
86 // the default level is also the level which the database is expecting
87 // this must not be overwritten
88 mgpp_src.setReturnLevel(this.default_db_level);
89 // return term info
90 mgpp_src.setReturnTerms(true);
91 mgpp_src.setMaxNumeric(this.maxnumeric);
92 return true;
93 }
94
95 /** add in the mgpp specific params to TextQuery */
96 protected void addCustomQueryParams(Element param_list, String lang)
97 {
98 super.addCustomQueryParams(param_list, lang);
99 createParameter(RANK_PARAM, param_list, lang);
100 }
101
102 protected boolean setUpQueryer(HashMap<String, Serializable> params)
103 {
104
105 // set up the defaults that may be changed by query params
106 mgpp_src.setQueryLevel(this.default_level);
107 // we have case folding on by default
108 if (this.does_case) {
109 mgpp_src.setCase(paramDefaults.get(CASE_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
110 }
111 if (this.does_stem) {
112 mgpp_src.setStem(paramDefaults.get(STEM_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
113 }
114 if (this.does_accent) {
115 mgpp_src.setAccentFold(paramDefaults.get(ACCENT_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
116 }
117 // set up the query params
118 Set entries = params.entrySet();
119 Iterator i = entries.iterator();
120 String current_physical_index_name = this.physical_index_name;
121 String physical_sub_index_name = this.default_index_subcollection;
122 String physical_index_language_name = this.default_index_language;
123 while (i.hasNext())
124 {
125 Map.Entry m = (Map.Entry) i.next();
126 String name = (String) m.getKey();
127 String value = (String) m.getValue();
128
129 if (name.equals(CASE_PARAM) && this.does_case)
130 {
131 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
132 mgpp_src.setCase(val);
133 }
134 else if (name.equals(STEM_PARAM) && this.does_stem)
135 {
136 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
137 mgpp_src.setStem(val);
138 }
139 else if (name.equals(ACCENT_PARAM) && this.does_accent)
140 {
141 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
142 mgpp_src.setAccentFold(val);
143 }
144 else if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
145 {
146 int docs = Integer.parseInt(value);
147 mgpp_src.setMaxDocs(docs);
148 }
149 else if (name.equals(LEVEL_PARAM))
150 {
151 mgpp_src.setQueryLevel(value);
152 }
153 else if (name.equals(MATCH_PARAM))
154 {
155 int mode;
156 if (value.equals(MATCH_PARAM_ALL))
157 mode = 1;
158 else
159 mode = 0;
160 mgpp_src.setMatchMode(mode);
161 }
162 else if (name.equals(RANK_PARAM))
163 {
164 if (value.equals(RANK_PARAM_RANK))
165 {
166 mgpp_src.setSortByRank(true);
167 }
168 else if (value.equals(RANK_PARAM_NONE))
169 {
170 mgpp_src.setSortByRank(false);
171 }
172 }
173 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
174 {
175 physical_sub_index_name = value;
176 }
177 else if (name.equals(INDEX_LANGUAGE_PARAM))
178 {
179 physical_index_language_name = value;
180 } // ignore any others
181 }
182
183 if (physical_index_name.equals("idx"))
184 {
185 if (physical_sub_index_name != null)
186 {
187 current_physical_index_name += physical_sub_index_name;
188 }
189 if (physical_index_language_name != null)
190 {
191 current_physical_index_name += physical_index_language_name;
192 }
193 }
194
195 // set up mgpp_src
196 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, current_physical_index_name);
197 mgpp_src.loadIndexData(indexdir);
198
199 return true;
200 }
201
202 protected Object runQuery(String query)
203 {
204 mgpp_src.runQuery(query);
205 MGPPQueryResult mqr = mgpp_src.getQueryResult();
206 return mqr;
207
208 }
209
210 protected long numDocsMatched(Object query_result)
211 {
212 return ((MGPPQueryResult) query_result).getTotalDocs();
213 }
214
215 protected String[] getDocIDs(Object query_result)
216 {
217
218 Vector docs = ((MGPPQueryResult) query_result).getDocs();
219 String[] doc_nums = new String[docs.size()];
220 for (int d = 0; d < docs.size(); d++)
221 {
222 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
223 }
224 return doc_nums;
225 }
226
227 protected String[] getDocRanks(Object query_result)
228 {
229
230 Vector docs = ((MGPPQueryResult) query_result).getDocs();
231 String[] doc_ranks = new String[docs.size()];
232 for (int d = 0; d < docs.size(); d++)
233 {
234 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
235 }
236 return doc_ranks;
237 }
238
239 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result)
240 {
241 Document doc = term_list.getOwnerDocument();
242 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
243
244 Vector terms = ((MGPPQueryResult) query_result).getTerms();
245 for (int t = 0; t < terms.size(); t++)
246 {
247 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
248
249 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
250 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
251 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
252 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
253 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
254 String field = term_info.tag_;
255 if (field.equals(query_level))
256 {
257 // ignore
258 field = "";
259 }
260 term_elem.setAttribute(FIELD_ATT, field);
261
262 Vector equiv_terms = term_info.equiv_terms_;
263 Element equiv_term_list = doc.createElement(EQUIV_TERM_ELEM + GSXML.LIST_MODIFIER);
264 term_elem.appendChild(equiv_term_list);
265
266 for (int et = 0; et < equiv_terms.size(); et++)
267 {
268 String equiv_term = (String) equiv_terms.get(et);
269
270 Element equiv_term_elem = doc.createElement(GSXML.TERM_ELEM);
271 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
272 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
273 equiv_term_elem.setAttribute(FREQ_ATT, "");
274 equiv_term_list.appendChild(equiv_term_elem);
275 }
276
277 term_list.appendChild(term_elem);
278 }
279 return true;
280 }
281
282 protected String addFieldInfo(String query, String field)
283 {
284 if (field.equals("") || field.equals("ZZ"))
285 {
286 return query;
287 }
288 return "[" + query + "]:" + field;
289 }
290
291 protected void addQueryElem(StringBuffer final_query, String query, String field, String combine)
292 {
293
294 String comb = "";
295 if (final_query.length() > 0)
296 {
297 comb = " " + combine + " ";
298 }
299 final_query.append(comb + addFieldInfo(query, field));
300 }
301
302 protected String addStemOptions(String query, String stem, String casef, String accent)
303 {
304 String mods = "#";
305 if (casef != null)
306 {
307 if (casef.equals("1"))
308 {
309 mods += "i";
310 }
311 else
312 {
313 mods += "c";
314 }
315 }
316 if (stem != null)
317 {
318 if (stem.equals("1"))
319 {
320 mods += "s";
321 }
322 else
323 {
324 mods += "u";
325 }
326 }
327 if (accent != null)
328 {
329 if (accent.equals("1"))
330 {
331 mods += "f";
332 }
333 else
334 {
335 mods += "a";
336 }
337 }
338
339 StringBuffer temp = new StringBuffer();
340 String[] terms = query.split(" ");
341 for (int i = 0; i < terms.length; i++)
342 {
343 String t = terms[i].trim();
344 // what is the TX bit about???
345 if (!t.equals("") && !t.equals("TX"))
346 {
347 temp.append(" " + t + mods);
348 }
349 }
350 return temp.toString();
351 }
352
353 protected ArrayList<FacetWrapper> getFacets(Object query_result)
354 {
355 return null;
356 }
357}
Note: See TracBrowser for help on using the repository browser.