source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 29318

Last change on this file since 29318 was 28966, checked in by kjdon, 10 years ago

Lots of changes, mainly to do with removing this.doc from everywhere, because Document is not thread safe. Now we tend to create a new Document every time we start a new page/message etc. In a service, this.desc_doc is available as the document to create service info with, but it should only be used for that and not for other messages. newDOM is now static in XMLConverter. Method parameters have changed for some GSXML methods.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import java.io.File;
22import java.io.Serializable;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.Map;
27import java.util.Set;
28import java.util.Vector;
29
30import org.apache.log4j.Logger;
31import org.greenstone.gsdl3.util.FacetWrapper;
32import org.greenstone.gsdl3.util.GSFile;
33import org.greenstone.gsdl3.util.GSXML;
34import org.greenstone.gsdl3.util.XMLConverter;
35
36import org.greenstone.mgpp.MGPPDocInfo;
37import org.greenstone.mgpp.MGPPQueryResult;
38import org.greenstone.mgpp.MGPPSearchWrapper;
39import org.greenstone.mgpp.MGPPTermInfo;
40
41import org.w3c.dom.Document;
42import org.w3c.dom.Element;
43
44public class GS2MGPPSearch extends AbstractGS2FieldSearch
45{
46 private static MGPPSearchWrapper mgpp_src = null;
47
48 private String physical_index_name = "idx";
49
50 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
51
52 /** constructor */
53 public GS2MGPPSearch()
54 {
55 if (mgpp_src == null)
56 {
57 mgpp_src = new MGPPSearchWrapper();
58 }
59 }
60
61 public void cleanUp()
62 {
63 super.cleanUp();
64 mgpp_src.unloadIndexData();
65 }
66
67 /** process a query */
68 protected Element processAnyQuery(Element request, int query_type)
69 {
70 synchronized (mgpp_src)
71 {
72 return super.processAnyQuery(request, query_type);
73 }
74 }
75
76 /** configure this service */
77 public boolean configure(Element info, Element extra_info)
78 {
79 if (!super.configure(info, extra_info))
80 {
81 return false;
82 }
83
84 // set up the defaults which are not dependent on query parameters
85 // the default level is also the level which the database is expecting
86 // this must not be overwritten
87 mgpp_src.setReturnLevel(this.default_db_level);
88 // return term info
89 mgpp_src.setReturnTerms(true);
90 mgpp_src.setMaxNumeric(this.maxnumeric);
91 return true;
92 }
93
94 /** add in the mgpp specific params to TextQuery */
95 protected void addCustomQueryParams(Element param_list, String lang)
96 {
97 super.addCustomQueryParams(param_list, lang);
98 createParameter(RANK_PARAM, param_list, lang);
99 }
100
101 protected boolean setUpQueryer(HashMap<String, Serializable> params)
102 {
103
104 // set up the defaults that may be changed by query params
105 mgpp_src.setQueryLevel(this.default_level);
106 // we have case folding on by default
107 if (this.does_case) {
108 mgpp_src.setCase(paramDefaults.get(CASE_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
109 }
110 if (this.does_stem) {
111 mgpp_src.setStem(paramDefaults.get(STEM_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
112 }
113 if (this.does_accent) {
114 mgpp_src.setAccentFold(paramDefaults.get(ACCENT_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
115 }
116 // set up the query params
117 Set entries = params.entrySet();
118 Iterator i = entries.iterator();
119 String current_physical_index_name = this.physical_index_name;
120 String physical_sub_index_name = this.default_index_subcollection;
121 String physical_index_language_name = this.default_index_language;
122 while (i.hasNext())
123 {
124 Map.Entry m = (Map.Entry) i.next();
125 String name = (String) m.getKey();
126 String value = (String) m.getValue();
127
128 if (name.equals(CASE_PARAM) && this.does_case)
129 {
130 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
131 mgpp_src.setCase(val);
132 }
133 else if (name.equals(STEM_PARAM) && this.does_stem)
134 {
135 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
136 mgpp_src.setStem(val);
137 }
138 else if (name.equals(ACCENT_PARAM) && this.does_accent)
139 {
140 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
141 mgpp_src.setAccentFold(val);
142 }
143 else if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
144 {
145 int docs = Integer.parseInt(value);
146 mgpp_src.setMaxDocs(docs);
147 }
148 else if (name.equals(LEVEL_PARAM))
149 {
150 mgpp_src.setQueryLevel(value);
151 }
152 else if (name.equals(MATCH_PARAM))
153 {
154 int mode;
155 if (value.equals(MATCH_PARAM_ALL))
156 mode = 1;
157 else
158 mode = 0;
159 mgpp_src.setMatchMode(mode);
160 }
161 else if (name.equals(RANK_PARAM))
162 {
163 if (value.equals(RANK_PARAM_RANK))
164 {
165 mgpp_src.setSortByRank(true);
166 }
167 else if (value.equals(RANK_PARAM_NONE))
168 {
169 mgpp_src.setSortByRank(false);
170 }
171 }
172 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
173 {
174 physical_sub_index_name = value;
175 }
176 else if (name.equals(INDEX_LANGUAGE_PARAM))
177 {
178 physical_index_language_name = value;
179 } // ignore any others
180 }
181
182 if (physical_index_name.equals("idx"))
183 {
184 if (physical_sub_index_name != null)
185 {
186 current_physical_index_name += physical_sub_index_name;
187 }
188 if (physical_index_language_name != null)
189 {
190 current_physical_index_name += physical_index_language_name;
191 }
192 }
193
194 // set up mgpp_src
195 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, current_physical_index_name);
196 mgpp_src.loadIndexData(indexdir);
197
198 return true;
199 }
200
201 protected Object runQuery(String query)
202 {
203 mgpp_src.runQuery(query);
204 MGPPQueryResult mqr = mgpp_src.getQueryResult();
205 return mqr;
206
207 }
208
209 protected long numDocsMatched(Object query_result)
210 {
211 return ((MGPPQueryResult) query_result).getTotalDocs();
212 }
213
214 protected String[] getDocIDs(Object query_result)
215 {
216
217 Vector docs = ((MGPPQueryResult) query_result).getDocs();
218 String[] doc_nums = new String[docs.size()];
219 for (int d = 0; d < docs.size(); d++)
220 {
221 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
222 }
223 return doc_nums;
224 }
225
226 protected String[] getDocRanks(Object query_result)
227 {
228
229 Vector docs = ((MGPPQueryResult) query_result).getDocs();
230 String[] doc_ranks = new String[docs.size()];
231 for (int d = 0; d < docs.size(); d++)
232 {
233 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
234 }
235 return doc_ranks;
236 }
237
238 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result)
239 {
240 Document doc = term_list.getOwnerDocument();
241 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
242
243 Vector terms = ((MGPPQueryResult) query_result).getTerms();
244 for (int t = 0; t < terms.size(); t++)
245 {
246 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
247
248 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
249 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
250 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
251 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
252 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
253 String field = term_info.tag_;
254 if (field.equals(query_level))
255 {
256 // ignore
257 field = "";
258 }
259 term_elem.setAttribute(FIELD_ATT, field);
260
261 Vector equiv_terms = term_info.equiv_terms_;
262 Element equiv_term_list = doc.createElement(EQUIV_TERM_ELEM + GSXML.LIST_MODIFIER);
263 term_elem.appendChild(equiv_term_list);
264
265 for (int et = 0; et < equiv_terms.size(); et++)
266 {
267 String equiv_term = (String) equiv_terms.get(et);
268
269 Element equiv_term_elem = doc.createElement(GSXML.TERM_ELEM);
270 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
271 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
272 equiv_term_elem.setAttribute(FREQ_ATT, "");
273 equiv_term_list.appendChild(equiv_term_elem);
274 }
275
276 term_list.appendChild(term_elem);
277 }
278 return true;
279 }
280
281 protected String addFieldInfo(String query, String field)
282 {
283 if (field.equals("") || field.equals("ZZ"))
284 {
285 return query;
286 }
287 return "[" + query + "]:" + field;
288 }
289
290 protected void addQueryElem(StringBuffer final_query, String query, String field, String combine)
291 {
292
293 String comb = "";
294 if (final_query.length() > 0)
295 {
296 comb = " " + combine + " ";
297 }
298 final_query.append(comb + addFieldInfo(query, field));
299 }
300
301 protected String addStemOptions(String query, String stem, String casef, String accent)
302 {
303 String mods = "#";
304 if (casef != null)
305 {
306 if (casef.equals("1"))
307 {
308 mods += "i";
309 }
310 else
311 {
312 mods += "c";
313 }
314 }
315 if (stem != null)
316 {
317 if (stem.equals("1"))
318 {
319 mods += "s";
320 }
321 else
322 {
323 mods += "u";
324 }
325 }
326 if (accent != null)
327 {
328 if (accent.equals("1"))
329 {
330 mods += "f";
331 }
332 else
333 {
334 mods += "a";
335 }
336 }
337
338 StringBuffer temp = new StringBuffer();
339 String[] terms = query.split(" ");
340 for (int i = 0; i < terms.length; i++)
341 {
342 String t = terms[i].trim();
343 // what is the TX bit about???
344 if (!t.equals("") && !t.equals("TX"))
345 {
346 temp.append(" " + t + mods);
347 }
348 }
349 return temp.toString();
350 }
351
352 protected ArrayList<FacetWrapper> getFacets(Object query_result)
353 {
354 return null;
355 }
356}
Note: See TracBrowser for help on using the repository browser.