source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 30049

Last change on this file since 30049 was 30049, checked in by Georgiy Litvinov, 9 years ago

While using Solr field highlighted by Solr Servlet. Also added snippets to search results while using Solr.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import java.io.File;
22import java.io.Serializable;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.List;
27import java.util.Map;
28import java.util.Set;
29import java.util.Vector;
30
31import org.apache.log4j.Logger;
32import org.greenstone.gsdl3.util.FacetWrapper;
33import org.greenstone.gsdl3.util.GSFile;
34import org.greenstone.gsdl3.util.GSXML;
35import org.greenstone.gsdl3.util.XMLConverter;
36import org.greenstone.mgpp.MGPPDocInfo;
37import org.greenstone.mgpp.MGPPQueryResult;
38import org.greenstone.mgpp.MGPPSearchWrapper;
39import org.greenstone.mgpp.MGPPTermInfo;
40import org.w3c.dom.Document;
41import org.w3c.dom.Element;
42
43public class GS2MGPPSearch extends AbstractGS2FieldSearch
44{
45 private static MGPPSearchWrapper mgpp_src = null;
46
47 private String physical_index_name = "idx";
48
49 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
50
51 /** constructor */
52 public GS2MGPPSearch()
53 {
54 does_chunking = true;
55 if (mgpp_src == null)
56 {
57 mgpp_src = new MGPPSearchWrapper();
58 }
59 }
60
61 public void cleanUp()
62 {
63 super.cleanUp();
64 mgpp_src.unloadIndexData();
65 mgpp_src.reset(); // reset stored settings to defaults
66 }
67
68 /** process a query */
69 protected Element processAnyQuery(Element request, int query_type)
70 {
71 synchronized (mgpp_src)
72 {
73 return super.processAnyQuery(request, query_type);
74 }
75 }
76
77 /** configure this service */
78 public boolean configure(Element info, Element extra_info)
79 {
80 if (!super.configure(info, extra_info))
81 {
82 return false;
83 }
84
85 // set up the defaults which are not dependent on query parameters
86 // the default level is also the level which the database is expecting
87 // this must not be overwritten
88 mgpp_src.setReturnLevel(this.default_db_level);
89 // return term info
90 mgpp_src.setReturnTerms(true);
91 mgpp_src.setMaxNumeric(this.maxnumeric);
92 // mgpp internal default is 50, so set it here so the interface params agree
93 paramDefaults.put(MAXDOCS_PARAM, "50");
94 return true;
95 }
96
97 /** add in the mgpp specific params to TextQuery */
98 protected void addCustomQueryParams(Element param_list, String lang)
99 {
100 super.addCustomQueryParams(param_list, lang);
101 createParameter(RANK_PARAM, param_list, lang);
102 }
103
104 protected boolean setUpQueryer(HashMap<String, Serializable> params)
105 {
106
107 // set up the defaults that may be changed by query params
108 mgpp_src.setQueryLevel(this.default_level);
109 // we have case folding on by default
110 if (this.does_case) {
111 mgpp_src.setCase(paramDefaults.get(CASE_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
112 }
113 if (this.does_stem) {
114 mgpp_src.setStem(paramDefaults.get(STEM_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
115 }
116 if (this.does_accent) {
117 mgpp_src.setAccentFold(paramDefaults.get(ACCENT_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
118 }
119 // set up the query params
120 Set entries = params.entrySet();
121 Iterator i = entries.iterator();
122 String current_physical_index_name = this.physical_index_name;
123 String physical_sub_index_name = this.default_index_subcollection;
124 String physical_index_language_name = this.default_index_language;
125 while (i.hasNext())
126 {
127 Map.Entry m = (Map.Entry) i.next();
128 String name = (String) m.getKey();
129 String value = (String) m.getValue();
130
131 if (name.equals(CASE_PARAM) && this.does_case)
132 {
133 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
134 mgpp_src.setCase(val);
135 }
136 else if (name.equals(STEM_PARAM) && this.does_stem)
137 {
138 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
139 mgpp_src.setStem(val);
140 }
141 else if (name.equals(ACCENT_PARAM) && this.does_accent)
142 {
143 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
144 mgpp_src.setAccentFold(val);
145 }
146 else if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
147 {
148 int docs = Integer.parseInt(value);
149 mgpp_src.setMaxDocs(docs);
150 }
151 else if (name.equals(LEVEL_PARAM))
152 {
153 mgpp_src.setQueryLevel(value);
154 }
155 else if (name.equals(MATCH_PARAM))
156 {
157 int mode;
158 if (value.equals(MATCH_PARAM_ALL))
159 mode = 1;
160 else
161 mode = 0;
162 mgpp_src.setMatchMode(mode);
163 }
164 else if (name.equals(RANK_PARAM))
165 {
166 if (value.equals(RANK_PARAM_RANK))
167 {
168 mgpp_src.setSortByRank(true);
169 }
170 else if (value.equals(RANK_PARAM_NONE))
171 {
172 mgpp_src.setSortByRank(false);
173 }
174 }
175 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
176 {
177 physical_sub_index_name = value;
178 }
179 else if (name.equals(INDEX_LANGUAGE_PARAM))
180 {
181 physical_index_language_name = value;
182 } // ignore any others
183 }
184
185 if (physical_index_name.equals("idx"))
186 {
187 if (physical_sub_index_name != null)
188 {
189 current_physical_index_name += physical_sub_index_name;
190 }
191 if (physical_index_language_name != null)
192 {
193 current_physical_index_name += physical_index_language_name;
194 }
195 }
196
197 // set up mgpp_src
198 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, current_physical_index_name);
199 mgpp_src.loadIndexData(indexdir);
200
201 return true;
202 }
203
204 protected Object runQuery(String query)
205 {
206 mgpp_src.runQuery(query);
207 MGPPQueryResult mqr = mgpp_src.getQueryResult();
208 return mqr;
209
210 }
211
212 protected long numDocsMatched(Object query_result)
213 {
214 return ((MGPPQueryResult) query_result).getTotalDocs();
215 }
216
217 protected String[] getDocIDs(Object query_result)
218 {
219
220 Vector docs = ((MGPPQueryResult) query_result).getDocs();
221 String[] doc_nums = new String[docs.size()];
222 for (int d = 0; d < docs.size(); d++)
223 {
224 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
225 }
226 return doc_nums;
227 }
228
229 protected String[] getDocRanks(Object query_result)
230 {
231
232 Vector docs = ((MGPPQueryResult) query_result).getDocs();
233 String[] doc_ranks = new String[docs.size()];
234 for (int d = 0; d < docs.size(); d++)
235 {
236 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
237 }
238 return doc_ranks;
239 }
240
241 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result)
242 {
243 Document doc = term_list.getOwnerDocument();
244 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
245
246 Vector terms = ((MGPPQueryResult) query_result).getTerms();
247 for (int t = 0; t < terms.size(); t++)
248 {
249 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
250
251 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
252 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
253 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
254 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
255 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
256 String field = term_info.tag_;
257 if (field.equals(query_level))
258 {
259 // ignore
260 field = "";
261 }
262 term_elem.setAttribute(FIELD_ATT, field);
263
264 Vector equiv_terms = term_info.equiv_terms_;
265 Element equiv_term_list = doc.createElement(EQUIV_TERM_ELEM + GSXML.LIST_MODIFIER);
266 term_elem.appendChild(equiv_term_list);
267
268 for (int et = 0; et < equiv_terms.size(); et++)
269 {
270 String equiv_term = (String) equiv_terms.get(et);
271
272 Element equiv_term_elem = doc.createElement(GSXML.TERM_ELEM);
273 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
274 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
275 equiv_term_elem.setAttribute(FREQ_ATT, "");
276 equiv_term_list.appendChild(equiv_term_elem);
277 }
278
279 term_list.appendChild(term_elem);
280 }
281 return true;
282 }
283
284 protected String addFieldInfo(String query, String field)
285 {
286 if (field.equals("") || field.equals("ZZ"))
287 {
288 return query;
289 }
290 return "[" + query + "]:" + field;
291 }
292
293 protected void addQueryElem(StringBuffer final_query, String query, String field, String combine)
294 {
295
296 String comb = "";
297 if (final_query.length() > 0)
298 {
299 comb = " " + combine + " ";
300 }
301 final_query.append(comb + addFieldInfo(query, field));
302 }
303
304 protected String addStemOptions(String query, String stem, String casef, String accent)
305 {
306 String mods = "#";
307 if (casef != null)
308 {
309 if (casef.equals("1"))
310 {
311 mods += "i";
312 }
313 else
314 {
315 mods += "c";
316 }
317 }
318 if (stem != null)
319 {
320 if (stem.equals("1"))
321 {
322 mods += "s";
323 }
324 else
325 {
326 mods += "u";
327 }
328 }
329 if (accent != null)
330 {
331 if (accent.equals("1"))
332 {
333 mods += "f";
334 }
335 else
336 {
337 mods += "a";
338 }
339 }
340
341 StringBuffer temp = new StringBuffer();
342 String[] terms = query.split(" ");
343 for (int i = 0; i < terms.length; i++)
344 {
345 String t = terms[i].trim();
346 // what is the TX bit about???
347 if (!t.equals("") && !t.equals("TX"))
348 {
349 temp.append(" " + t + mods);
350 }
351 }
352 return temp.toString();
353 }
354
355 protected ArrayList<FacetWrapper> getFacets(Object query_result)
356 {
357 return null;
358 }
359
360 @Override
361 protected Map<String, Map<String, List<String>>> getHighlightSnippets(
362 Object query_result) {
363 // TODO Auto-generated method stub
364 return null;
365 }
366}
Note: See TracBrowser for help on using the repository browser.