source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 32619

Last change on this file since 32619 was 32619, checked in by ak19, 5 years ago

3 significant changes in 1 commit particularly impacting Lucene queries: 1. Instead of GS2LuceneSearch having a GS2LuceneQuery object member variable for doing each and every search, each query now instantiates its own local GS2LuceneQuery object, configures it for that specific search, runs the search and then the GS2LuceneQuery object expires. This fixes a bug by preventing multiple concurrent searches getting the search configurations of other searches run at the same time. 2. Though GS2LuceneQuery objects need to be instantiated 1 per query over a collection, we don't want to keep reopening a collection's sidx and didx index folders with IndexReader objects for every query. Since IndexReaders support concurrent access, we'd like to use one IndexReader per collection index (one for didx, one for sidx) with the IndexReaders existing for the life of a collection. This meant moving the maintaining of IndexReader objects from GS2LuceneQuery into the GS2LuceneSearch service and turning them into singletons by using a HashMap to maintain index-dir, reader pairs. GS3 Services, e.g. GS2LuceneSearch, are loaded and unloaded on collection activate and deactivate respectively. On deactivate, cleanUp() is called on services and other GS3 modules. When GS2LuceneSearch.cleanUp() is called, we now finally close the singleton IndexReader objects/resources that a collection's GS2LuceneSearch object maintains. 3. Redid previous bugfix (then committed to GS2LuceneQuery): Point 2 again solves the file-locking problem of multiple handles to the index being opened and not all being closed on deactivate, but it's solved in a different and better/more optimal way than in the previous commit.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.0 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20// Greenstone classes
21import java.io.File;
22import java.io.Serializable;
23import java.util.ArrayList;
24import java.util.HashMap;
25import java.util.Iterator;
26import java.util.List;
27import java.util.Map;
28import java.util.Set;
29import java.util.Vector;
30
31import org.apache.log4j.Logger;
32import org.greenstone.gsdl3.util.FacetWrapper;
33import org.greenstone.gsdl3.util.GSFile;
34import org.greenstone.gsdl3.util.GSXML;
35import org.greenstone.gsdl3.util.XMLConverter;
36import org.greenstone.mgpp.MGPPDocInfo;
37import org.greenstone.mgpp.MGPPQueryResult;
38import org.greenstone.mgpp.MGPPSearchWrapper;
39import org.greenstone.mgpp.MGPPTermInfo;
40import org.w3c.dom.Document;
41import org.w3c.dom.Element;
42
43public class GS2MGPPSearch extends AbstractGS2FieldSearch
44{
45 private static MGPPSearchWrapper mgpp_src = null; // STATIC!
46
47 private String physical_index_name = "idx";
48
49 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
50
51 /** constructor */
52 public GS2MGPPSearch()
53 {
54 does_chunking = true;
55 if (mgpp_src == null)
56 {
57 mgpp_src = new MGPPSearchWrapper();
58 }
59 }
60
61 public void cleanUp()
62 {
63 super.cleanUp();
64 mgpp_src.unloadIndexData();
65 mgpp_src.reset(); // reset stored settings to defaults
66 }
67
68 /** process a query */
69 protected Element processAnyQuery(Element request, int query_type)
70 {
71 // don't know that the static (class variable) mgpp_src is "multi-threaded re-entrant" allowing multiple users
72 // to search the same index at the same time. So leave code as-is: to synchronize on mgpp_src when running query
73 synchronized (mgpp_src)
74 {
75 return super.processAnyQuery(request, query_type);
76 }
77 }
78
79 /** configure this service */
80 public boolean configure(Element info, Element extra_info)
81 {
82 if (!super.configure(info, extra_info))
83 {
84 return false;
85 }
86
87 // set up the defaults which are not dependent on query parameters
88 // the default level is also the level which the database is expecting
89 // this must not be overwritten
90 mgpp_src.setReturnLevel(this.default_db_level);
91 // return term info
92 mgpp_src.setReturnTerms(true);
93 mgpp_src.setMaxNumeric(this.maxnumeric);
94 // mgpp internal default is 50, so set it here so the interface params agree
95 paramDefaults.put(MAXDOCS_PARAM, "50");
96 return true;
97 }
98
99 /** add in the mgpp specific params to TextQuery */
100 protected void addCustomQueryParams(Element param_list, String lang)
101 {
102 super.addCustomQueryParams(param_list, lang);
103 createParameter(RANK_PARAM, param_list, lang);
104 }
105
106 protected Object setUpQueryer(HashMap<String, Serializable> params)
107 {
108
109 // set up the defaults that may be changed by query params
110 mgpp_src.setQueryLevel(this.default_level);
111 // we have case folding on by default
112 if (this.does_case) {
113 mgpp_src.setCase(paramDefaults.get(CASE_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
114 }
115 if (this.does_stem) {
116 mgpp_src.setStem(paramDefaults.get(STEM_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
117 }
118 if (this.does_accent) {
119 mgpp_src.setAccentFold(paramDefaults.get(ACCENT_PARAM).equals(BOOLEAN_PARAM_ON) ? true : false);
120 }
121 // set up the query params
122 Set entries = params.entrySet();
123 Iterator i = entries.iterator();
124 String current_physical_index_name = this.physical_index_name;
125 String physical_sub_index_name = this.default_index_subcollection;
126 String physical_index_language_name = this.default_index_language;
127 while (i.hasNext())
128 {
129 Map.Entry m = (Map.Entry) i.next();
130 String name = (String) m.getKey();
131 String value = (String) m.getValue();
132
133 if (name.equals(CASE_PARAM) && this.does_case)
134 {
135 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
136 mgpp_src.setCase(val);
137 }
138 else if (name.equals(STEM_PARAM) && this.does_stem)
139 {
140 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
141 mgpp_src.setStem(val);
142 }
143 else if (name.equals(ACCENT_PARAM) && this.does_accent)
144 {
145 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
146 mgpp_src.setAccentFold(val);
147 }
148 else if (name.equals(MAXDOCS_PARAM) && !value.equals(""))
149 {
150 int docs = Integer.parseInt(value);
151 mgpp_src.setMaxDocs(docs);
152 }
153 else if (name.equals(LEVEL_PARAM))
154 {
155 mgpp_src.setQueryLevel(value);
156 }
157 else if (name.equals(MATCH_PARAM))
158 {
159 int mode;
160 if (value.equals(MATCH_PARAM_ALL))
161 mode = 1;
162 else
163 mode = 0;
164 mgpp_src.setMatchMode(mode);
165 }
166 else if (name.equals(RANK_PARAM))
167 {
168 if (value.equals(RANK_PARAM_RANK))
169 {
170 mgpp_src.setSortByRank(true);
171 }
172 else if (value.equals(RANK_PARAM_NONE))
173 {
174 mgpp_src.setSortByRank(false);
175 }
176 }
177 else if (name.equals(INDEX_SUBCOLLECTION_PARAM))
178 {
179 physical_sub_index_name = value;
180 }
181 else if (name.equals(INDEX_LANGUAGE_PARAM))
182 {
183 physical_index_language_name = value;
184 } // ignore any others
185 }
186
187 if (physical_index_name.equals("idx"))
188 {
189 if (physical_sub_index_name != null)
190 {
191 current_physical_index_name += physical_sub_index_name;
192 }
193 if (physical_index_language_name != null)
194 {
195 current_physical_index_name += physical_index_language_name;
196 }
197 }
198
199 // set up mgpp_src
200 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, current_physical_index_name);
201 mgpp_src.loadIndexData(indexdir);
202
203 return mgpp_src; //return the query object
204 }
205
206 protected Object runQuery(Object queryObject, String query)
207 {
208 // queryObject is mgpp_src, so use mgpp_src reference directly:
209
210 mgpp_src.runQuery(query);
211 MGPPQueryResult mqr = mgpp_src.getQueryResult();
212 return mqr;
213
214 }
215
216 protected long numDocsMatched(Object query_result)
217 {
218 return ((MGPPQueryResult) query_result).getTotalDocs();
219 }
220
221 protected String[] getDocIDs(Object query_result)
222 {
223
224 Vector docs = ((MGPPQueryResult) query_result).getDocs();
225 String[] doc_nums = new String[docs.size()];
226 for (int d = 0; d < docs.size(); d++)
227 {
228 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
229 }
230 return doc_nums;
231 }
232
233 protected String[] getDocRanks(Object query_result)
234 {
235
236 Vector docs = ((MGPPQueryResult) query_result).getDocs();
237 String[] doc_ranks = new String[docs.size()];
238 for (int d = 0; d < docs.size(); d++)
239 {
240 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
241 }
242 return doc_ranks;
243 }
244
245 protected boolean addTermInfo(Element term_list, HashMap<String, Serializable> params, Object query_result)
246 {
247 Document doc = term_list.getOwnerDocument();
248 String query_level = (String) params.get(LEVEL_PARAM); // the current query level
249
250 Vector terms = ((MGPPQueryResult) query_result).getTerms();
251 for (int t = 0; t < terms.size(); t++)
252 {
253 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
254
255 Element term_elem = doc.createElement(GSXML.TERM_ELEM);
256 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
257 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
258 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
259 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
260 String field = term_info.tag_;
261 if (field.equals(query_level))
262 {
263 // ignore
264 field = "";
265 }
266 term_elem.setAttribute(FIELD_ATT, field);
267
268 Vector equiv_terms = term_info.equiv_terms_;
269 Element equiv_term_list = doc.createElement(EQUIV_TERM_ELEM + GSXML.LIST_MODIFIER);
270 term_elem.appendChild(equiv_term_list);
271
272 for (int et = 0; et < equiv_terms.size(); et++)
273 {
274 String equiv_term = (String) equiv_terms.get(et);
275
276 Element equiv_term_elem = doc.createElement(GSXML.TERM_ELEM);
277 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
278 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
279 equiv_term_elem.setAttribute(FREQ_ATT, "");
280 equiv_term_list.appendChild(equiv_term_elem);
281 }
282
283 term_list.appendChild(term_elem);
284 }
285 return true;
286 }
287
288 protected String addFieldInfo(String query, String field)
289 {
290 if (field.equals("") || field.equals("ZZ"))
291 {
292 return query;
293 }
294 return "[" + query + "]:" + field;
295 }
296
297 protected void addQueryElem(StringBuffer final_query, String query, String field, String combine)
298 {
299
300 String comb = "";
301 if (final_query.length() > 0)
302 {
303 comb = " " + combine + " ";
304 }
305 final_query.append(comb + addFieldInfo(query, field));
306 }
307
308 protected String addStemOptions(String query, String stem, String casef, String accent)
309 {
310 String mods = "#";
311 if (casef != null)
312 {
313 if (casef.equals("1"))
314 {
315 mods += "i";
316 }
317 else
318 {
319 mods += "c";
320 }
321 }
322 if (stem != null)
323 {
324 if (stem.equals("1"))
325 {
326 mods += "s";
327 }
328 else
329 {
330 mods += "u";
331 }
332 }
333 if (accent != null)
334 {
335 if (accent.equals("1"))
336 {
337 mods += "f";
338 }
339 else
340 {
341 mods += "a";
342 }
343 }
344
345 StringBuffer temp = new StringBuffer();
346 String[] terms = query.split(" ");
347 for (int i = 0; i < terms.length; i++)
348 {
349 String t = terms[i].trim();
350 // what is the TX bit about???
351 if (!t.equals("") && !t.equals("TX"))
352 {
353 temp.append(" " + t + mods);
354 }
355 }
356 return temp.toString();
357 }
358
359 protected ArrayList<FacetWrapper> getFacets(Object query_result, String lang)
360 {
361 return null;
362 }
363
364 @Override
365 protected Map<String, Map<String, List<String>>> getHighlightSnippets(
366 Object query_result) {
367 // TODO Auto-generated method stub
368 return null;
369 }
370}
Note: See TracBrowser for help on using the repository browser.