source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 20239

Last change on this file since 20239 was 20239, checked in by kjdon, 15 years ago

set casefolding to true by default

  • Property svn:keywords set to Author Date Id Revision
File size: 8.9 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.MGPPSearchWrapper;
23import org.greenstone.mgpp.MGPPTermInfo;
24import org.greenstone.mgpp.MGPPQueryResult;
25import org.greenstone.mgpp.MGPPDocInfo;
26
27import org.greenstone.gsdl3.util.GSFile;
28import org.greenstone.gsdl3.util.GSXML;
29
30
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// java classes
37import java.util.Iterator;
38import java.util.Set;
39import java.util.HashMap;
40import java.util.Map;
41import java.util.ArrayList;
42import java.util.Vector;
43import java.io.File;
44
45import org.apache.log4j.*;
46
47
48public class GS2MGPPSearch
49 extends AbstractGS2FieldSearch {
50 private static MGPPSearchWrapper mgpp_src=null;
51
52 private String physical_index_name = "idx";
53
54 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
55
56 /** constructor */
57 public GS2MGPPSearch() {
58 if(mgpp_src == null) {
59 mgpp_src = new MGPPSearchWrapper();
60 }
61 }
62
63 public void cleanUp() {
64 super.cleanUp();
65 mgpp_src.unloadIndexData();
66 }
67 /** process a query */
68 protected Element processAnyQuery(Element request, int query_type) {
69 synchronized (mgpp_src) {
70 return super.processAnyQuery(request, query_type);
71 }
72 }
73 /** configure this service */
74 public boolean configure(Element info, Element extra_info) {
75 if (!super.configure(info, extra_info)){
76 return false;
77 }
78
79 // set up the defaults which are not dependent on query parameters
80 // the default level is also the level which the database is expecting
81 // this must not be overwritten
82 mgpp_src.setReturnLevel(this.default_db_level);
83 // return term info
84 mgpp_src.setReturnTerms(true);
85 mgpp_src.setMaxNumeric(this.maxnumeric);
86 return true;
87 }
88
89 /** add in the mgpp specific params to TextQuery */
90 protected void addCustomQueryParams(Element param_list, String lang) {
91 super.addCustomQueryParams(param_list, lang);
92 createParameter(RANK_PARAM, param_list, lang);
93 }
94
95 protected boolean setUpQueryer(HashMap params) {
96
97 // set up the defaults that may be changed by query params
98 mgpp_src.setQueryLevel(this.default_level);
99 // we have case folding on by default
100 mgpp_src.setCase(true);
101
102 // set up the query params
103 Set entries = params.entrySet();
104 Iterator i = entries.iterator();
105 String physical_sub_index_name= this.default_index_subcollection;
106 String physical_index_language_name= this.default_index_language;
107 while (i.hasNext()) {
108 Map.Entry m = (Map.Entry)i.next();
109 String name = (String)m.getKey();
110 String value = (String)m.getValue();
111
112 if (name.equals(CASE_PARAM)) {
113 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
114 mgpp_src.setCase(val);
115 } else if (name.equals(STEM_PARAM)) {
116 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
117 mgpp_src.setStem(val);
118 } else if (name.equals(ACCENT_PARAM)) {
119 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
120 mgpp_src.setAccentFold(val);
121 } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
122 int docs = Integer.parseInt(value);
123 mgpp_src.setMaxDocs(docs);
124 } else if (name.equals(LEVEL_PARAM)) {
125 mgpp_src.setQueryLevel(value);
126 } else if (name.equals(MATCH_PARAM)) {
127 int mode;
128 if (value.equals(MATCH_PARAM_ALL)) mode=1;
129 else mode=0;
130 mgpp_src.setMatchMode(mode);
131 } else if (name.equals(RANK_PARAM)) {
132 if (value.equals(RANK_PARAM_RANK)) {
133 mgpp_src.setSortByRank(true);
134 } else if (value.equals(RANK_PARAM_NONE)) {
135 mgpp_src.setSortByRank(false);
136 }
137 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
138 physical_sub_index_name=value;
139 }else if (name.equals(INDEX_LANGUAGE_PARAM)){
140 physical_index_language_name=value;
141 } // ignore any others
142 }
143
144 if (physical_index_name.equals("idx")){
145 if (physical_sub_index_name!=null) {
146 physical_index_name+=physical_sub_index_name;
147 }
148 if (physical_index_language_name!=null){
149 physical_index_name+=physical_index_language_name;
150 }
151 }
152
153 // set up mgpp_src
154 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, physical_index_name);
155 mgpp_src.loadIndexData(indexdir);
156
157 return true;
158 }
159
160 protected Object runQuery(String query) {
161 mgpp_src.runQuery(query);
162 MGPPQueryResult mqr= mgpp_src.getQueryResult();
163 return mqr;
164
165 }
166
167 protected long numDocsMatched(Object query_result) {
168 return ((MGPPQueryResult)query_result).getTotalDocs();
169 }
170
171 protected String [] getDocIDs(Object query_result) {
172
173 Vector docs = ((MGPPQueryResult)query_result).getDocs();
174 String [] doc_nums = new String [docs.size()];
175 for (int d = 0; d < docs.size(); d++) {
176 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
177 }
178 return doc_nums;
179 }
180
181 protected String [] getDocRanks(Object query_result) {
182
183 Vector docs = ((MGPPQueryResult)query_result).getDocs();
184 String [] doc_ranks = new String [docs.size()];
185 for (int d = 0; d < docs.size(); d++) {
186 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
187 }
188 return doc_ranks;
189 }
190
191 protected boolean addTermInfo(Element term_list, HashMap params,
192 Object query_result) {
193
194 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
195
196 Vector terms = ((MGPPQueryResult)query_result).getTerms();
197 for (int t = 0; t < terms.size(); t++) {
198 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
199
200 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
201 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
202 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
203 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
204 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
205 String field = term_info.tag_;
206 if (field.equals(query_level)) {
207 // ignore
208 field = "";
209 }
210 term_elem.setAttribute(FIELD_ATT, field);
211
212 Vector equiv_terms = term_info.equiv_terms_;
213 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
214 term_elem.appendChild(equiv_term_list);
215
216 for (int et = 0; et < equiv_terms.size(); et++) {
217 String equiv_term = (String) equiv_terms.get(et);
218
219 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
220 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
221 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
222 equiv_term_elem.setAttribute(FREQ_ATT, "");
223 equiv_term_list.appendChild(equiv_term_elem);
224 }
225
226 term_list.appendChild(term_elem);
227 }
228 return true;
229 }
230
231
232 protected String addFieldInfo(String query, String field) {
233 if (field.equals("") || field.equals("ZZ")) {
234 return query;
235 }
236 return "["+query+"]:"+field;
237 }
238 protected void addQueryElem(StringBuffer final_query, String query,
239 String field, String combine) {
240
241 String comb="";
242 if (final_query.length()>0) {
243 comb = " "+combine+" ";
244 }
245 final_query.append(comb+addFieldInfo(query,field));
246 }
247
248 protected String addStemOptions(String query, String stem,
249 String casef, String accent) {
250 String mods = "#";
251 if (casef != null) {
252 if (casef.equals("1")) {
253 mods += "i";
254 } else {
255 mods += "c";
256 }
257 }
258 if (stem != null) {
259 if (stem.equals("1")) {
260 mods += "s";
261 } else {
262 mods+= "u";
263 }
264 }
265 if (accent != null) {
266 if (accent.equals("1")) {
267 mods += "f";
268 } else {
269 mods += "a";
270 }
271 }
272
273 StringBuffer temp = new StringBuffer();
274 String [] terms = query.split(" ");
275 for (int i=0; i<terms.length; i++) {
276 String t = terms[i].trim();
277 // what is the TX bit about???
278 if (!t.equals("") && !t.equals("TX")) {
279 temp.append(" "+t+mods);
280 }
281 }
282 return temp.toString();
283 }
284
285}
286
287
Note: See TracBrowser for help on using the repository browser.