source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 13993

Last change on this file since 13993 was 13993, checked in by qq6, 17 years ago

add language and subcollection parameters to index

  • Property svn:keywords set to Author Date Id Revision
File size: 8.1 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java classes
31import java.util.Iterator;
32import java.util.Set;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.ArrayList;
36import java.util.Vector;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
45 */
46
47public class GS2MGPPSearch
48 extends AbstractGS2FieldSearch
49{
50 private MGPPWrapper mgpp_src=null;
51
52 private String physical_index_name = "idx";
53
54 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
55
56 /** constructor */
57 public GS2MGPPSearch()
58 {
59 this.mgpp_src = new MGPPWrapper();
60 }
61
62 public void cleanUp() {
63 super.cleanUp();
64 this.mgpp_src.unloadIndexData();
65 }
66
67 /** configure this service */
68 public boolean configure(Element info, Element extra_info)
69 {
70 if (!super.configure(info, extra_info)){
71 return false;
72 }
73
74 // the default level is also the level which gdbm is expecting
75 // this must not be overwritten
76 this.mgpp_src.setReturnLevel(this.default_gdbm_level);
77 // return term info
78 this.mgpp_src.setReturnTerms(true);
79 // set the default - this may be overwritten by query params
80 this.mgpp_src.setQueryLevel(this.default_level);
81 this.mgpp_src.setMaxNumeric(this.maxnumeric);
82
83 return true;
84 }
85
86 /** add in the mgpp specific params to TextQuery */
87 protected void addCustomQueryParams(Element param_list, String lang)
88 {
89 super.addCustomQueryParams(param_list, lang);
90 createParameter(RANK_PARAM, param_list, lang);
91 }
92
93 protected boolean setUpQueryer(HashMap params) {
94
95 // set up the query params
96 Set entries = params.entrySet();
97 Iterator i = entries.iterator();
98 String physical_sub_index_name=null;
99 String physical_index_language_name=null;
100 while (i.hasNext()) {
101 Map.Entry m = (Map.Entry)i.next();
102 String name = (String)m.getKey();
103 String value = (String)m.getValue();
104
105 if (name.equals(CASE_PARAM)) {
106 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
107 this.mgpp_src.setCase(val);
108 } else if (name.equals(STEM_PARAM)) {
109 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
110 this.mgpp_src.setStem(val);
111 } else if (name.equals(ACCENT_PARAM)) {
112 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
113 this.mgpp_src.setAccentFold(val);
114 } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
115 int docs = Integer.parseInt(value);
116 this.mgpp_src.setMaxDocs(docs);
117 } else if (name.equals(LEVEL_PARAM)) {
118 this.mgpp_src.setQueryLevel(value);
119 } else if (name.equals(MATCH_PARAM)) {
120 int mode;
121 if (value.equals(MATCH_PARAM_ALL)) mode=1;
122 else mode=0;
123 this.mgpp_src.setMatchMode(mode);
124 } else if (name.equals(RANK_PARAM)) {
125 if (value.equals(RANK_PARAM_RANK)) {
126 this.mgpp_src.setSortByRank(true);
127 } else if (value.equals(RANK_PARAM_NONE)) {
128 this.mgpp_src.setSortByRank(false);
129 }
130 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
131 physical_sub_index_name=value;
132 }else if (name.equals(INDEX_LANGUAGE_PARAM)){
133 physical_index_language_name=value;
134 } // ignore any others
135 }
136
137 if (physical_index_name.equals("idx")){
138 if (physical_sub_index_name!=null) {
139 physical_index_name+=physical_sub_index_name;
140 }
141 if (physical_index_language_name!=null){
142 physical_index_name+=physical_index_language_name;
143 }
144 }
145
146 // set up mgpp_src
147 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, physical_index_name);
148 this.mgpp_src.loadIndexData(indexdir);
149
150 physical_index_name="idx";
151 return true;
152 }
153
154 protected Object runQuery(String query) {
155 this.mgpp_src.runQuery(query);
156 MGPPQueryResult mqr= this.mgpp_src.getQueryResult();
157 return mqr;
158
159 }
160
161 protected long numDocsMatched(Object query_result) {
162 return ((MGPPQueryResult)query_result).getTotalDocs();
163 }
164
165 protected String [] getDocIDs(Object query_result) {
166
167 Vector docs = ((MGPPQueryResult)query_result).getDocs();
168 String [] doc_nums = new String [docs.size()];
169 for (int d = 0; d < docs.size(); d++) {
170 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
171 }
172 return doc_nums;
173 }
174
175 protected String [] getDocRanks(Object query_result) {
176
177 Vector docs = ((MGPPQueryResult)query_result).getDocs();
178 String [] doc_ranks = new String [docs.size()];
179 for (int d = 0; d < docs.size(); d++) {
180 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
181 }
182 return doc_ranks;
183 }
184
185 protected boolean addTermInfo(Element term_list, HashMap params,
186 Object query_result) {
187
188 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
189
190 Vector terms = ((MGPPQueryResult)query_result).getTerms();
191 for (int t = 0; t < terms.size(); t++) {
192 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
193
194 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
195 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
196 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
197 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
198 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
199 String field = term_info.tag_;
200 if (field.equals(query_level)) {
201 // ignore
202 field = "";
203 }
204 term_elem.setAttribute(FIELD_ATT, field);
205
206 Vector equiv_terms = term_info.equiv_terms_;
207 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
208 term_elem.appendChild(equiv_term_list);
209
210 for (int et = 0; et < equiv_terms.size(); et++) {
211 String equiv_term = (String) equiv_terms.get(et);
212
213 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
214 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
215 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
216 equiv_term_elem.setAttribute(FREQ_ATT, "");
217 equiv_term_list.appendChild(equiv_term_elem);
218 }
219
220 term_list.appendChild(term_elem);
221 }
222 return true;
223 }
224
225
226 protected String addFieldInfo(String query, String field) {
227 if (field.equals("") || field.equals("ZZ")) {
228 return query;
229 }
230 return "["+query+"]:"+field;
231 }
232 protected void addQueryElem(StringBuffer final_query, String query,
233 String field, String combine) {
234
235 String comb="";
236 if (final_query.length()>0) {
237 comb = " "+combine+" ";
238 }
239 final_query.append(comb+addFieldInfo(query,field));
240 }
241
242 protected String addStemOptions(String query, String stem,
243 String casef, String accent) {
244 String mods = "#";
245 if (casef != null) {
246 if (casef.equals("1")) {
247 mods += "i";
248 } else {
249 mods += "c";
250 }
251 }
252 if (stem != null) {
253 if (stem.equals("1")) {
254 mods += "s";
255 } else {
256 mods+= "u";
257 }
258 }
259 if (accent != null) {
260 if (accent.equals("1")) {
261 mods += "f";
262 } else {
263 mods += "a";
264 }
265 }
266
267 StringBuffer temp = new StringBuffer();
268 String [] terms = query.split(" ");
269 for (int i=0; i<terms.length; i++) {
270 String t = terms[i].trim();
271 // what is the TX bit about???
272 if (!t.equals("") && !t.equals("TX")) {
273 temp.append(" "+t+mods);
274 }
275 }
276 return temp.toString();
277 }
278
279}
280
281
Note: See TracBrowser for help on using the repository browser.