source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2MGPPSearch.java@ 15326

Last change on this file since 15326 was 15326, checked in by kjdon, 16 years ago

Added support for JDBM (or another database) in place of GDBM: use SimpleCollectionDatabase instead of GDBMWrapper. New element in the buildConfig file: databaseType, set to gdbm or jdbm; if it is not present, gdbm is assumed. There may also be some small style changes to some files.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.9 KB
Line 
1/*
2 * GS2MGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.MGPPSearchWrapper;
23import org.greenstone.mgpp.MGPPTermInfo;
24import org.greenstone.mgpp.MGPPQueryResult;
25import org.greenstone.mgpp.MGPPDocInfo;
26
27import org.greenstone.gsdl3.util.GSFile;
28import org.greenstone.gsdl3.util.GSXML;
29
30
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// java classes
37import java.util.Iterator;
38import java.util.Set;
39import java.util.HashMap;
40import java.util.Map;
41import java.util.ArrayList;
42import java.util.Vector;
43import java.io.File;
44
45import org.apache.log4j.*;
46
47/**
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
51 */
52
53public class GS2MGPPSearch
54 extends AbstractGS2FieldSearch {
55 private static MGPPSearchWrapper mgpp_src=null;
56
57 private String physical_index_name = "idx";
58
59 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGPPSearch.class.getName());
60
61 /** constructor */
62 public GS2MGPPSearch() {
63 if(mgpp_src == null) {
64 mgpp_src = new MGPPSearchWrapper();
65 }
66 }
67
68 public void cleanUp() {
69 super.cleanUp();
70 mgpp_src.unloadIndexData();
71 }
72 /** process a query */
73 protected Element processAnyQuery(Element request, int query_type) {
74 synchronized (mgpp_src) {
75 return super.processAnyQuery(request, query_type);
76 }
77 }
78 /** configure this service */
79 public boolean configure(Element info, Element extra_info) {
80 if (!super.configure(info, extra_info)){
81 return false;
82 }
83
84 // the default level is also the level which the database is expecting
85 // this must not be overwritten
86 mgpp_src.setReturnLevel(this.default_db_level);
87 // return term info
88 mgpp_src.setReturnTerms(true);
89 // set the default - this may be overwritten by query params
90 mgpp_src.setQueryLevel(this.default_level);
91 mgpp_src.setMaxNumeric(this.maxnumeric);
92
93 return true;
94 }
95
96 /** add in the mgpp specific params to TextQuery */
97 protected void addCustomQueryParams(Element param_list, String lang) {
98 super.addCustomQueryParams(param_list, lang);
99 createParameter(RANK_PARAM, param_list, lang);
100 }
101
102 protected boolean setUpQueryer(HashMap params) {
103
104 // set up the query params
105 Set entries = params.entrySet();
106 Iterator i = entries.iterator();
107 String physical_sub_index_name= this.default_index_subcollection;
108 String physical_index_language_name= this.default_index_language;
109 while (i.hasNext()) {
110 Map.Entry m = (Map.Entry)i.next();
111 String name = (String)m.getKey();
112 String value = (String)m.getValue();
113
114 if (name.equals(CASE_PARAM)) {
115 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
116 mgpp_src.setCase(val);
117 } else if (name.equals(STEM_PARAM)) {
118 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
119 mgpp_src.setStem(val);
120 } else if (name.equals(ACCENT_PARAM)) {
121 boolean val = (value.equals(BOOLEAN_PARAM_ON)?true:false);
122 mgpp_src.setAccentFold(val);
123 } else if (name.equals(MAXDOCS_PARAM)&& !value.equals("")) {
124 int docs = Integer.parseInt(value);
125 mgpp_src.setMaxDocs(docs);
126 } else if (name.equals(LEVEL_PARAM)) {
127 mgpp_src.setQueryLevel(value);
128 } else if (name.equals(MATCH_PARAM)) {
129 int mode;
130 if (value.equals(MATCH_PARAM_ALL)) mode=1;
131 else mode=0;
132 mgpp_src.setMatchMode(mode);
133 } else if (name.equals(RANK_PARAM)) {
134 if (value.equals(RANK_PARAM_RANK)) {
135 mgpp_src.setSortByRank(true);
136 } else if (value.equals(RANK_PARAM_NONE)) {
137 mgpp_src.setSortByRank(false);
138 }
139 } else if (name.equals(INDEX_SUBCOLLECTION_PARAM)) {
140 physical_sub_index_name=value;
141 }else if (name.equals(INDEX_LANGUAGE_PARAM)){
142 physical_index_language_name=value;
143 } // ignore any others
144 }
145
146 if (physical_index_name.equals("idx")){
147 if (physical_sub_index_name!=null) {
148 physical_index_name+=physical_sub_index_name;
149 }
150 if (physical_index_language_name!=null){
151 physical_index_name+=physical_index_language_name;
152 }
153 }
154
155 // set up mgpp_src
156 String indexdir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar + GSFile.collectionIndexPath(this.index_stem, physical_index_name);
157 mgpp_src.loadIndexData(indexdir);
158
159 return true;
160 }
161
162 protected Object runQuery(String query) {
163 mgpp_src.runQuery(query);
164 MGPPQueryResult mqr= mgpp_src.getQueryResult();
165 return mqr;
166
167 }
168
169 protected long numDocsMatched(Object query_result) {
170 return ((MGPPQueryResult)query_result).getTotalDocs();
171 }
172
173 protected String [] getDocIDs(Object query_result) {
174
175 Vector docs = ((MGPPQueryResult)query_result).getDocs();
176 String [] doc_nums = new String [docs.size()];
177 for (int d = 0; d < docs.size(); d++) {
178 doc_nums[d] = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
179 }
180 return doc_nums;
181 }
182
183 protected String [] getDocRanks(Object query_result) {
184
185 Vector docs = ((MGPPQueryResult)query_result).getDocs();
186 String [] doc_ranks = new String [docs.size()];
187 for (int d = 0; d < docs.size(); d++) {
188 doc_ranks[d] = Float.toString(((MGPPDocInfo) docs.elementAt(d)).rank_);
189 }
190 return doc_ranks;
191 }
192
193 protected boolean addTermInfo(Element term_list, HashMap params,
194 Object query_result) {
195
196 String query_level = (String)params.get(LEVEL_PARAM); // the current query level
197
198 Vector terms = ((MGPPQueryResult)query_result).getTerms();
199 for (int t = 0; t < terms.size(); t++) {
200 MGPPTermInfo term_info = (MGPPTermInfo) terms.get(t);
201
202 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
203 term_elem.setAttribute(GSXML.NAME_ATT, term_info.term_);
204 term_elem.setAttribute(STEM_ATT, "" + term_info.stem_method_);
205 term_elem.setAttribute(FREQ_ATT, "" + term_info.term_freq_);
206 term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + term_info.match_docs_);
207 String field = term_info.tag_;
208 if (field.equals(query_level)) {
209 // ignore
210 field = "";
211 }
212 term_elem.setAttribute(FIELD_ATT, field);
213
214 Vector equiv_terms = term_info.equiv_terms_;
215 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
216 term_elem.appendChild(equiv_term_list);
217
218 for (int et = 0; et < equiv_terms.size(); et++) {
219 String equiv_term = (String) equiv_terms.get(et);
220
221 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
222 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term);
223 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "");
224 equiv_term_elem.setAttribute(FREQ_ATT, "");
225 equiv_term_list.appendChild(equiv_term_elem);
226 }
227
228 term_list.appendChild(term_elem);
229 }
230 return true;
231 }
232
233
234 protected String addFieldInfo(String query, String field) {
235 if (field.equals("") || field.equals("ZZ")) {
236 return query;
237 }
238 return "["+query+"]:"+field;
239 }
240 protected void addQueryElem(StringBuffer final_query, String query,
241 String field, String combine) {
242
243 String comb="";
244 if (final_query.length()>0) {
245 comb = " "+combine+" ";
246 }
247 final_query.append(comb+addFieldInfo(query,field));
248 }
249
250 protected String addStemOptions(String query, String stem,
251 String casef, String accent) {
252 String mods = "#";
253 if (casef != null) {
254 if (casef.equals("1")) {
255 mods += "i";
256 } else {
257 mods += "c";
258 }
259 }
260 if (stem != null) {
261 if (stem.equals("1")) {
262 mods += "s";
263 } else {
264 mods+= "u";
265 }
266 }
267 if (accent != null) {
268 if (accent.equals("1")) {
269 mods += "f";
270 } else {
271 mods += "a";
272 }
273 }
274
275 StringBuffer temp = new StringBuffer();
276 String [] terms = query.split(" ");
277 for (int i=0; i<terms.length; i++) {
278 String t = terms[i].trim();
279 // what is the TX bit about???
280 if (!t.equals("") && !t.equals("TX")) {
281 temp.append(" "+t+mods);
282 }
283 }
284 return temp.toString();
285 }
286
287}
288
289
Note: See TracBrowser for help on using the repository browser.