source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 3800

Last change on this file since 3800 was 3800, checked in by mdewsnip, 21 years ago

Removed option to sort by rank/natural order, and added code to deal with term information and equivalent terms.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.3 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gdbm.*;
24import org.greenstone.gsdl3.util.*;
25
26// xml classes
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.Node;
30import org.w3c.dom.NodeList;
31import org.w3c.dom.Text;
32
33// general java classes
34import java.io.File;
35import java.util.HashMap;
36import java.util.Iterator;
37import java.util.Map;
38import java.util.Set;
39import java.util.Vector;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @version $Revision: 3800 $
45 */
46
47public class GS2MGSearch
48 extends ServiceRack {
49
50 // the services on offer
51 // these strings must match what is found in the properties file
52 private static final String TEXT_QUERY_SERVICE = "TextQuery";
53
54 // params used
55 private static final String INDEX_PARAM = "index";
56 private static final String CASE_PARAM = "case";
57 private static final String STEM_PARAM = "stem";
58 private static final String MATCH_PARAM = "matchMode";
59 private static final String MATCH_PARAM_ALL = "all";
60 private static final String MATCH_PARAM_SOME = "some";
61 private static final String RANK_PARAM = "sortBy";
62 private static final String RANK_PARAM_RANK = "rank";
63 private static final String RANK_PARAM_NONE = "natural";
64 private static final String MAXDOCS_PARAM = "maxDocs";
65 private static final String BOOLEAN_PARAM_ON = "1";
66 private static final String BOOLEAN_PARAM_OFF = "0";
67 private static final String QUERY_PARAM = "query";
68
69 // elements used in the config file that are specific to this class
70 private static final String DEFAULT_INDEX_ELEM = "defaultIndex";
71 private static final String INDEX_ELEM = "index";
72
73 private static final String EQUIV_TERM_ELEM = "equivTerm";
74
75 private static final String STEM_ATT = "stem";
76 private static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
77 private static final String FREQ_ATT = "freq";
78
79 private MGWrapper mg_src_ = null;
80 private GDBMWrapper gdbm_src_ = null;
81
82 private String default_index_ = null;
83
84 private Element config_info_ = null;
85
86
87 /** constructor */
88 public GS2MGSearch() {
89 mg_src_ = new MGWrapper();
90 gdbm_src_ = new GDBMWrapper();
91 }
92
93
94 /** configure this service */
95 public boolean configure(Element info)
96 {
97 System.out.println("configuring GS2MGSearch");
98 config_info_ = info;
99
100 // get the default index out of <defaultIndex> (buildConfig.xml)
101 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
102 if (def != null) {
103 default_index_ = def.getAttribute(GSXML.NAME_ATT);
104 }
105 if (default_index_ == null || default_index_.equals("")) {
106 System.err.println("Error: default index not specified!");
107 return false;
108 }
109
110 Element e = null;
111 // these entries should reflect the build config file - some services may not be available depending on how the colleciton was built.
112 // set up short_service_info_ - for now just has name and type
113 e = doc_.createElement(GSXML.SERVICE_ELEM);
114 e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
115 e.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
116 short_service_info_.appendChild(e);
117
118 // set up service_info_map_ - for now, just has the same elements as above
119 // should have full details about each service incl params lists etc.
120 e = doc_.createElement(GSXML.SERVICE_ELEM);
121 e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
122 e.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
123 Element param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
124 createTextQueryParamList(param_list, false, null);
125 e.appendChild(param_list);
126 service_info_map_.put(TEXT_QUERY_SERVICE, e);
127
128 // Open GDBM database for querying
129 String gdbm_db_file = GSFile.GDBMDatabaseFile(site_home_, cluster_name_);
130 if (gdbm_src_.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
131 return true;
132 }
133 else {
134 System.err.println("Error: Could not open gdbm database!");
135 return false;
136 }
137 }
138
139
140 /** creates a new param element and adds it to the param list */
141 protected void createParameter(String name, Element param_list, boolean display, String lang)
142 {
143 Element param=null;
144
145 if (name.equals(INDEX_PARAM)) {
146 // the index info - read from config file
147 Element index_list = (Element)GSXML.getChildByTagName(config_info_, INDEX_ELEM+GSXML.LIST_MODIFIER);
148 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
149 int len = indexes.getLength();
150 if (len > 1) { // add index param to list only if more than one index specified
151 String [] inds = new String[len];
152 for (int i=0; i<len; i++) {
153 inds[i] = ((Element)indexes.item(i)).getAttribute(GSXML.NAME_ATT);
154 }
155 if (display) {
156 // use the same index names for now - should get these out of the config info
157 param = GSXML.createParameterDisplay(doc_, INDEX_PARAM, getTextString("param."+INDEX_PARAM, lang), inds, inds);
158 } else {
159 param = GSXML.createParameter(doc_, INDEX_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, default_index_, inds);
160 }
161 }
162 }
163 else if (name.equals(CASE_PARAM)) {
164 if (display) {
165 String[] bool_ops = {"0", "1"};
166 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
167 param = GSXML.createParameterDisplay(doc_, CASE_PARAM, getTextString("param."+CASE_PARAM, lang), bool_ops, bool_texts);
168 } else {
169 param = GSXML.createParameter(doc_, CASE_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
170 }
171 }
172 else if (name.equals(STEM_PARAM)) {
173 if (display) {
174 String[] bool_ops = {"0", "1"};
175 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
176 param = GSXML.createParameterDisplay(doc_, STEM_PARAM, getTextString("param."+STEM_PARAM, lang), bool_ops, bool_texts);
177 } else {
178 param = GSXML.createParameter(doc_, STEM_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
179 }
180 }
181 else if (name.equals(MATCH_PARAM)) {
182 String[] vals = {MATCH_PARAM_ALL, MATCH_PARAM_SOME};
183 if (display) {
184 String[] val_texts = {getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_ALL, lang),getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_SOME, lang)};
185
186 param = GSXML.createParameterDisplay(doc_, MATCH_PARAM, getTextString("param."+MATCH_PARAM, lang), vals, val_texts);
187 } else {
188 param = GSXML.createParameter(doc_, MATCH_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, MATCH_PARAM_ALL, vals);
189 }
190 }
191 else if (name.equals(MAXDOCS_PARAM)) {
192 if (display) {
193 param = GSXML.createParameterDisplay(doc_, MAXDOCS_PARAM, getTextString("param."+MAXDOCS_PARAM, lang), null, null);
194 } else {
195 param = GSXML.createParameter(doc_, MAXDOCS_PARAM, GSXML.PARAM_TYPE_INTEGER, "10", null);
196 }
197 }
198 else if (name.equals(QUERY_PARAM)) {
199 if (display) {
200 param = GSXML.createParameterDisplay(doc_, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), null, null);
201 } else {
202 param = GSXML.createParameter(doc_, QUERY_PARAM, GSXML.PARAM_TYPE_STRING, null, null);
203 }
204 }
205
206 // add the param to the list
207 if (param != null) {
208 param_list.appendChild(param);
209 }
210 }
211
212
213 /** this creates all teh params and appends them to param_list.
214 * if display=true it creates the text strings version
215 * otherwise it creates the description version
216 */
217 protected boolean createTextQueryParamList(Element param_list, boolean display, String lang)
218 {
219 // the order they are specified here is the order they appear on
220 // the query form
221 createParameter(INDEX_PARAM, param_list, display, lang);
222 createParameter(CASE_PARAM, param_list, display, lang);
223 createParameter(STEM_PARAM, param_list, display, lang);
224 createParameter(MATCH_PARAM, param_list, display, lang);
225 createParameter(MAXDOCS_PARAM, param_list, display, lang);
226 createParameter(QUERY_PARAM, param_list, display, lang);
227 return true;
228 }
229
230
231 /** creates a display element containing all the text strings needed to display the service page, in the language specified */
232 protected Element createServiceDisplay(String service, String lang) {
233 Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
234 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM,
235 getTextString(service+".name", lang)));
236 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_SUBMIT_ELEM,
237 getTextString(service+".submit", lang)));
238
239 // now need to add in the params
240 if (service.equals(TEXT_QUERY_SERVICE)) {
241 createTextQueryParamList(display, true, lang);
242 }
243
244 return display;
245 }
246
247
248 /** process a text query */
249 protected Element processTextQuery(Element request)
250 {
251 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
252 String from = GSPath.appendLink(cluster_name_, TEXT_QUERY_SERVICE);
253 result.setAttribute(GSXML.FROM_ATT, from);
254 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_QUERY);
255
256 // get param list
257 Element param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
258 if (param_list==null) {
259 System.err.println("GS2MGSearch, TextQuery Error: no param list in request!");
260 return result; // empty result
261 }
262 HashMap params = GSXML.extractParams(param_list);
263 String query = (String)params.get(QUERY_PARAM);
264 if (query == null) {
265 // no query, no result
266 return result;
267 }
268
269 String index = (String) params.get(INDEX_PARAM);
270 if (index == null) { // if it is not present, use the default index
271 index = default_index_;
272 }
273
274 // now set up the mg stuff
275 String basedir = GSFile.collectionBaseDir(site_home_, cluster_name_) +
276 File.separatorChar; // Needed for MG
277 String textdir = GSFile.collectionTextPath(cluster_name_);
278 String indexpath = GSFile.collectionIndexPath(cluster_name_, index);
279
280 // set the mg query parameters to the values the user has specified
281 setStandardQueryParams(params);
282 mg_src_.setIndex(indexpath);
283
284 System.out.println("GS2MGSearch, query string: " + query);
285 mg_src_.runQuery(basedir + File.separatorChar, textdir, query);
286 MGQueryResult mqr = mg_src_.getQueryResult();
287 long totalDocs = mqr.getTotalDocs();
288 // System.out.println("Matching documents: " + totalDocs);
289
290 // get the docnums out, and convert to HASH ids
291 Vector docs = mqr.getDocs();
292 if (docs.size() == 0) {
293 // no docs found
294 System.out.println("No results found...\n");
295 }
296
297 // Create a metadata list to store information about the query results
298 Element metadata_list = GSXML.addMetaList(doc_, result);
299
300 // Add a metadata element specifying the number of matching documents
301 Element num_matches_elem = doc_.createElement(GSXML.METADATA_ELEM);
302 num_matches_elem.setAttribute(GSXML.NAME_ATT, "numDocsMatched");
303 num_matches_elem.setAttribute(GSXML.VALUE_ATT, "" + totalDocs);
304 metadata_list.appendChild(num_matches_elem);
305
306 // Response content: documents and terms
307 Element c = doc_.createElement(GSXML.CONTENT_ELEM);
308 result.appendChild(c);
309
310 Element document_list = doc_.createElement(GSXML.DOCUMENT_ELEM+GSXML.LIST_MODIFIER);
311 c.appendChild(document_list);
312 // add each document
313 for (int d = 0; d < docs.size(); d++) {
314 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
315 String id = gdbm_src_.docnum2Oid(docnum);
316 System.out.println("Docnum: " + docnum + " ID: " + id);
317 Node no = GSXML.createDocumentElement(doc_, id);
318 document_list.appendChild(no);
319 }
320
321 Element term_list = doc_.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
322 c.appendChild(term_list);
323 // get the terms out, and add to query result metadata
324 Vector terms = mqr.getTerms();
325 for (int t = 0; t < terms.size(); t++) {
326 MGTermInfo term_info = (MGTermInfo) terms.get(t);
327
328 String term = term_info.term_;
329 int stem_method = term_info.stem_method_;
330 Vector equiv_terms = term_info.equiv_terms_;
331
332 Element term_elem = doc_.createElement(GSXML.TERM_ELEM);
333 term_elem.setAttribute(GSXML.NAME_ATT, term);
334 term_elem.setAttribute(STEM_ATT, "" + stem_method);
335
336 Element equiv_term_list = doc_.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
337 term_elem.appendChild(equiv_term_list);
338
339 long total_term_freq = 0;
340 for (int et = 0; et < equiv_terms.size(); et++) {
341 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
342
343 Element equiv_term_elem = doc_.createElement(GSXML.TERM_ELEM);
344 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
345 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
346 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
347 equiv_term_list.appendChild(equiv_term_elem);
348
349 total_term_freq += equiv_term_info.term_freq_;
350 }
351
352 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
353 term_list.appendChild(term_elem);
354 }
355
356 return result;
357 }
358
359
360 // should probably use a list rather than map
361 protected boolean setStandardQueryParams(HashMap params)
362 {
363 // set the default ones
364 mg_src_.setReturnTerms(true);
365 Set entries = params.entrySet();
366 Iterator i = entries.iterator();
367 while (i.hasNext()) {
368 Map.Entry m = (Map.Entry)i.next();
369 String name = (String)m.getKey();
370 String value = (String)m.getValue();
371
372 if (name.equals(CASE_PARAM)) {
373 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
374 mg_src_.setCase(val);
375 }
376 else if (name.equals(STEM_PARAM)) {
377 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
378 mg_src_.setStem(val);
379 }
380 else if (name.equals(MATCH_PARAM)) {
381 int mode;
382 if (value.equals(MATCH_PARAM_ALL)) mode = 1;
383 else mode = 0;
384 mg_src_.setMatchMode(mode);
385 }
386 else if (name.equals(MAXDOCS_PARAM)) {
387 int docs = Integer.parseInt(value);
388 mg_src_.setMaxDocs(docs);
389 } // ignore any others
390 }
391 return true;
392 }
393}
Note: See TracBrowser for help on using the repository browser.