source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 3770

Last change on this file since 3770 was 3770, checked in by mdewsnip, 21 years ago

Changed two string values to private static constants.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gdbm.*;
24import org.greenstone.gsdl3.util.*;
25
26// xml classes
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.Node;
30import org.w3c.dom.NodeList;
31import org.w3c.dom.Text;
32
33// general java classes
34import java.io.File;
35import java.util.HashMap;
36import java.util.Iterator;
37import java.util.Map;
38import java.util.Set;
39import java.util.Vector;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @version $Revision: 3770 $
45 */
46
47public class GS2MGSearch
48 extends ServiceRack {
49
50 // the services on offer
51 // these strings must match what is found in the properties file
52 private static final String TEXT_QUERY_SERVICE = "TextQuery";
53
54 // params used
55 private static final String INDEX_PARAM = "index";
56 private static final String CASE_PARAM = "case";
57 private static final String STEM_PARAM = "stem";
58 private static final String MATCH_PARAM = "matchMode";
59 private static final String MATCH_PARAM_ALL = "all";
60 private static final String MATCH_PARAM_SOME = "some";
61 private static final String RANK_PARAM = "sortBy";
62 private static final String RANK_PARAM_RANK = "rank";
63 private static final String RANK_PARAM_NONE = "natural";
64 private static final String MAXDOCS_PARAM = "maxDocs";
65 private static final String BOOLEAN_PARAM_ON = "1";
66 private static final String BOOLEAN_PARAM_OFF = "0";
67 private static final String QUERY_PARAM = "query";
68
69 // elements used in the config file that are specific to this class
70 private static final String DEFAULT_INDEX_ELEM = "defaultIndex";
71 private static final String INDEX_ELEM = "index";
72
73 public static final String COUNT_ATT = "count";
74 public static final String STEM_ATT = "stem";
75
76 private MGWrapper mg_src_ = null;
77 private GDBMWrapper gdbm_src_ = null;
78
79 private String default_index_ = null;
80
81 private Element config_info_ = null;
82
83
84 /** constructor */
85 public GS2MGSearch() {
86 mg_src_ = new MGWrapper();
87 gdbm_src_ = new GDBMWrapper();
88 }
89
90
91 /** configure this service */
92 public boolean configure(Element info)
93 {
94 System.out.println("configuring GS2MGSearch");
95 config_info_ = info;
96
97 // get the default index out of <defaultIndex> (buildConfig.xml)
98 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
99 if (def != null) {
100 default_index_ = def.getAttribute(GSXML.NAME_ATT);
101 }
102 if (default_index_ == null || default_index_.equals("")) {
103 System.err.println("Error: default index not specified!");
104 return false;
105 }
106
107 Element e = null;
108 // these entries should reflect the build config file - some services may not be available depending on how the colleciton was built.
109 // set up short_service_info_ - for now just has name and type
110 e = doc_.createElement(GSXML.SERVICE_ELEM);
111 e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
112 e.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
113 short_service_info_.appendChild(e);
114
115 // set up service_info_map_ - for now, just has the same elements as above
116 // should have full details about each service incl params lists etc.
117 e = doc_.createElement(GSXML.SERVICE_ELEM);
118 e.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
119 e.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
120 Element param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
121 createTextQueryParamList(param_list, false, null);
122 e.appendChild(param_list);
123 service_info_map_.put(TEXT_QUERY_SERVICE, e);
124
125 // Open GDBM database for querying
126 String gdbm_db_file = GSFile.GDBMDatabaseFile(site_home_, cluster_name_);
127 if (gdbm_src_.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
128 return true;
129 }
130 else {
131 System.err.println("Error: Could not open gdbm database!");
132 return false;
133 }
134 }
135
136
137 /** creates a new param element and adds it to the param list */
138 protected void createParameter(String name, Element param_list, boolean display, String lang)
139 {
140 Element param=null;
141
142 if (name.equals(INDEX_PARAM)) {
143 // the index info - read from config file
144 Element index_list = (Element)GSXML.getChildByTagName(config_info_, INDEX_ELEM+GSXML.LIST_MODIFIER);
145 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
146 int len = indexes.getLength();
147 if (len > 1) { // add index param to list only if more than one index specified
148 String [] inds = new String[len];
149 for (int i=0; i<len; i++) {
150 inds[i] = ((Element)indexes.item(i)).getAttribute(GSXML.NAME_ATT);
151 }
152 if (display) {
153 // use the same index names for now - should get these out of the config info
154 param = GSXML.createParameterDisplay(doc_, INDEX_PARAM, getTextString("param."+INDEX_PARAM, lang), inds, inds);
155 } else {
156 param = GSXML.createParameter(doc_, INDEX_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, default_index_, inds);
157 }
158 }
159 }
160 else if (name.equals(CASE_PARAM)) {
161 if (display) {
162 String[] bool_ops = {"0", "1"};
163 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
164 param = GSXML.createParameterDisplay(doc_, CASE_PARAM, getTextString("param."+CASE_PARAM, lang), bool_ops, bool_texts);
165 } else {
166 param = GSXML.createParameter(doc_, CASE_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
167 }
168 }
169 else if (name.equals(STEM_PARAM)) {
170 if (display) {
171 String[] bool_ops = {"0", "1"};
172 String[] bool_texts = {getTextString("param.boolean.off", lang),getTextString("param.boolean.on", lang)};
173 param = GSXML.createParameterDisplay(doc_, STEM_PARAM, getTextString("param."+STEM_PARAM, lang), bool_ops, bool_texts);
174 } else {
175 param = GSXML.createParameter(doc_, STEM_PARAM, GSXML.PARAM_TYPE_BOOLEAN, BOOLEAN_PARAM_ON, null);
176 }
177 }
178 else if (name.equals(MATCH_PARAM)) {
179 String[] vals = {MATCH_PARAM_ALL, MATCH_PARAM_SOME};
180 if (display) {
181 String[] val_texts = {getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_ALL, lang),getTextString("param."+MATCH_PARAM+"."+MATCH_PARAM_SOME, lang)};
182
183 param = GSXML.createParameterDisplay(doc_, MATCH_PARAM, getTextString("param."+MATCH_PARAM, lang), vals, val_texts);
184 } else {
185 param = GSXML.createParameter(doc_, MATCH_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, MATCH_PARAM_ALL, vals);
186 }
187 }
188 else if (name.equals(RANK_PARAM)) {
189 String[] vals1 = {RANK_PARAM_RANK, RANK_PARAM_NONE };
190 if (display) {
191 String[] vals1_texts = { getTextString("param."+RANK_PARAM+"."+RANK_PARAM_RANK, lang),
192 getTextString("param."+RANK_PARAM+"."+RANK_PARAM_NONE, lang)};
193
194 param = GSXML.createParameterDisplay(doc_, RANK_PARAM, getTextString("param."+RANK_PARAM, lang), vals1, vals1_texts);
195 } else {
196 param = GSXML.createParameter(doc_, RANK_PARAM, GSXML.PARAM_TYPE_ENUM_SINGLE, RANK_PARAM_RANK, vals1 );
197 }
198 }
199 else if (name.equals(MAXDOCS_PARAM)) {
200 if (display) {
201 param = GSXML.createParameterDisplay(doc_, MAXDOCS_PARAM, getTextString("param."+MAXDOCS_PARAM, lang), null, null);
202 } else {
203 param = GSXML.createParameter(doc_, MAXDOCS_PARAM, GSXML.PARAM_TYPE_INTEGER, "10", null);
204 }
205 }
206 else if (name.equals(QUERY_PARAM)) {
207 if (display) {
208 param = GSXML.createParameterDisplay(doc_, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), null, null);
209 } else {
210 param = GSXML.createParameter(doc_, QUERY_PARAM, GSXML.PARAM_TYPE_STRING, null, null);
211 }
212 }
213
214 // add the param to the list
215 if (param != null) {
216 param_list.appendChild(param);
217 }
218 }
219
220
221 /** this creates all teh params and appends them to param_list.
222 * if display=true it creates the text strings version
223 * otherwise it creates the description version
224 */
225 protected boolean createTextQueryParamList(Element param_list, boolean display, String lang)
226 {
227 // the order they are specified here is the order they appear on
228 // the query form
229 createParameter(INDEX_PARAM, param_list, display, lang);
230 createParameter(CASE_PARAM, param_list, display, lang);
231 createParameter(STEM_PARAM, param_list, display, lang);
232 createParameter(MATCH_PARAM, param_list, display, lang);
233 createParameter(RANK_PARAM, param_list, display, lang);
234 createParameter(MAXDOCS_PARAM, param_list, display, lang);
235 createParameter(QUERY_PARAM, param_list, display, lang);
236 return true;
237 }
238
239
240 /** creates a display element containing all the text strings needed to display the service page, in the language specified */
241 protected Element createServiceDisplay(String service, String lang) {
242 Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
243 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM,
244 getTextString(service+".name", lang)));
245 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_SUBMIT_ELEM,
246 getTextString(service+".submit", lang)));
247
248 // now need to add in the params
249 if (service.equals(TEXT_QUERY_SERVICE)) {
250 createTextQueryParamList(display, true, lang);
251 }
252
253 return display;
254 }
255
256
257 /** process a text query */
258 protected Element processTextQuery(Element request)
259 {
260 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
261 String from = GSPath.appendLink(cluster_name_, TEXT_QUERY_SERVICE);
262 result.setAttribute(GSXML.FROM_ATT, from);
263 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_QUERY);
264 // result.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
265
266 // get param list
267 Element param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
268 if (param_list==null) {
269 System.err.println("GS2MGSearch, TextQuery Error: no param list in request!");
270 return result; // empty result
271 }
272 HashMap params = GSXML.extractParams(param_list);
273 String query = (String)params.get(QUERY_PARAM);
274 if (query == null) {
275 // no query, no result
276 return result;
277 }
278
279 String index = (String) params.get(INDEX_PARAM);
280 if (index == null) { // if it is not present, use the default index
281 index = default_index_;
282 }
283 // System.out.println("Index: " + index);
284
285 // now set up the mg stuff
286 String basedir = GSFile.collectionBaseDir(site_home_, cluster_name_) +
287 File.separatorChar; // Needed for MG
288 String textdir = GSFile.collectionTextPath(cluster_name_);
289 String indexpath = GSFile.collectionIndexPath(cluster_name_, index);
290
291 // set the mg query parameters to the values the user has specified
292 setStandardQueryParams(params);
293 mg_src_.setIndex(indexpath);
294
295 System.out.println("GS2MGSearch, query string: " + query);
296 mg_src_.runQuery(basedir + File.separatorChar, textdir, query);
297 MGQueryResult mqr = mg_src_.getQueryResult();
298 long totalDocs = mqr.getTotalDocs();
299 // System.out.println("Matching documents: " + totalDocs);
300
301 // get the docnums out, and convert to HASH ids
302 Vector docs = mqr.getDocs();
303 if (docs.size() == 0) {
304 // no docs found
305 System.out.println("No results found...\n");
306 }
307
308 // Create a metadata list to store information about the query results
309 Element metadata_list = GSXML.addMetaList(doc_, result);
310
311 // Add a metadata element specifying the number of matching documents
312 Element num_matches_elem = doc_.createElement(GSXML.METADATA_ELEM);
313 num_matches_elem.setAttribute(GSXML.NAME_ATT, "numDocsMatched");
314 num_matches_elem.setAttribute(GSXML.VALUE_ATT, "" + totalDocs);
315 metadata_list.appendChild(num_matches_elem);
316 // System.out.println("Metadata list: " + converter_.getString(metadata_list));
317
318 // Response content: documents and terms
319 Element c = doc_.createElement(GSXML.CONTENT_ELEM);
320 result.appendChild(c);
321
322 Element document_list = doc_.createElement(GSXML.DOCUMENT_ELEM+GSXML.LIST_MODIFIER);
323 c.appendChild(document_list);
324 // add each document
325 for (int d = 0; d < docs.size(); d++) {
326 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
327 String id = gdbm_src_.docnum2Oid(docnum);
328 System.out.println("Docnum: " + docnum + " ID: " + id);
329 Node no = GSXML.createDocumentElement(doc_, id);
330 document_list.appendChild(no);
331 }
332
333 Element term_list = doc_.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
334 c.appendChild(term_list);
335 // get the terms out, and add to query result metadata
336 Vector terms = mqr.getTerms();
337 for (int t = 0; t < terms.size(); t++) {
338 String term = ((MGTermInfo) terms.elementAt(t)).term_;
339 long term_freq = ((MGTermInfo) terms.elementAt(t)).term_freq_;
340 int stem_method = ((MGTermInfo) terms.elementAt(t)).stem_method_;
341
342 Element term_elem = doc_.createElement(GSXML.TERM_ELEM);
343 term_elem.setAttribute(GSXML.NAME_ATT, term);
344 term_elem.setAttribute(COUNT_ATT, "" + term_freq);
345 term_elem.setAttribute(STEM_ATT, "" + stem_method);
346 term_list.appendChild(term_elem);
347 }
348
349 return result;
350 }
351
352
353 // should probably use a list rather than map
354 protected boolean setStandardQueryParams(HashMap params)
355 {
356 // set the default ones
357 mg_src_.setReturnTerms(true);
358 Set entries = params.entrySet();
359 Iterator i = entries.iterator();
360 while (i.hasNext()) {
361 Map.Entry m = (Map.Entry)i.next();
362 String name = (String)m.getKey();
363 String value = (String)m.getValue();
364
365 if (name.equals(CASE_PARAM)) {
366 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
367 mg_src_.setCase(val);
368 }
369 else if (name.equals(STEM_PARAM)) {
370 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
371 mg_src_.setStem(val);
372 }
373 else if (name.equals(MATCH_PARAM)) {
374 int mode;
375 if (value.equals(MATCH_PARAM_ALL)) mode = 1;
376 else mode = 0;
377 mg_src_.setMatchMode(mode);
378 }
379 else if (name.equals(RANK_PARAM)) {
380 if (value.equals(RANK_PARAM_RANK)) {
381 mg_src_.setSortByRank(true);
382 } else if (value.equals(RANK_PARAM_NONE)) {
383 mg_src_.setSortByRank(false);
384 }
385 }
386 else if (name.equals(MAXDOCS_PARAM)) {
387 int docs = Integer.parseInt(value);
388 mg_src_.setMaxDocs(docs);
389 } // ignore any others
390 }
391 return true;
392 }
393}
Note: See TracBrowser for help on using the repository browser.