source: branches/ant-install-branch/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractMGSearch.java@ 9815

Last change on this file since 9815 was 9815, checked in by kjdon, 19 years ago

some methods from documentretrieve classes now throw GSExceptions. am trying to make it so that no Exceptions get to the user interface. returning a lot more error elements too, in the hope that they may be useful for other people

  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
1/*
2 * AbstractMGSearch.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39/** Partially implements a generic MG search service
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 */
43
44abstract public class AbstractMGSearch
45 extends AbstractSearch
46{
47 protected static final String CASE_PARAM = "case";
48 protected static final String STEM_PARAM = "stem";
49 protected static final String MATCH_PARAM = "matchMode";
50 protected static final String MATCH_PARAM_ALL = "all";
51 protected static final String MATCH_PARAM_SOME = "some";
52
53 protected static final String BOOLEAN_PARAM_ON = "1";
54 protected static final String BOOLEAN_PARAM_OFF = "0";
55
56 protected static final String EQUIV_TERM_ELEM = "equivTerm";
57
58 protected static final String STEM_ATT = "stem";
59 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
60 protected static final String FREQ_ATT = "freq";
61
62 // Elements used in the config file that are specific to this class
63 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
64 protected static final String INDEX_STEM_ELEM = "indexStem";
65 protected static final String INDEX_ELEM = "index";
66
67 /** the default index */
68 protected String default_index = null;
69 /** the stem used for the index files */
70 protected String index_stem = null;
71 protected MGWrapper mg_src = null;
72
73
74 public AbstractMGSearch()
75 {
76 this.mg_src = new MGWrapper();
77 }
78
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info)) {
82 return false;
83 }
84 // do we support any of the extended features?
85 does_chunking = true;
86
87 // Get the default index out of <defaultIndex> (buildConfig.xml)
88 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
89 if (def != null) {
90 this.default_index = def.getAttribute(GSXML.NAME_ATT);
91 }
92 if (this.default_index == null || this.default_index.equals("")) {
93 System.err.println("Error: default index not specified!");
94 return false;
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null) {
100 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
101 }
102 if (this.index_stem == null || this.index_stem.equals("")) {
103 System.err.println("AbstractMGSearch.configure(): indexStem element not found, stem will default to collection name");
104 this.index_stem = this.cluster_name;
105 }
106
107 // get display info from extra info
108 if (extra_info !=null) {
109 Document owner = info.getOwnerDocument();
110 // so far we have index specific display elements, and global format elements
111 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
112 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
113
114 for (int i=0; i<indexes.getLength();i++) {
115 Element ind = (Element)indexes.item(i);
116 String name = ind.getAttribute(GSXML.NAME_ATT);
117 Element node_extra = GSXML.getNamedElement(config_search,
118 GSXML.INDEX_ELEM,
119 GSXML.NAME_ATT,
120 name);
121 if (node_extra == null) {
122 System.err.println("GS2Search: haven't found extra info for index named "+name);
123 continue;
124 }
125
126 // get the display elements if any - displayName
127 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
128 if (display_names !=null) {
129 for (int j=0; j<display_names.getLength(); j++) {
130 Element e = (Element)display_names.item(j);
131 ind.appendChild(owner.importNode(e, true));
132 }
133 }
134 } // for each index
135 }
136 return true;
137 }
138
139 protected void addCustomQueryParams(Element param_list, String lang)
140 {
141 createParameter(CASE_PARAM, param_list, lang);
142 createParameter(STEM_PARAM, param_list, lang);
143 createParameter(MATCH_PARAM, param_list, lang);
144 }
145
146 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
147 {
148 // the index info - read from config file - cache it??
149 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
150 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
151 int len = indexes.getLength();
152 // now add even if there is only one
153 for (int i=0; i<len; i++) {
154 Element index = (Element)indexes.item(i);
155 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
156 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
157
158 }
159
160 }
161
162 /** do the actual query */
163 protected Element processTextQuery(Element request)
164 {
165
166 // Create a new (empty) result message
167 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
168 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
169 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
170
171 // Get the parameters of the request
172 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
173 if (param_list == null) {
174 System.err.println("Error: TextQuery request had no paramList.");
175 return result; // Return the empty result
176 }
177
178 // Process the request parameters
179 HashMap params = GSXML.extractParams(param_list, false);
180
181 // Make sure a query has been specified
182 String query = (String) params.get(QUERY_PARAM);
183 if (query == null || query.equals("")) {
184 return result; // Return the empty result
185 }
186
187 // If an index hasn't been specified, use the default
188 String index = (String) params.get(INDEX_PARAM);
189 if (index == null) {
190 index = this.default_index;
191 }
192
193 // The location of the MG index and text files
194 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
195 String textdir = GSFile.collectionTextPath(this.index_stem);
196 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
197 this.mg_src.setIndex(indexpath);
198
199 // set the mg query parameters to the values the user has specified
200 setStandardQueryParams(params);
201 this.mg_src.runQuery(basedir, textdir, query);
202 MGQueryResult mqr = this.mg_src.getQueryResult();
203 if (mqr.isClear()) {
204 // something has gone wrong
205 GSXML.addError(this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
206 return result;
207 }
208 long totalDocs = mqr.getTotalDocs();
209
210 // Get the docnums out, and convert to HASH ids
211 Vector docs = mqr.getDocs();
212 if (docs.size() == 0) {
213 System.err.println("GS2MGSearch: Warning: No results found...\n");
214 }
215
216 // Create a metadata list to store information about the query results
217 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
218 result.appendChild(metadata_list);
219
220 // Add a metadata element specifying the number of matching documents
221 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
222 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
223 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
224 GSXML.addMetadata(this.doc, metadata_list, "query", query);
225
226 // Create a document list to store the matching documents, and add them
227 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
228 result.appendChild(document_list);
229 for (int d = 0; d < docs.size(); d++) {
230 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
231 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
232 String doc_id = MGNum2OID(docnum);
233 Element doc_node = createDocNode(doc_id, Float.toString(rank));
234 document_list.appendChild(doc_node);
235 }
236
237 // Create a term list to store the term information, and add it
238 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
239 result.appendChild(term_list);
240 Vector terms = mqr.getTerms();
241 for (int t = 0; t < terms.size(); t++) {
242 MGTermInfo term_info = (MGTermInfo) terms.get(t);
243
244 String term = term_info.term_;
245 int stem_method = term_info.stem_method_;
246 Vector equiv_terms = term_info.equiv_terms_;
247
248 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
249 term_elem.setAttribute(GSXML.NAME_ATT, term);
250 term_elem.setAttribute(STEM_ATT, "" + stem_method);
251
252 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
253 term_elem.appendChild(equiv_term_list);
254
255 long total_term_freq = 0;
256 for (int et = 0; et < equiv_terms.size(); et++) {
257 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
258
259 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
260 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
261 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
262 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
263 equiv_term_list.appendChild(equiv_term_elem);
264
265 total_term_freq += equiv_term_info.term_freq_;
266 }
267
268 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
269 term_list.appendChild(term_elem);
270 }
271 return result;
272 }
273
274 // should probably use a list rather than map
275 protected boolean setStandardQueryParams(HashMap params)
276 {
277 // set the default ones
278 this.mg_src.setReturnTerms(true);
279 this.mg_src.setCase(true); // turn casefolding on by default
280 Set entries = params.entrySet();
281 Iterator i = entries.iterator();
282 while (i.hasNext()) {
283 Map.Entry m = (Map.Entry)i.next();
284 String name = (String)m.getKey();
285 String value = (String)m.getValue();
286
287 if (name.equals(CASE_PARAM)) {
288 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
289 this.mg_src.setCase(val);
290 }
291 else if (name.equals(STEM_PARAM)) {
292 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
293 this.mg_src.setStem(val);
294 }
295 else if (name.equals(MATCH_PARAM)) {
296 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
297 this.mg_src.setMatchMode(mode);
298 }
299 else if (name.equals(MAXDOCS_PARAM)) {
300 int docs = Integer.parseInt(value);
301 this.mg_src.setMaxDocs(docs);
302 } // ignore any others
303 }
304 return true;
305 }
306
307 /** convert MG internal id to Greenstone oid */
308 abstract protected String MGNum2OID(long docnum);
309}
Note: See TracBrowser for help on using the repository browser.