source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractMGSearch.java@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:keywords set to Author Date Id Revision
File size: 11.4 KB
Line 
1/*
2 * AbstractMGSearch.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39/** Partially implements a generic MG search service
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 */
43
44abstract public class AbstractMGSearch
45 extends AbstractSearch
46{
47 protected static final String CASE_PARAM = "case";
48 protected static final String STEM_PARAM = "stem";
49 protected static final String MATCH_PARAM = "matchMode";
50 protected static final String MATCH_PARAM_ALL = "all";
51 protected static final String MATCH_PARAM_SOME = "some";
52
53 protected static final String BOOLEAN_PARAM_ON = "1";
54 protected static final String BOOLEAN_PARAM_OFF = "0";
55
56 protected static final String EQUIV_TERM_ELEM = "equivTerm";
57
58 protected static final String STEM_ATT = "stem";
59 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
60 protected static final String FREQ_ATT = "freq";
61
62 // Elements used in the config file that are specific to this class
63 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
64 protected static final String INDEX_STEM_ELEM = "indexStem";
65 protected static final String INDEX_ELEM = "index";
66
67 /** the default index */
68 protected String default_index = null;
69 /** the stem used for the index files */
70 protected String index_stem = null;
71 protected MGWrapper mg_src = null;
72
73
/** Creates the service and the MGWrapper instance it uses to run queries. */
public AbstractMGSearch()
{
    super();
    mg_src = new MGWrapper();
}
78
79 public void cleanUp() {
80 super.cleanUp();
81 this.mg_src.unloadIndexData();
82 }
83 public boolean configure(Element info, Element extra_info)
84 {
85 if (!super.configure(info, extra_info)) {
86 return false;
87 }
88 // do we support any of the extended features?
89 does_chunking = true;
90
91 // Get the default index out of <defaultIndex> (buildConfig.xml)
92 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
93 if (def != null) {
94 this.default_index = def.getAttribute(GSXML.NAME_ATT);
95 }
96 if (this.default_index == null || this.default_index.equals("")) {
97 System.err.println("Error: default index not specified!");
98 return false;
99 }
100
101 // the index stem is either the collection name or is specified in the config file
102 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
103 if (index_stem_elem != null) {
104 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
105 }
106 if (this.index_stem == null || this.index_stem.equals("")) {
107 System.err.println("AbstractMGSearch.configure(): indexStem element not found, stem will default to collection name");
108 this.index_stem = this.cluster_name;
109 }
110
111 // get display info from extra info
112 if (extra_info !=null) {
113 Document owner = info.getOwnerDocument();
114 // so far we have index specific display elements, and global format elements
115 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
116 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
117
118 for (int i=0; i<indexes.getLength();i++) {
119 Element ind = (Element)indexes.item(i);
120 String name = ind.getAttribute(GSXML.NAME_ATT);
121 Element node_extra = GSXML.getNamedElement(config_search,
122 GSXML.INDEX_ELEM,
123 GSXML.NAME_ATT,
124 name);
125 if (node_extra == null) {
126 System.err.println("GS2Search: haven't found extra info for index named "+name);
127 continue;
128 }
129
130 // get the display elements if any - displayName
131 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
132 if (display_names !=null) {
133 for (int j=0; j<display_names.getLength(); j++) {
134 Element e = (Element)display_names.item(j);
135 ind.appendChild(owner.importNode(e, true));
136 }
137 }
138 } // for each index
139 }
140 return true;
141 }
142
143 protected void addCustomQueryParams(Element param_list, String lang)
144 {
145 createParameter(CASE_PARAM, param_list, lang);
146 createParameter(STEM_PARAM, param_list, lang);
147 createParameter(MATCH_PARAM, param_list, lang);
148 }
149
150 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
151 {
152 // the index info - read from config file - cache it??
153 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
154 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
155 int len = indexes.getLength();
156 // now add even if there is only one
157 for (int i=0; i<len; i++) {
158 Element index = (Element)indexes.item(i);
159 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
160 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
161
162 }
163
164 }
165
166 /** do the actual query */
167 protected Element processTextQuery(Element request)
168 {
169
170 // Create a new (empty) result message
171 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
172 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
173 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
174
175 // Get the parameters of the request
176 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
177 if (param_list == null) {
178 System.err.println("Error: TextQuery request had no paramList.");
179 return result; // Return the empty result
180 }
181
182 // Process the request parameters
183 HashMap params = GSXML.extractParams(param_list, false);
184
185 // Make sure a query has been specified
186 String query = (String) params.get(QUERY_PARAM);
187 if (query == null || query.equals("")) {
188 return result; // Return the empty result
189 }
190
191 // If an index hasn't been specified, use the default
192 String index = (String) params.get(INDEX_PARAM);
193 if (index == null) {
194 index = this.default_index;
195 }
196
197 // The location of the MG index and text files
198 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
199 String textdir = GSFile.collectionTextPath(this.index_stem);
200 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
201 this.mg_src.setIndex(indexpath);
202
203 // set the mg query parameters to the values the user has specified
204 setStandardQueryParams(params);
205 this.mg_src.runQuery(basedir, textdir, query);
206 MGQueryResult mqr = this.mg_src.getQueryResult();
207 if (mqr.isClear()) {
208 // something has gone wrong
209 GSXML.addError(this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
210 return result;
211 }
212 long totalDocs = mqr.getTotalDocs();
213
214 // Get the docnums out, and convert to HASH ids
215 Vector docs = mqr.getDocs();
216 if (docs.size() == 0) {
217 System.err.println("GS2MGSearch: Warning: No results found...\n");
218 }
219
220 // Create a metadata list to store information about the query results
221 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
222 result.appendChild(metadata_list);
223
224 // Add a metadata element specifying the number of matching documents
225 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
226 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
227 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
228 GSXML.addMetadata(this.doc, metadata_list, "query", query);
229
230 // Create a document list to store the matching documents, and add them
231 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
232 result.appendChild(document_list);
233 for (int d = 0; d < docs.size(); d++) {
234 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
235 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
236 String doc_id = MGNum2OID(docnum);
237 Element doc_node = createDocNode(doc_id, Float.toString(rank));
238 document_list.appendChild(doc_node);
239 }
240
241 // Create a term list to store the term information, and add it
242 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
243 result.appendChild(term_list);
244 Vector terms = mqr.getTerms();
245 for (int t = 0; t < terms.size(); t++) {
246 MGTermInfo term_info = (MGTermInfo) terms.get(t);
247
248 String term = term_info.term_;
249 int stem_method = term_info.stem_method_;
250 Vector equiv_terms = term_info.equiv_terms_;
251
252 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
253 term_elem.setAttribute(GSXML.NAME_ATT, term);
254 term_elem.setAttribute(STEM_ATT, "" + stem_method);
255
256 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
257 term_elem.appendChild(equiv_term_list);
258
259 long total_term_freq = 0;
260 for (int et = 0; et < equiv_terms.size(); et++) {
261 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
262
263 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
264 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
265 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
266 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
267 equiv_term_list.appendChild(equiv_term_elem);
268
269 total_term_freq += equiv_term_info.term_freq_;
270 }
271
272 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
273 term_list.appendChild(term_elem);
274 }
275 return result;
276 }
277
278 // should probably use a list rather than map
279 protected boolean setStandardQueryParams(HashMap params)
280 {
281 // set the default ones
282 this.mg_src.setReturnTerms(true);
283 this.mg_src.setCase(true); // turn casefolding on by default
284 Set entries = params.entrySet();
285 Iterator i = entries.iterator();
286 while (i.hasNext()) {
287 Map.Entry m = (Map.Entry)i.next();
288 String name = (String)m.getKey();
289 String value = (String)m.getValue();
290
291 if (name.equals(CASE_PARAM)) {
292 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
293 this.mg_src.setCase(val);
294 }
295 else if (name.equals(STEM_PARAM)) {
296 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
297 this.mg_src.setStem(val);
298 }
299 else if (name.equals(MATCH_PARAM)) {
300 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
301 this.mg_src.setMatchMode(mode);
302 }
303 else if (name.equals(MAXDOCS_PARAM)) {
304 int docs = Integer.parseInt(value);
305 this.mg_src.setMaxDocs(docs);
306 } // ignore any others
307 }
308 return true;
309 }
310
311 /** convert MG internal id to Greenstone oid */
312 abstract protected String MGNum2OID(long docnum);
313}
Note: See TracBrowser for help on using the repository browser.