source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractMGSearch.java@ 9280

Last change on this file since 9280 was 9280, checked in by kjdon, 19 years ago

no longer need the dictionary_name field

  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
/*
 * AbstractMGSearch.java
 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39/** Partially implements a generic MG search service
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 */
43
44abstract public class AbstractMGSearch
45 extends AbstractSearch
46{
47 protected static final String CASE_PARAM = "case";
48 protected static final String STEM_PARAM = "stem";
49 protected static final String MATCH_PARAM = "matchMode";
50 protected static final String MATCH_PARAM_ALL = "all";
51 protected static final String MATCH_PARAM_SOME = "some";
52
53 protected static final String BOOLEAN_PARAM_ON = "1";
54 protected static final String BOOLEAN_PARAM_OFF = "0";
55
56 protected static final String EQUIV_TERM_ELEM = "equivTerm";
57
58 protected static final String STEM_ATT = "stem";
59 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
60 protected static final String FREQ_ATT = "freq";
61
62 // Elements used in the config file that are specific to this class
63 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
64 protected static final String INDEX_STEM_ELEM = "indexStem";
65 protected static final String INDEX_ELEM = "index";
66
67 /** the default index */
68 protected String default_index = null;
69 /** the stem used for the index files */
70 protected String index_stem = null;
71 protected MGWrapper mg_src = null;
72
73
74 public AbstractMGSearch()
75 {
76 this.mg_src = new MGWrapper();
77 }
78
79 public boolean configure(Element info, Element extra_info)
80 {
81 if (!super.configure(info, extra_info)) {
82 return false;
83 }
84 // do we support any of the extended features?
85 does_chunking = true;
86
87 // Get the default index out of <defaultIndex> (buildConfig.xml)
88 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
89 if (def != null) {
90 this.default_index = def.getAttribute(GSXML.NAME_ATT);
91 }
92 if (this.default_index == null || this.default_index.equals("")) {
93 System.err.println("Error: default index not specified!");
94 return false;
95 }
96
97 // the index stem is either the collection name or is specified in the config file
98 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
99 if (index_stem_elem != null) {
100 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
101 }
102 if (this.index_stem == null || this.index_stem.equals("")) {
103 System.err.println("AbstractMGSearch.configure(): indexStem element not found, stem will default to collection name");
104 this.index_stem = this.cluster_name;
105 }
106
107 // get display info from extra info
108 if (extra_info !=null) {
109 Document owner = info.getOwnerDocument();
110 // so far we have index specific display elements, and global format elements
111 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
112 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
113
114 for (int i=0; i<indexes.getLength();i++) {
115 Element ind = (Element)indexes.item(i);
116 String name = ind.getAttribute(GSXML.NAME_ATT);
117 Element node_extra = GSXML.getNamedElement(config_search,
118 GSXML.INDEX_ELEM,
119 GSXML.NAME_ATT,
120 name);
121 if (node_extra == null) {
122 System.err.println("GS2Search: haven't found extra info for index named "+name);
123 continue;
124 }
125
126 // get the display elements if any - displayName
127 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
128 if (display_names !=null) {
129 for (int j=0; j<display_names.getLength(); j++) {
130 Element e = (Element)display_names.item(j);
131 ind.appendChild(owner.importNode(e, true));
132 }
133 }
134 } // for each index
135 }
136 return true;
137 }
138
139 protected void addCustomQueryParams(Element param_list, String lang)
140 {
141 createParameter(CASE_PARAM, param_list, lang);
142 createParameter(STEM_PARAM, param_list, lang);
143 createParameter(MATCH_PARAM, param_list, lang);
144 }
145
146 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
147 {
148 // the index info - read from config file - cache it??
149 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
150 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
151 int len = indexes.getLength();
152 // now add even if there is only one
153 for (int i=0; i<len; i++) {
154 Element index = (Element)indexes.item(i);
155 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
156 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
157
158 }
159
160 }
161
162 /** do the actual query */
163 protected Element processTextQuery(Element request)
164 {
165
166 // Create a new (empty) result message
167 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
168 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
169 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
170
171 // Get the parameters of the request
172 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
173 if (param_list == null) {
174 System.err.println("Error: TextQuery request had no paramList.");
175 return result; // Return the empty result
176 }
177
178 // Process the request parameters
179 HashMap params = GSXML.extractParams(param_list, false);
180
181 // Make sure a query has been specified
182 String query = (String) params.get(QUERY_PARAM);
183 if (query == null || query.equals("")) {
184 return result; // Return the empty result
185 }
186
187 // If an index hasn't been specified, use the default
188 String index = (String) params.get(INDEX_PARAM);
189 if (index == null) {
190 index = this.default_index;
191 }
192
193 // The location of the MG index and text files
194 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
195 String textdir = GSFile.collectionTextPath(this.index_stem);
196 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
197 this.mg_src.setIndex(indexpath);
198
199 // set the mg query parameters to the values the user has specified
200 setStandardQueryParams(params);
201 this.mg_src.runQuery(basedir, textdir, query);
202 MGQueryResult mqr = this.mg_src.getQueryResult();
203 long totalDocs = mqr.getTotalDocs();
204
205 // Get the docnums out, and convert to HASH ids
206 Vector docs = mqr.getDocs();
207 if (docs.size() == 0) {
208 System.err.println("GS2MGSearch: Warning: No results found...\n");
209 }
210
211 // Create a metadata list to store information about the query results
212 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
213 result.appendChild(metadata_list);
214
215 // Add a metadata element specifying the number of matching documents
216 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
217 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
218 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
219 GSXML.addMetadata(this.doc, metadata_list, "query", query);
220
221 // Create a document list to store the matching documents, and add them
222 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
223 result.appendChild(document_list);
224 for (int d = 0; d < docs.size(); d++) {
225 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
226 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
227 String doc_id = MGNum2OID(docnum);
228 Element doc_node = createDocNode(doc_id, Float.toString(rank));
229 document_list.appendChild(doc_node);
230 }
231
232 // Create a term list to store the term information, and add it
233 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
234 result.appendChild(term_list);
235 Vector terms = mqr.getTerms();
236 for (int t = 0; t < terms.size(); t++) {
237 MGTermInfo term_info = (MGTermInfo) terms.get(t);
238
239 String term = term_info.term_;
240 int stem_method = term_info.stem_method_;
241 Vector equiv_terms = term_info.equiv_terms_;
242
243 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
244 term_elem.setAttribute(GSXML.NAME_ATT, term);
245 term_elem.setAttribute(STEM_ATT, "" + stem_method);
246
247 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
248 term_elem.appendChild(equiv_term_list);
249
250 long total_term_freq = 0;
251 for (int et = 0; et < equiv_terms.size(); et++) {
252 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
253
254 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
255 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
256 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
257 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
258 equiv_term_list.appendChild(equiv_term_elem);
259
260 total_term_freq += equiv_term_info.term_freq_;
261 }
262
263 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
264 term_list.appendChild(term_elem);
265 }
266 return result;
267 }
268
269 // should probably use a list rather than map
270 protected boolean setStandardQueryParams(HashMap params)
271 {
272 // set the default ones
273 this.mg_src.setReturnTerms(true);
274 this.mg_src.setCase(true); // turn casefolding on by default
275 Set entries = params.entrySet();
276 Iterator i = entries.iterator();
277 while (i.hasNext()) {
278 Map.Entry m = (Map.Entry)i.next();
279 String name = (String)m.getKey();
280 String value = (String)m.getValue();
281
282 if (name.equals(CASE_PARAM)) {
283 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
284 this.mg_src.setCase(val);
285 }
286 else if (name.equals(STEM_PARAM)) {
287 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
288 this.mg_src.setStem(val);
289 }
290 else if (name.equals(MATCH_PARAM)) {
291 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
292 this.mg_src.setMatchMode(mode);
293 }
294 else if (name.equals(MAXDOCS_PARAM)) {
295 int docs = Integer.parseInt(value);
296 this.mg_src.setMaxDocs(docs);
297 } // ignore any others
298 }
299 return true;
300 }
301
302 /** convert MG internal id to Greenstone oid */
303 abstract protected String MGNum2OID(long docnum);
304}
Note: See TracBrowser for help on using the repository browser.