source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/AbstractMGSearch.java@ 9268

Last change on this file since 9268 was 9268, checked in by kjdon, 19 years ago

removed the abstract declarations for methods that are now defined in AbstractSearch

  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
1/*
2 * AbstractMGSearch.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39/** Partially implements a generic MG search service
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 */
43
44abstract public class AbstractMGSearch
45 extends AbstractSearch
46{
47 protected static final String CASE_PARAM = "case";
48 protected static final String STEM_PARAM = "stem";
49 protected static final String MATCH_PARAM = "matchMode";
50 protected static final String MATCH_PARAM_ALL = "all";
51 protected static final String MATCH_PARAM_SOME = "some";
52
53 protected static final String BOOLEAN_PARAM_ON = "1";
54 protected static final String BOOLEAN_PARAM_OFF = "0";
55
56 protected static final String EQUIV_TERM_ELEM = "equivTerm";
57
58 protected static final String STEM_ATT = "stem";
59 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
60 protected static final String FREQ_ATT = "freq";
61
62 // Elements used in the config file that are specific to this class
63 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
64 protected static final String INDEX_STEM_ELEM = "indexStem";
65 protected static final String INDEX_ELEM = "index";
66
67 /** the default index */
68 protected String default_index = null;
69 /** the stem used for the index files */
70 protected String index_stem = null;
71 protected MGWrapper mg_src = null;
72
73
74 public AbstractMGSearch()
75 {
76 this.mg_src = new MGWrapper();
77 this.dictionary_name = "MGSearch";
78 }
79
80 public boolean configure(Element info, Element extra_info)
81 {
82 // Get the default index out of <defaultIndex> (buildConfig.xml)
83 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
84 if (def != null) {
85 this.default_index = def.getAttribute(GSXML.NAME_ATT);
86 }
87 if (this.default_index == null || this.default_index.equals("")) {
88 System.err.println("Error: default index not specified!");
89 return false;
90 }
91
92 // the index stem is either the collection name or is specified in the config file
93 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
94 if (index_stem_elem != null) {
95 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
96 }
97 if (this.index_stem == null || this.index_stem.equals("")) {
98 System.err.println("AbstractMGSearch.configure(): indexStem element not found, stem will default to collection name");
99 this.index_stem = this.cluster_name;
100 }
101
102 // get display info from extra info
103 if (extra_info !=null) {
104 Document owner = info.getOwnerDocument();
105 // so far we have index specific display elements, and global format elements
106 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
107 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
108
109 for (int i=0; i<indexes.getLength();i++) {
110 Element ind = (Element)indexes.item(i);
111 String name = ind.getAttribute(GSXML.NAME_ATT);
112 Element node_extra = GSXML.getNamedElement(config_search,
113 GSXML.INDEX_ELEM,
114 GSXML.NAME_ATT,
115 name);
116 if (node_extra == null) {
117 System.err.println("GS2Search: haven't found extra info for index named "+name);
118 continue;
119 }
120
121 // get the display elements if any - displayName
122 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
123 if (display_names !=null) {
124 for (int j=0; j<display_names.getLength(); j++) {
125 Element e = (Element)display_names.item(j);
126 ind.appendChild(owner.importNode(e, true));
127 }
128 }
129 } // for each index
130 }
131 return super.configure(info, extra_info);
132 }
133
134 protected void addCustomQueryParams(Element param_list, String lang)
135 {
136 createParameter(CASE_PARAM, param_list, lang);
137 createParameter(STEM_PARAM, param_list, lang);
138 createParameter(MATCH_PARAM, param_list, lang);
139 }
140
141 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
142 {
143 // the index info - read from config file - cache it??
144 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
145 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
146 int len = indexes.getLength();
147 // now add even if there is only one
148 for (int i=0; i<len; i++) {
149 Element index = (Element)indexes.item(i);
150 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
151 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
152
153 }
154
155 }
156
157 /** do the actual query */
158 protected Element processTextQuery(Element request)
159 {
160
161 // Create a new (empty) result message
162 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
163 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
164 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
165
166 // Get the parameters of the request
167 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
168 if (param_list == null) {
169 System.err.println("Error: TextQuery request had no paramList.");
170 return result; // Return the empty result
171 }
172
173 // Process the request parameters
174 HashMap params = GSXML.extractParams(param_list, false);
175
176 // Make sure a query has been specified
177 String query = (String) params.get(QUERY_PARAM);
178 if (query == null || query.equals("")) {
179 return result; // Return the empty result
180 }
181
182 // If an index hasn't been specified, use the default
183 String index = (String) params.get(INDEX_PARAM);
184 if (index == null) {
185 index = this.default_index;
186 }
187
188 // The location of the MG index and text files
189 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
190 String textdir = GSFile.collectionTextPath(this.index_stem);
191 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
192 this.mg_src.setIndex(indexpath);
193
194 // set the mg query parameters to the values the user has specified
195 setStandardQueryParams(params);
196 this.mg_src.runQuery(basedir, textdir, query);
197 MGQueryResult mqr = this.mg_src.getQueryResult();
198 long totalDocs = mqr.getTotalDocs();
199
200 // Get the docnums out, and convert to HASH ids
201 Vector docs = mqr.getDocs();
202 if (docs.size() == 0) {
203 System.err.println("GS2MGSearch: Warning: No results found...\n");
204 }
205
206 // Create a metadata list to store information about the query results
207 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
208 result.appendChild(metadata_list);
209
210 // Add a metadata element specifying the number of matching documents
211 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
212 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
213 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
214 GSXML.addMetadata(this.doc, metadata_list, "query", query);
215
216 // Create a document list to store the matching documents, and add them
217 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
218 result.appendChild(document_list);
219 for (int d = 0; d < docs.size(); d++) {
220 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
221 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
222 String doc_id = MGNum2OID(docnum);
223 Element doc_node = createDocNode(doc_id, Float.toString(rank));
224 document_list.appendChild(doc_node);
225 }
226
227 // Create a term list to store the term information, and add it
228 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
229 result.appendChild(term_list);
230 Vector terms = mqr.getTerms();
231 for (int t = 0; t < terms.size(); t++) {
232 MGTermInfo term_info = (MGTermInfo) terms.get(t);
233
234 String term = term_info.term_;
235 int stem_method = term_info.stem_method_;
236 Vector equiv_terms = term_info.equiv_terms_;
237
238 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
239 term_elem.setAttribute(GSXML.NAME_ATT, term);
240 term_elem.setAttribute(STEM_ATT, "" + stem_method);
241
242 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
243 term_elem.appendChild(equiv_term_list);
244
245 long total_term_freq = 0;
246 for (int et = 0; et < equiv_terms.size(); et++) {
247 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
248
249 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
250 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
251 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
252 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
253 equiv_term_list.appendChild(equiv_term_elem);
254
255 total_term_freq += equiv_term_info.term_freq_;
256 }
257
258 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
259 term_list.appendChild(term_elem);
260 }
261 return result;
262 }
263
264 // should probably use a list rather than map
265 protected boolean setStandardQueryParams(HashMap params)
266 {
267 // set the default ones
268 this.mg_src.setReturnTerms(true);
269 this.mg_src.setCase(true); // turn casefolding on by default
270 Set entries = params.entrySet();
271 Iterator i = entries.iterator();
272 while (i.hasNext()) {
273 Map.Entry m = (Map.Entry)i.next();
274 String name = (String)m.getKey();
275 String value = (String)m.getValue();
276
277 if (name.equals(CASE_PARAM)) {
278 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
279 this.mg_src.setCase(val);
280 }
281 else if (name.equals(STEM_PARAM)) {
282 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
283 this.mg_src.setStem(val);
284 }
285 else if (name.equals(MATCH_PARAM)) {
286 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
287 this.mg_src.setMatchMode(mode);
288 }
289 else if (name.equals(MAXDOCS_PARAM)) {
290 int docs = Integer.parseInt(value);
291 this.mg_src.setMaxDocs(docs);
292 } // ignore any others
293 }
294 return true;
295 }
296
297 /** convert MG internal id to Greenstone oid */
298 abstract protected String MGNum2OID(long docnum);
299}
Note: See TracBrowser for help on using the repository browser.