source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/service/AbstractMGSearch.java@ 13242

Last change on this file since 13242 was 13242, checked in by kjdon, 17 years ago

These files were removed from the main Greenstone repository because I reordered the class inheritance, so I have added them here in case we ever want to use this gs3 building. I can't guarantee that they will work without modification.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
/*
 * AbstractMGSearch.java
 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.GSFile;
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/** Partially implements a generic MG search service
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 */
45
46abstract public class AbstractMGSearch
47 extends AbstractSearch
48{
49
50 static Category logger = Category.getInstance(org.greenstone.gsdl3.service.AbstractMGSearch.class.getName());
51 protected static final String CASE_PARAM = "case";
52 protected static final String STEM_PARAM = "stem";
53 protected static final String MATCH_PARAM = "matchMode";
54 protected static final String MATCH_PARAM_ALL = "all";
55 protected static final String MATCH_PARAM_SOME = "some";
56
57 protected static final String BOOLEAN_PARAM_ON = "1";
58 protected static final String BOOLEAN_PARAM_OFF = "0";
59
60 protected static final String EQUIV_TERM_ELEM = "equivTerm";
61
62 protected static final String STEM_ATT = "stem";
63 protected static final String NUM_DOCS_MATCH_ATT = "numDocsMatch";
64 protected static final String FREQ_ATT = "freq";
65
66 // Elements used in the config file that are specific to this class
67 protected static final String DEFAULT_INDEX_ELEM = "defaultIndex";
68 protected static final String INDEX_STEM_ELEM = "indexStem";
69 protected static final String INDEX_ELEM = "index";
70
71 /** the stem used for the index files */
72 protected String index_stem = null;
73 protected MGWrapper mg_src = null;
74
75
76 public AbstractMGSearch()
77 {
78 this.mg_src = new MGWrapper();
79 }
80
81 public void cleanUp() {
82 super.cleanUp();
83 this.mg_src.unloadIndexData();
84 }
85 public boolean configure(Element info, Element extra_info)
86 {
87 if (!super.configure(info, extra_info)) {
88 return false;
89 }
90 // do we support any of the extended features?
91 does_chunking = true;
92
93 // Get the default index out of <defaultIndex> (buildConfig.xml)
94 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
95 if (def != null) {
96 this.default_index = def.getAttribute(GSXML.NAME_ATT);
97 } // otherwise will be "", and the first one will be the default
98
99 // the index stem is either the collection name or is specified in the config file
100 Element index_stem_elem = (Element) GSXML.getChildByTagName(info, INDEX_STEM_ELEM);
101 if (index_stem_elem != null) {
102 this.index_stem = index_stem_elem.getAttribute(GSXML.NAME_ATT);
103 }
104 if (this.index_stem == null || this.index_stem.equals("")) {
105 logger.error("AbstractMGSearch.configure(): indexStem element not found, stem will default to collection name");
106 this.index_stem = this.cluster_name;
107 }
108
109 // get display info from extra info
110 if (extra_info !=null) {
111 Document owner = info.getOwnerDocument();
112 // so far we have index specific display elements, and global format elements
113 NodeList indexes = info.getElementsByTagName(GSXML.INDEX_ELEM);
114 Element config_search = (Element)GSXML.getChildByTagName(extra_info, GSXML.SEARCH_ELEM);
115
116 for (int i=0; i<indexes.getLength();i++) {
117 Element ind = (Element)indexes.item(i);
118 String name = ind.getAttribute(GSXML.NAME_ATT);
119 Element node_extra = GSXML.getNamedElement(config_search,
120 GSXML.INDEX_ELEM,
121 GSXML.NAME_ATT,
122 name);
123 if (node_extra == null) {
124 logger.error("haven't found extra info for index named "+name);
125 continue;
126 }
127
128 // get the display elements if any - displayName
129 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
130 if (display_names !=null) {
131 for (int j=0; j<display_names.getLength(); j++) {
132 Element e = (Element)display_names.item(j);
133 ind.appendChild(owner.importNode(e, true));
134 }
135 }
136 } // for each index
137 }
138 return true;
139 }
140
141 protected void addCustomQueryParams(Element param_list, String lang)
142 {
143 createParameter(CASE_PARAM, param_list, lang);
144 createParameter(STEM_PARAM, param_list, lang);
145 createParameter(MATCH_PARAM, param_list, lang);
146 }
147
148 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang) {
149 // the index info -
150 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
151 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
152 int len = indexes.getLength();
153 // now add even if there is only one
154 for (int i=0; i<len; i++) {
155 Element index = (Element)indexes.item(i);
156 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
157 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
158
159 }
160
161 }
162 /** do the actual query */
163 protected Element processTextQuery(Element request)
164 {
165
166 // Create a new (empty) result message
167 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
168 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
169 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
170
171 // Get the parameters of the request
172 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
173 if (param_list == null) {
174 logger.error("TextQuery request had no paramList.");
175 return result; // Return the empty result
176 }
177
178 // Process the request parameters
179 HashMap params = GSXML.extractParams(param_list, false);
180
181 // Make sure a query has been specified
182 String query = (String) params.get(QUERY_PARAM);
183 if (query == null || query.equals("")) {
184 return result; // Return the empty result
185 }
186
187 // If an index hasn't been specified, use the default
188 String index = (String) params.get(INDEX_PARAM);
189 if (index == null) {
190 index = this.default_index;
191 }
192
193 // The location of the MG index and text files
194 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
195 String textdir = GSFile.collectionTextPath(this.index_stem);
196 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
197 this.mg_src.setIndex(indexpath);
198
199 // set the mg query parameters to the values the user has specified
200 setStandardQueryParams(params);
201 this.mg_src.runQuery(basedir, textdir, query);
202 MGQueryResult mqr = this.mg_src.getQueryResult();
203 if (mqr.isClear()) {
204 // something has gone wrong
205 GSXML.addError(this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
206 return result;
207 }
208 long totalDocs = mqr.getTotalDocs();
209
210 // Get the docnums out, and convert to HASH ids
211 Vector docs = mqr.getDocs();
212 if (docs.size() == 0) {
213 logger.error("No results found...\n");
214 }
215
216 // Create a metadata list to store information about the query results
217 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
218 result.appendChild(metadata_list);
219
220 // Add a metadata element specifying the number of matching documents
221 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
222 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
223 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
224 GSXML.addMetadata(this.doc, metadata_list, "query", query);
225
226 if (docs.size() > 0) {
227 // Create a document list to store the matching documents, and add them
228 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
229 result.appendChild(document_list);
230 for (int d = 0; d < docs.size(); d++) {
231 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
232 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
233 String doc_id = MGNum2OID(docnum);
234 Element doc_node = createDocNode(doc_id, Float.toString(rank));
235 document_list.appendChild(doc_node);
236 }
237 }
238
239 // Create a term list to store the term information, and add it
240 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
241 result.appendChild(term_list);
242 Vector terms = mqr.getTerms();
243 for (int t = 0; t < terms.size(); t++) {
244 MGTermInfo term_info = (MGTermInfo) terms.get(t);
245
246 String term = term_info.term_;
247 int stem_method = term_info.stem_method_;
248 Vector equiv_terms = term_info.equiv_terms_;
249
250 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
251 term_elem.setAttribute(GSXML.NAME_ATT, term);
252 term_elem.setAttribute(STEM_ATT, "" + stem_method);
253
254 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
255 term_elem.appendChild(equiv_term_list);
256
257 long total_term_freq = 0;
258 for (int et = 0; et < equiv_terms.size(); et++) {
259 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
260
261 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
262 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
263 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
264 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
265 equiv_term_list.appendChild(equiv_term_elem);
266
267 total_term_freq += equiv_term_info.term_freq_;
268 }
269
270 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
271 term_list.appendChild(term_elem);
272 }
273 return result;
274 }
275
276 // should probably use a list rather than map
277 protected boolean setStandardQueryParams(HashMap params)
278 {
279 // set the default ones
280 this.mg_src.setReturnTerms(true);
281 this.mg_src.setCase(true); // turn casefolding on by default
282 Set entries = params.entrySet();
283 Iterator i = entries.iterator();
284 while (i.hasNext()) {
285 Map.Entry m = (Map.Entry)i.next();
286 String name = (String)m.getKey();
287 String value = (String)m.getValue();
288
289 if (name.equals(CASE_PARAM)) {
290 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
291 this.mg_src.setCase(val);
292 }
293 else if (name.equals(STEM_PARAM)) {
294 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
295 this.mg_src.setStem(val);
296 }
297 else if (name.equals(MATCH_PARAM)) {
298 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
299 this.mg_src.setMatchMode(mode);
300 }
301 else if (name.equals(MAXDOCS_PARAM)) {
302 int docs = Integer.parseInt(value);
303 this.mg_src.setMaxDocs(docs);
304 } // ignore any others
305 }
306 return true;
307 }
308
309 /** convert MG internal id to Greenstone oid */
310 abstract protected String MGNum2OID(long docnum);
311}
Note: See TracBrowser for help on using the repository browser.