source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 14517

Last change on this file since 14517 was 14517, checked in by shaoqun, 17 years ago

reset the state of the mg indexer before setting it according to the query parameters

  • Property svn:keywords set to Author Date Id Revision
File size: 9.5 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
45 */
46
47public class GS2MGSearch
48extends AbstractGS2Search {
49
50// protected MGSearchWrapper mg_src = null;
51 protected MGSearchWrapper mg_src = null;
52
53 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2MGSearch.class.getName ());
54
55
56 /** constructor */
57 public GS2MGSearch () {
58// this.mg_src = new MGSearchWrapper ();
59 this.mg_src = new MGSearchWrapper ();
60
61 }
62 public void cleanUp () {
63 super.cleanUp ();
64 this.mg_src.unloadIndexData ();
65 }
66
67 /** configure this service */
68 public boolean configure (Element info, Element extra_info) {
69 if (!super.configure (info, extra_info)){
70 return false;
71 }
72
73 this.mg_src.setMaxNumeric (this.maxnumeric);
74
75 return true;
76 }
77
78
79
80 /** do the actual query */
81 protected Element processTextQuery (Element request) {
82
83 // Create a new (empty) result message ('doc' is in ServiceRack.java)
84 Element result = this.doc.createElement (GSXML.RESPONSE_ELEM);
85 result.setAttribute (GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
86 result.setAttribute (GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
87
88 // Get the parameters of the request
89 Element param_list = (Element) GSXML.getChildByTagName (request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
90 if (param_list == null) {
91 logger.error ("TextQuery request had no paramList.");
92 return result; // Return the empty result
93 }
94
95 // Process the request parameters
96 HashMap params = GSXML.extractParams (param_list, false);
97
98 // Make sure a query has been specified
99 String query = (String) params.get (QUERY_PARAM);
100 if (query == null || query.equals ("")) {
101 return result; // Return the empty result
102 }
103
104 // If an index hasn't been specified, use the default
105 String index = (String) params.get (INDEX_PARAM);
106 if (index == null) {
107 index = this.default_index;
108 }
109
110 // If a subcollection index has been specified, use it
111 String indexSub = (String) params.get (INDEX_SUBCOLLECTION_PARAM);
112 if (indexSub != null) {
113 index += indexSub;
114 }
115 else{
116 if (!this.default_index_subcollection.equals ("")){
117 index += this.default_index_subcollection;
118 }
119 }
120
121 // If a subcollection index has been specified, use it
122 String indexLang = (String) params.get (INDEX_LANGUAGE_PARAM);
123 if (indexLang != null) {
124 index += indexLang;
125 }
126 else{
127 if (!this.default_index_language.equals ("")){
128 index += this.default_index_language;
129 }
130 }
131
132 // The location of the MG index and text files
133 String basedir = GSFile.collectionBaseDir (this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
134 String textdir = GSFile.collectionTextPath (this.index_stem);
135 String indexpath = GSFile.collectionIndexPath (this.index_stem, index);
136 this.mg_src.setIndex (indexpath);
137 System.err.println ("index path = "+indexpath);
138 // set the mg query parameters to the values the user has specified
139 setStandardQueryParams (params);
140 this.mg_src.runQuery (basedir, textdir, query);
141 MGQueryResult mqr = this.mg_src.getQueryResult ();
142 if (mqr.isClear ()) {
143 // something has gone wrong
144 GSXML.addError (this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
145 return result;
146 }
147 long totalDocs = mqr.getTotalDocs ();
148
149 // Get the docnums out, and convert to HASH ids
150 Vector docs = mqr.getDocs ();
151 if (docs.size () == 0) {
152 logger.error ("No results found...\n");
153 }
154
155 // Create a metadata list to store information about the query results
156 Element metadata_list = this.doc.createElement (GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
157 result.appendChild (metadata_list);
158
159 // Add a metadata element specifying the number of matching documents
160 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
161 GSXML.addMetadata (this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
162 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
163 GSXML.addMetadata (this.doc, metadata_list, "query", query);
164
165 if (docs.size () > 0) {
166 // Create a document list to store the matching documents, and add them
167 Element document_list = this.doc.createElement (GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
168 result.appendChild (document_list);
169 for (int d = 0; d < docs.size (); d++) {
170 long docnum = ((MGDocInfo) docs.elementAt (d)).num_;
171 float rank = ((MGDocInfo) docs.elementAt (d)).rank_;
172 String doc_id = internalNum2OID (docnum);
173 Element doc_node = createDocNode (doc_id, Float.toString (rank));
174 document_list.appendChild (doc_node);
175 }
176 }
177
178 // Create a term list to store the term information, and add it
179 Element term_list = this.doc.createElement (GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
180 result.appendChild (term_list);
181 Vector terms = mqr.getTerms ();
182 for (int t = 0; t < terms.size (); t++) {
183 MGTermInfo term_info = (MGTermInfo) terms.get (t);
184
185 String term = term_info.term_;
186 int stem_method = term_info.stem_method_;
187 Vector equiv_terms = term_info.equiv_terms_;
188
189 Element term_elem = this.doc.createElement (GSXML.TERM_ELEM);
190 term_elem.setAttribute (GSXML.NAME_ATT, term);
191 term_elem.setAttribute (STEM_ATT, "" + stem_method);
192
193 Element equiv_term_list = this.doc.createElement (EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
194 term_elem.appendChild (equiv_term_list);
195
196 long total_term_freq = 0;
197 for (int et = 0; et < equiv_terms.size (); et++) {
198 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get (et);
199
200 Element equiv_term_elem = this.doc.createElement (GSXML.TERM_ELEM);
201 equiv_term_elem.setAttribute (GSXML.NAME_ATT, equiv_term_info.term_);
202 equiv_term_elem.setAttribute (NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
203 equiv_term_elem.setAttribute (FREQ_ATT, "" + equiv_term_info.term_freq_);
204 equiv_term_list.appendChild (equiv_term_elem);
205
206 total_term_freq += equiv_term_info.term_freq_;
207 }
208
209 term_elem.setAttribute (FREQ_ATT, "" + total_term_freq);
210 term_list.appendChild (term_elem);
211 }
212 return result;
213 }
214
215 // should probably use a list rather than map
216 protected boolean setStandardQueryParams(HashMap params)
217 {
218 // set the default ones
219 this.mg_src.setReturnTerms(true);
220 this.mg_src.setCase(true); // turn casefolding on by default
221 Set entries = params.entrySet();
222 Iterator i = entries.iterator();
223 this.mg_src.setCase(false);
224 this.mg_src.setStem(false);
225 while (i.hasNext()) {
226 Map.Entry m = (Map.Entry)i.next();
227 String name = (String)m.getKey();
228 String value = (String)m.getValue();
229
230 if (name.equals(CASE_PARAM) && this.does_case) {
231 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
232 this.mg_src.setCase(val);
233 }
234 else if (name.equals(STEM_PARAM) && this.does_stem) {
235 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
236 this.mg_src.setStem(val);
237 }
238 else if (name.equals(MATCH_PARAM)) {
239 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
240 this.mg_src.setMatchMode(mode);
241 }
242 else if (name.equals(MAXDOCS_PARAM)) {
243 int docs = Integer.parseInt(value);
244 this.mg_src.setMaxDocs(docs);
245 } // ignore any others
246 }
247 return true;
248 }
249
250
251}
252
253
Note: See TracBrowser for help on using the repository browser.