source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 20238

Last change on this file since 20238 was 20238, checked in by kjdon, 15 years ago

set case to true by default (and don't set it to false again straight after that)

  • Property svn:keywords set to Author Date Id Revision
File size: 9.5 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
45 */
46
47public class GS2MGSearch
48extends AbstractGS2Search {
49
50 protected static MGSearchWrapper mg_src = null;
51
52 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2MGSearch.class.getName ());
53
54
55 /** constructor */
56 public GS2MGSearch () {
57 if(this.mg_src == null){
58 this.mg_src = new MGSearchWrapper ();
59 }
60 }
61 public void cleanUp () {
62 super.cleanUp ();
63 this.mg_src.unloadIndexData ();
64 }
65
66 /** configure this service */
67 public boolean configure (Element info, Element extra_info) {
68 if (!super.configure (info, extra_info)){
69 return false;
70 }
71
72 this.mg_src.setMaxNumeric (this.maxnumeric);
73 return true;
74 }
75
76
77
78 /** do the actual query */
79 protected Element processTextQuery (Element request) {
80 synchronized(this.mg_src){
81 // Create a new (empty) result message ('doc' is in ServiceRack.java)
82 Element result = this.doc.createElement (GSXML.RESPONSE_ELEM);
83 result.setAttribute (GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
84 result.setAttribute (GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
85
86 // Get the parameters of the request
87 Element param_list = (Element) GSXML.getChildByTagName (request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
88 if (param_list == null) {
89 logger.error ("TextQuery request had no paramList.");
90 return result; // Return the empty result
91 }
92
93 // Process the request parameters
94 HashMap params = GSXML.extractParams (param_list, false);
95
96 // Make sure a query has been specified
97 String query = (String) params.get (QUERY_PARAM);
98 if (query == null || query.equals ("")) {
99 return result; // Return the empty result
100 }
101
102 // If an index hasn't been specified, use the default
103 String index = (String) params.get (INDEX_PARAM);
104 if (index == null) {
105 index = this.default_index;
106 }
107
108 // If a subcollection index has been specified, use it
109 String indexSub = (String) params.get (INDEX_SUBCOLLECTION_PARAM);
110 if (indexSub != null) {
111 index += indexSub;
112 }
113 else{
114 if (!this.default_index_subcollection.equals ("")){
115 index += this.default_index_subcollection;
116 }
117 }
118
119 // If a subcollection index has been specified, use it
120 String indexLang = (String) params.get (INDEX_LANGUAGE_PARAM);
121 if (indexLang != null) {
122 index += indexLang;
123 }
124 else{
125 if (!this.default_index_language.equals ("")){
126 index += this.default_index_language;
127 }
128 }
129
130 // The location of the MG index and text files
131 String basedir = GSFile.collectionBaseDir (this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
132 String textdir = GSFile.collectionTextPath (this.index_stem);
133 String indexpath = GSFile.collectionIndexPath (this.index_stem, index);
134 this.mg_src.setIndex (indexpath);
135 System.err.println ("index path = "+indexpath);
136 // set the mg query parameters to the values the user has specified
137 setStandardQueryParams (params);
138 this.mg_src.runQuery (basedir, textdir, query);
139 MGQueryResult mqr = this.mg_src.getQueryResult ();
140 if (mqr.isClear ()) {
141 // something has gone wrong
142 GSXML.addError (this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
143 return result;
144 }
145 long totalDocs = mqr.getTotalDocs ();
146
147 // Get the docnums out, and convert to HASH ids
148 Vector docs = mqr.getDocs ();
149 if (docs.size () == 0) {
150 logger.error ("No results found...\n");
151 }
152
153 // Create a metadata list to store information about the query results
154 Element metadata_list = this.doc.createElement (GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
155 result.appendChild (metadata_list);
156
157 // Add a metadata element specifying the number of matching documents
158 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
159 GSXML.addMetadata (this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
160 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
161 GSXML.addMetadata (this.doc, metadata_list, "query", query);
162
163 if (docs.size () > 0) {
164 // Create a document list to store the matching documents, and add them
165 Element document_list = this.doc.createElement (GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
166 result.appendChild (document_list);
167 for (int d = 0; d < docs.size (); d++) {
168 long docnum = ((MGDocInfo) docs.elementAt (d)).num_;
169 float rank = ((MGDocInfo) docs.elementAt (d)).rank_;
170 String doc_id = internalNum2OID (docnum);
171 Element doc_node = createDocNode (doc_id, Float.toString (rank));
172 document_list.appendChild (doc_node);
173 }
174 }
175
176 // Create a term list to store the term information, and add it
177 Element term_list = this.doc.createElement (GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
178 result.appendChild (term_list);
179 Vector terms = mqr.getTerms ();
180 for (int t = 0; t < terms.size (); t++) {
181 MGTermInfo term_info = (MGTermInfo) terms.get (t);
182
183 String term = term_info.term_;
184 int stem_method = term_info.stem_method_;
185 Vector equiv_terms = term_info.equiv_terms_;
186
187 Element term_elem = this.doc.createElement (GSXML.TERM_ELEM);
188 term_elem.setAttribute (GSXML.NAME_ATT, term);
189 term_elem.setAttribute (STEM_ATT, "" + stem_method);
190
191 Element equiv_term_list = this.doc.createElement (EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
192 term_elem.appendChild (equiv_term_list);
193
194 long total_term_freq = 0;
195 for (int et = 0; et < equiv_terms.size (); et++) {
196 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get (et);
197
198 Element equiv_term_elem = this.doc.createElement (GSXML.TERM_ELEM);
199 equiv_term_elem.setAttribute (GSXML.NAME_ATT, equiv_term_info.term_);
200 equiv_term_elem.setAttribute (NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
201 equiv_term_elem.setAttribute (FREQ_ATT, "" + equiv_term_info.term_freq_);
202 equiv_term_list.appendChild (equiv_term_elem);
203
204 total_term_freq += equiv_term_info.term_freq_;
205 }
206
207 term_elem.setAttribute (FREQ_ATT, "" + total_term_freq);
208 term_list.appendChild (term_elem);
209 }
210 return result;
211 }//end of synchronized
212 }
213
214 // should probably use a list rather than map
215 protected boolean setStandardQueryParams(HashMap params)
216 {
217 // set the default settings that gs uses
218 this.mg_src.setReturnTerms(true);
219 this.mg_src.setCase(true);
220 this.mg_src.setStem(false);
221 Set entries = params.entrySet();
222 Iterator i = entries.iterator();
223 while (i.hasNext()) {
224 Map.Entry m = (Map.Entry)i.next();
225 String name = (String)m.getKey();
226 String value = (String)m.getValue();
227
228 if (name.equals(CASE_PARAM) && this.does_case) {
229 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
230 this.mg_src.setCase(val);
231 }
232 else if (name.equals(STEM_PARAM) && this.does_stem) {
233 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
234 this.mg_src.setStem(val);
235 }
236 else if (name.equals(MATCH_PARAM)) {
237 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
238 this.mg_src.setMatchMode(mode);
239 }
240 else if (name.equals(MAXDOCS_PARAM)) {
241 int docs = Integer.parseInt(value);
242 this.mg_src.setMaxDocs(docs);
243 } // ignore any others
244 }
245 return true;
246 }
247
248
249}
250
251
Note: See TracBrowser for help on using the repository browser.