source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 29318

Last change on this file since 29318 was 28966, checked in by kjdon, 10 years ago

Lots of changes, mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In a service, this.desc_doc is available as the document to create service info stuff, but it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.5 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38import java.io.Serializable;
39
40import org.apache.log4j.*;
41
42/**
43 *
44 * @author Katherine Don
45 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
46 */
47
48public class GS2MGSearch
49extends AbstractGS2TextSearch {
50
51 protected static MGSearchWrapper mg_src = null;
52
53 static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2MGSearch.class.getName ());
54
55
56 /** constructor */
57 public GS2MGSearch () {
58 if(this.mg_src == null){
59 this.mg_src = new MGSearchWrapper ();
60 }
61 }
62 public void cleanUp () {
63 super.cleanUp ();
64 this.mg_src.unloadIndexData ();
65 }
66
67 /** configure this service */
68 public boolean configure (Element info, Element extra_info) {
69 if (!super.configure (info, extra_info)){
70 return false;
71 }
72
73 this.mg_src.setMaxNumeric (this.maxnumeric);
74 return true;
75 }
76
77
78
79 /** do the actual query */
80 protected Element processTextQuery (Element request) {
81 synchronized(this.mg_src){
82 // Create a new (empty) result message ('doc' is in ServiceRack.java)
83 Document result_doc = XMLConverter.newDOM();
84 Element result = result_doc.createElement (GSXML.RESPONSE_ELEM);
85 result.setAttribute (GSXML.FROM_ATT, QUERY_SERVICE);
86 result.setAttribute (GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
87
88 // Get the parameters of the request
89 Element param_list = (Element) GSXML.getChildByTagName (request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
90 if (param_list == null) {
91 logger.error ("TextQuery request had no paramList.");
92 return result; // Return the empty result
93 }
94
95 // Process the request parameters
96 HashMap<String, Serializable> params = GSXML.extractParams (param_list, false);
97
98 // Make sure a query has been specified
99 String query = (String) params.get (QUERY_PARAM);
100 if (query == null || query.equals ("")) {
101 return result; // Return the empty result
102 }
103
104 // If an index hasn't been specified, use the default
105 String index = (String) params.get (INDEX_PARAM);
106 if (index == null) {
107 index = this.default_index;
108 }
109
110 // If a subcollection index has been specified, use it
111 String indexSub = (String) params.get (INDEX_SUBCOLLECTION_PARAM);
112 if (indexSub != null) {
113 index += indexSub;
114 }
115 else{
116 if (!this.default_index_subcollection.equals ("")){
117 index += this.default_index_subcollection;
118 }
119 }
120
121 // If a subcollection index has been specified, use it
122 String indexLang = (String) params.get (INDEX_LANGUAGE_PARAM);
123 if (indexLang != null) {
124 index += indexLang;
125 }
126 else{
127 if (!this.default_index_language.equals ("")){
128 index += this.default_index_language;
129 }
130 }
131
132 // The location of the MG index and text files
133 String basedir = GSFile.collectionBaseDir (this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
134 String textdir = GSFile.collectionTextPath (this.index_stem);
135 String indexpath = GSFile.collectionIndexPath (this.index_stem, index);
136 this.mg_src.setIndex (indexpath);
137
138 // set the mg query parameters to the values the user has specified
139 setStandardQueryParams (params);
140 this.mg_src.runQuery (basedir, textdir, query);
141 MGQueryResult mqr = this.mg_src.getQueryResult ();
142 if (mqr.isClear ()) {
143 // something has gone wrong
144 GSXML.addError (result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
145 return result;
146 }
147 long totalDocs = mqr.getTotalDocs ();
148
149 // Get the docnums out, and convert to HASH ids
150 Vector docs = mqr.getDocs ();
151 if (docs.size () == 0) {
152 logger.error ("No results found...\n");
153 }
154
155 // Create a metadata list to store information about the query results
156 Element metadata_list = result_doc.createElement (GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
157 result.appendChild (metadata_list);
158
159 // Add a metadata element specifying the number of matching documents
160 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
161 GSXML.addMetadata (metadata_list, "numDocsReturned", ""+totalDocs);
162 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
163 GSXML.addMetadata (metadata_list, "query", query);
164
165 if (docs.size () > 0) {
166 // Create a document list to store the matching documents, and add them
167 Element document_list = result_doc.createElement (GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
168 result.appendChild (document_list);
169 for (int d = 0; d < docs.size (); d++) {
170 long docnum = ((MGDocInfo) docs.elementAt (d)).num_;
171 float rank = ((MGDocInfo) docs.elementAt (d)).rank_;
172 String doc_id = internalNum2OID (docnum);
173 Element doc_node = createDocNode (result_doc, doc_id, Float.toString (rank));
174 document_list.appendChild (doc_node);
175 }
176 }
177
178 // Create a term list to store the term information, and add it
179 Element term_list = result_doc.createElement (GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
180 result.appendChild (term_list);
181 Vector terms = mqr.getTerms ();
182 for (int t = 0; t < terms.size (); t++) {
183 MGTermInfo term_info = (MGTermInfo) terms.get (t);
184
185 String term = term_info.term_;
186 int stem_method = term_info.stem_method_;
187 Vector equiv_terms = term_info.equiv_terms_;
188
189 Element term_elem = result_doc.createElement (GSXML.TERM_ELEM);
190 term_elem.setAttribute (GSXML.NAME_ATT, term);
191 term_elem.setAttribute (STEM_ATT, "" + stem_method);
192
193 Element equiv_term_list = result_doc.createElement (EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
194 term_elem.appendChild (equiv_term_list);
195
196 long total_term_freq = 0;
197 for (int et = 0; et < equiv_terms.size (); et++) {
198 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get (et);
199
200 Element equiv_term_elem = result_doc.createElement (GSXML.TERM_ELEM);
201 equiv_term_elem.setAttribute (GSXML.NAME_ATT, equiv_term_info.term_);
202 equiv_term_elem.setAttribute (NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
203 equiv_term_elem.setAttribute (FREQ_ATT, "" + equiv_term_info.term_freq_);
204 equiv_term_list.appendChild (equiv_term_elem);
205
206 total_term_freq += equiv_term_info.term_freq_;
207 }
208
209 term_elem.setAttribute (FREQ_ATT, "" + total_term_freq);
210 term_list.appendChild (term_elem);
211 }
212 return result;
213 }//end of synchronized
214 }
215
216 // should probably use a list rather than map
217 protected boolean setStandardQueryParams(HashMap<String, Serializable> params)
218 {
219 // set the default settings that gs uses
220 this.mg_src.setReturnTerms(true);
221 this.mg_src.setCase(true);
222 this.mg_src.setStem(false);
223 Set entries = params.entrySet();
224 Iterator i = entries.iterator();
225 while (i.hasNext()) {
226 Map.Entry m = (Map.Entry)i.next();
227 String name = (String)m.getKey();
228 String value = (String)m.getValue();
229
230 if (name.equals(CASE_PARAM) && this.does_case) {
231 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
232 this.mg_src.setCase(val);
233 }
234 else if (name.equals(STEM_PARAM) && this.does_stem) {
235 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
236 this.mg_src.setStem(val);
237 }
238 else if (name.equals(MATCH_PARAM)) {
239 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
240 this.mg_src.setMatchMode(mode);
241 }
242 else if (name.equals(MAXDOCS_PARAM)) {
243 int docs = Integer.parseInt(value);
244 this.mg_src.setMaxDocs(docs);
245 } // ignore any others
246 }
247 return true;
248 }
249
250
251}
252
253
Note: See TracBrowser for help on using the repository browser.