source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2MGSearch.java@ 13911

Last change on this file since 13911 was 13911, checked in by kjdon, 17 years ago

Changed the format of index and field info in buildConfig and collectionConfig. There are no fields any more; only indexes are used. Each index has a shortname and a name: the name comes from collect.cfg and the shortname from build.cfg, e.g. text and TX, or section:text and ste. The config files for mg/mgpp/lucene collections are now more similar.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1/*
2 * GS2MGSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mg.*;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.NodeList;
29
30// java
31import java.util.Vector;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35import java.util.Set;
36import java.util.Iterator;
37import java.io.File;
38
39import org.apache.log4j.*;
40
41/**
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
45 */
46
47public class GS2MGSearch
48 extends AbstractGS2Search
49{
50
51 protected MGWrapper mg_src = null;
52
53 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2MGSearch.class.getName());
54
55
56 /** constructor */
57 public GS2MGSearch()
58 {
59 this.mg_src = new MGWrapper();
60
61 }
62 public void cleanUp() {
63 super.cleanUp();
64 this.mg_src.unloadIndexData();
65 }
66
67 /** configure this service */
68 public boolean configure(Element info, Element extra_info)
69 {
70 if (!super.configure(info, extra_info)){
71 return false;
72 }
73
74 this.mg_src.setMaxNumeric(this.maxnumeric);
75
76
77 return true;
78 }
79
80
81
82 /** do the actual query */
83 protected Element processTextQuery(Element request)
84 {
85
86 // Create a new (empty) result message
87 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
88 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
89 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
90
91 // Get the parameters of the request
92 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
93 if (param_list == null) {
94 logger.error("TextQuery request had no paramList.");
95 return result; // Return the empty result
96 }
97
98 // Process the request parameters
99 HashMap params = GSXML.extractParams(param_list, false);
100
101 // Make sure a query has been specified
102 String query = (String) params.get(QUERY_PARAM);
103 if (query == null || query.equals("")) {
104 return result; // Return the empty result
105 }
106
107 // If an index hasn't been specified, use the default
108 String index = (String) params.get(INDEX_PARAM);
109 if (index == null) {
110 index = this.default_index;
111 }
112
113 // The location of the MG index and text files
114 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed for MG
115 String textdir = GSFile.collectionTextPath(this.index_stem);
116 String indexpath = GSFile.collectionIndexPath(this.index_stem, index);
117 this.mg_src.setIndex(indexpath);
118 System.err.println("index path = "+indexpath);
119 // set the mg query parameters to the values the user has specified
120 setStandardQueryParams(params);
121 this.mg_src.runQuery(basedir, textdir, query);
122 MGQueryResult mqr = this.mg_src.getQueryResult();
123 if (mqr.isClear()) {
124 // something has gone wrong
125 GSXML.addError(this.doc, result, "Couldn't query the mg database", GSXML.ERROR_TYPE_SYSTEM);
126 return result;
127 }
128 long totalDocs = mqr.getTotalDocs();
129
130 // Get the docnums out, and convert to HASH ids
131 Vector docs = mqr.getDocs();
132 if (docs.size() == 0) {
133 logger.error("No results found...\n");
134 }
135
136 // Create a metadata list to store information about the query results
137 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
138 result.appendChild(metadata_list);
139
140 // Add a metadata element specifying the number of matching documents
141 // because teh total number is just the number returned, use numDocsReturned, not numDocsMatched
142 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+totalDocs);
143 // add a metadata item to specify what actual query was done - eg if stuff was stripped out etc. and then we can use the query later, cos we don't know which parameter was the query
144 GSXML.addMetadata(this.doc, metadata_list, "query", query);
145
146 if (docs.size() > 0) {
147 // Create a document list to store the matching documents, and add them
148 Element document_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
149 result.appendChild(document_list);
150 for (int d = 0; d < docs.size(); d++) {
151 long docnum = ((MGDocInfo) docs.elementAt(d)).num_;
152 float rank = ((MGDocInfo) docs.elementAt(d)).rank_;
153 String doc_id = internalNum2OID(docnum);
154 Element doc_node = createDocNode(doc_id, Float.toString(rank));
155 document_list.appendChild(doc_node);
156 }
157 }
158
159 // Create a term list to store the term information, and add it
160 Element term_list = this.doc.createElement(GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
161 result.appendChild(term_list);
162 Vector terms = mqr.getTerms();
163 for (int t = 0; t < terms.size(); t++) {
164 MGTermInfo term_info = (MGTermInfo) terms.get(t);
165
166 String term = term_info.term_;
167 int stem_method = term_info.stem_method_;
168 Vector equiv_terms = term_info.equiv_terms_;
169
170 Element term_elem = this.doc.createElement(GSXML.TERM_ELEM);
171 term_elem.setAttribute(GSXML.NAME_ATT, term);
172 term_elem.setAttribute(STEM_ATT, "" + stem_method);
173
174 Element equiv_term_list = this.doc.createElement(EQUIV_TERM_ELEM+GSXML.LIST_MODIFIER);
175 term_elem.appendChild(equiv_term_list);
176
177 long total_term_freq = 0;
178 for (int et = 0; et < equiv_terms.size(); et++) {
179 MGEquivTermInfo equiv_term_info = (MGEquivTermInfo) equiv_terms.get(et);
180
181 Element equiv_term_elem = this.doc.createElement(GSXML.TERM_ELEM);
182 equiv_term_elem.setAttribute(GSXML.NAME_ATT, equiv_term_info.term_);
183 equiv_term_elem.setAttribute(NUM_DOCS_MATCH_ATT, "" + equiv_term_info.match_docs_);
184 equiv_term_elem.setAttribute(FREQ_ATT, "" + equiv_term_info.term_freq_);
185 equiv_term_list.appendChild(equiv_term_elem);
186
187 total_term_freq += equiv_term_info.term_freq_;
188 }
189
190 term_elem.setAttribute(FREQ_ATT, "" + total_term_freq);
191 term_list.appendChild(term_elem);
192 }
193 return result;
194 }
195
196 // should probably use a list rather than map
197 protected boolean setStandardQueryParams(HashMap params)
198 {
199 // set the default ones
200 this.mg_src.setReturnTerms(true);
201 this.mg_src.setCase(true); // turn casefolding on by default
202 Set entries = params.entrySet();
203 Iterator i = entries.iterator();
204 while (i.hasNext()) {
205 Map.Entry m = (Map.Entry)i.next();
206 String name = (String)m.getKey();
207 String value = (String)m.getValue();
208
209 if (name.equals(CASE_PARAM)) {
210 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
211 this.mg_src.setCase(val);
212 }
213 else if (name.equals(STEM_PARAM)) {
214 boolean val = (value.equals(BOOLEAN_PARAM_ON) ? true : false);
215 this.mg_src.setStem(val);
216 }
217 else if (name.equals(MATCH_PARAM)) {
218 int mode = (value.equals(MATCH_PARAM_ALL) ? 1 : 0);
219 this.mg_src.setMatchMode(mode);
220 }
221 else if (name.equals(MAXDOCS_PARAM)) {
222 int docs = Integer.parseInt(value);
223 this.mg_src.setMaxDocs(docs);
224 } // ignore any others
225 }
226 return true;
227 }
228
229
230}
231
232
Note: See TracBrowser for help on using the repository browser.