source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/MGPPGDBMService.java@ 3377

Last change on this file since 3377 was 3363, checked in by kjdon, 22 years ago

modified

  • Property svn:keywords set to Author Date Id Revision
File size: 14.2 KB
Line 
1/*
2 * MGPPGDBMService.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.mgpp.*;
22import org.greenstone.gdbm.*;
23import org.greenstone.gsdl3.util.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Node;
27import org.w3c.dom.Text;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31import java.util.HashMap;
32import java.util.Vector;
33import java.util.Set;
34import java.util.Map;
35import java.util.Iterator;
36/**
37 * A Service class for MGPP+GDBM
38 *
39 * @author <a href="mailto:[email protected]">Katherine Don</a>
40 * @version $Revision: 3363 $
41 */
42
43
44public class MGPPGDBMService
45 extends ServiceModule {
46 // not sure where this should go - maybe need a greenstone constants?
47 // or is it per collection, so should be specified in the buildcfg.xml?
48 public static final String DEFAULT_TEXT_LEVEL="Section";
49 private MGPPWrapper mgpp_src_=null;
50 private GDBMWrapper gdbm_src_=null;
51
52 private String default_index_=null;
/**
 * Instantiates the MGPP and GDBM wrapper objects used by this service.
 * The GDBM database file itself is opened later, in configure.
 */
public MGPPGDBMService() {
    this.mgpp_src_ = new MGPPWrapper();
    this.gdbm_src_ = new GDBMWrapper();
}
58 /** passes the request Element to the appropriate service function*/
59 protected Element processService(String name, Element request) {
60
61 // dont need to check that the service name is supported for this particular object, because that has been checked by ServiceModule
62 if (name.equals("TextQuery")) {
63 return processTextQuery(request);
64 } else if(name.equals("FieldQuery")) {
65 return processFieldQuery(request);
66 } else if (name.equals("ResourceRetrieve")) {
67 return processResourceRetrieve(request);
68 } else if (name.equals("MetadataRetrieve")) {
69 return processMetadataRetrieve(request);
70 }
71
72 System.err.println("MGPPGDBMService:should never get here. service type wrong:"+name);
73 return null;
74
75
76 }
77
78
79 /** configure this service */
80 public boolean configure(Element info) {
81
82 System.out.println("configuring MGPPGDBMService");
83 System.out.println("config info=");
84 System.out.println(converter_.getString(info));
85 // get the default index out of metadata
86 NodeList metadata = info.getElementsByTagName("metadata");
87 String name=null;
88 for(int i=0; i<metadata.getLength(); i++) {
89 Element e = (Element)metadata.item(i);
90 name = e.getAttribute("name");
91 if (name.equals("defaultIndex")) {
92 default_index_ = GSXML.getValue(e);
93 break;
94 }
95 }
96 System.out.println("default index="+default_index_);
97 Element e = null;
98 // these entries should reflect the build config file - some services may not be available depending on how the collection was built.
99 // set up short_service_info_ - for now just has name and type
100 e = doc_.createElement("service");
101 e.setAttribute("type", "query");
102 e.setAttribute("name", "TextQuery");
103 short_service_info_.appendChild(e);
104
105 e = doc_.createElement("service");
106 e.setAttribute("type", "query");
107 e.setAttribute("name", "FieldQuery");
108 short_service_info_.appendChild(e);
109
110 e = doc_.createElement("service");
111 e.setAttribute("type", "query");
112 e.setAttribute("name", "ResourceRetrieve");
113 short_service_info_.appendChild(e);
114
115 e = doc_.createElement("service");
116 e.setAttribute("type", "query");
117 e.setAttribute("name", "MetadataRetrieve");
118 short_service_info_.appendChild(e);
119
120 // set up service_info_map_ - for now, just has the same elements as above
121 // should have full details about each service incl params lists etc.
122 // do the text query one - for now a static list. later use buildcfg.xml values to dynamically change this
123 e = doc_.createElement("service");
124 e.setAttribute("type", "query");
125 e.setAttribute("name", "TextQuery");
126 Element paramList = doc_.createElement("paramList");
127 // boolean params
128 Element param = GSXML.createParameter(doc_, "case", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
129 paramList.appendChild(param);
130 param = GSXML.createParameter(doc_, "stem", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
131 paramList.appendChild(param);
132 // enum params
133 String []vals = {"all", "some"};
134 param = GSXML.createParameter(doc_, "matchMode", GSXML.PARAM_TYPE_ENUM, "all", vals);
135 paramList.appendChild(param);
136 String [] vals1 = {"rank", "natural"};
137 param = GSXML.createParameter(doc_, "sortBy", GSXML.PARAM_TYPE_ENUM, "rank", vals1 );
138 paramList.appendChild(param);
139 String [] vals2 = {"Document", "Section"};
140 param = GSXML.createParameter(doc_, "queryLevel", GSXML.PARAM_TYPE_ENUM, "Section", vals2);
141 paramList.appendChild(param);
142 // Integer params
143 param = GSXML.createParameter(doc_, "maxDocs", GSXML.PARAM_TYPE_INTEGER, "10", null);
144 paramList.appendChild(param);
145
146
147 e.appendChild(paramList);
148
149 service_info_map_.put("TextQuery", e);
150
151
152 e = doc_.createElement("service");
153 e.setAttribute("type", "query");
154 e.setAttribute("name", "FieldQuery");
155 service_info_map_.put("FieldQuery", e);
156
157 e = doc_.createElement("service");
158 e.setAttribute("type", "query");
159 e.setAttribute("name", "ResourceRetrieve");
160 service_info_map_.put("ResourceRetrieve", e);
161
162 e = doc_.createElement("service");
163 e.setAttribute("type", "query");
164 e.setAttribute("name", "MetadataRetrieve");
165 service_info_map_.put("MetadataRetrieve", e);
166
167
168 if (gdbm_src_.openDatabase(site_home_+"/collect/"+collection_name_+"/index/text/"+collection_name_+".ldb", GDBMWrapper.READER)) {
169 return true;
170 }
171 else {
172 System.err.println("couldn't open gdbm database!");
173 return false;
174 }
175 }
176
177
178 /** process a text query */
179 protected Element processTextQuery(Element request) {
180
181 Element result = doc_.createElement("response");
182 String from = GSPath.appendLink(collection_name_, "TextQuery");
183 result.setAttribute("from", from);
184 result.setAttribute("type", "query");
185
186 // get param list and content
187 Element param_elem=null;
188 Element content_elem=null;
189 Node n = request.getFirstChild();
190 while (n!=null) {
191 String node_name = n.getNodeName();
192 if (node_name.equals("paramList")) {
193 param_elem = (Element)n;
194 } else if (node_name.equals("content")) {
195 content_elem = (Element)n;
196 }
197 n = n.getNextSibling();
198 }
199
200 if (param_elem==null || content_elem==null) {
201 System.out.println("bad query request");
202 return result; // empty result
203 }
204
205 // get the query string
206 String query = GSXML.getNodeText(content_elem);
207 if (query==null) {
208 System.out.println("nothing to query");
209 return result;
210 }
211
212 HashMap params = GSXML.extractParams(param_elem);
213 String index = (String)params.get("Index");
214 if (index==null) { // if its not present, use the default index
215 index=default_index_;
216 }
217
218 // now set up the mgpp stuff
219 String basedir = GSFile.collectionBaseDir(site_home_,
220 collection_name_);
221 String indexdir = GSFile.collectionIndexPath(collection_name_, index);
222
223 mgpp_src_.loadIndexData(basedir, indexdir);
224 setQueryParams(params);
225
226 mgpp_src_.runQuery(query);
227 MGPPQueryResult mqr= mgpp_src_.getQueryResult();
228
229 // get the docnums out, and convert to HASH ids
230 Vector docs = mqr.getDocs();
231 if (docs.size()==0) {
232 // no docs found
233 return result;
234 }
235 // create a resourceList element
236 Element c = doc_.createElement("content");
237 result.appendChild(c);
238 Element resource_list = doc_.createElement("resourceList");
239 c.appendChild(resource_list);
240 // add each resource
241 for (int d=0; d<docs.size(); d++) {
242 long docnum = ((MGPPDocInfo)docs.elementAt(d)).num_;
243 String id = gdbm_src_.docnum2Oid(docnum);
244 Node no = GSXML.createResourceElement(doc_, id);
245 resource_list.appendChild(no);
246 }
247 return result;
248 }//processTextQuery
249
250 /** process a fielded query */
251 protected Element processFieldQuery(Element request) {
252 Element result = doc_.createElement("response");
253 String from = GSPath.appendLink(collection_name_, "FieldQuery");
254 result.setAttribute("from", from);
255 result.setAttribute("type", "query");
256
257
258 // dummy result
259 Text t = null;
260 t = doc_.createTextNode("Fieldquery result... ");
261 result.appendChild(t);
262 return result;
263 }
264
265 /** retrieve a document */
266 protected Element processResourceRetrieve(Element request) {
267
268 // where the mgpp text files are
269 String basedir = GSFile.collectionBaseDir(site_home_,
270 collection_name_);
271 String textdir = GSFile.collectionTextPath(collection_name_);
272
273 // an empty result
274 Element result = doc_.createElement("response");
275 String from = GSPath.appendLink(collection_name_, "ResourceRetrieve");
276 result.setAttribute("from", from);
277 result.setAttribute("type", "query");
278 Element result_doc = doc_.createElement("content");
279 result.appendChild(result_doc);
280
281 // get param list and content - this code same as for TextQuery - put
282 // somewhere else?
283 Element param_elem=null;
284 Element content_elem=null;
285 Node n = request.getFirstChild();
286 while (n!=null) {
287 String node_name = n.getNodeName();
288 if (node_name.equals("paramList")) {
289 param_elem = (Element)n;
290 } else if (node_name.equals("content")) {
291 content_elem = (Element)n;
292 }
293 n = n.getNextSibling();
294 }
295
296 if (param_elem==null || content_elem==null) {
297 System.out.println("bad query request");
298 return result; // empty result
299 }
300
301 HashMap params = GSXML.extractParams(param_elem);
302 String resource_type = (String)params.get("resourceType");
303 if (resource_type==null) {
304 System.out.println("no resourceType defined, assume 'core'");
305 // have error? or assume that its core?
306 resource_type="core";
307 }
308
309 if (resource_type.equals("core")) {
310 // get docs from mgpp, and possibly associated files too
311 String level = (String)params.get("level"); // level at which to
312 // retrieve a doc
313 if (level==null) {
314 level=DEFAULT_TEXT_LEVEL;
315 }
316 // whether to just get the doc, or all assoc files too.
317 String retrieve_extent = (String)params.get("retrieveExtent");
318 if (retrieve_extent==null) {
319 // assume just the doc
320 retrieve_extent = "docOnly";
321 }
322
323 // always get the doc (for now - this may change later)
324 // get the doc text, and if retrieveExtent = allFiles, get assoc files too
325
326 String []ids = GSXML.getResourceNameList(content_elem);
327 for (int j=0; j<ids.length; j++) {
328 long real_num = gdbm_src_.oid2Docnum(ids[j]);
329 System.out.println("getting doc text, basedir="+basedir+", textdir="+textdir+", level="+level+", num="+real_num);
330 String document = mgpp_src_.getDocument(basedir, textdir, level, real_num);
331 // for now, stick it in a text node - eventually should be parsed as xml??
332
333 // something funny with the doc -
334 Element new_doc = GSXML.createResourceElement(doc_, ids[j]);
335 GSXML.addDocText(doc_, new_doc, document);
336 if (retrieve_extent.equals("allFiles")) {
337 // get the assoc files too
338 }
339 result_doc.appendChild(new_doc);
340 }
341
342 } else if (resource_type.equals("assoc")) {
343 // get associated files
344 String assoc_file_dir = (String)params.get("assocfilepath");
345 if (assoc_file_dir==null) {
346 System.err.println("need assocfilepath to find files");
347
348 } else {
349 String []files = GSXML.getResourceNameList(content_elem);
350 for (int j=0; j<files.length; j++) {
351 Element new_doc = GSXML.createResourceElement(doc_,
352 files[j]);
353 // add the file
354 result_doc.appendChild(new_doc);
355 }
356 }
357 } else {
358 System.out.println("error: resource Type is not legal");
359 }
360
361 return result;
362
363 }
364
365 /** retrieve metadata */
366 protected Element processMetadataRetrieve(Element request) {
367 Element result = doc_.createElement("response");
368 String from = GSPath.appendLink(collection_name_, "MetadataRetrieve");
369 result.setAttribute("from", from);
370 result.setAttribute("type", "query");
371 Element result_content = doc_.createElement("content");
372 result.appendChild(result_content);
373 Element resource_list = doc_.createElement("resourceList");
374 result_content.appendChild(resource_list);
375 // get the metadata
376
377 Element content = (Element)request.getElementsByTagName("content").item(0);
378 if (content==null) {
379 // error: query had no content!! - should make an error message
380 return result;
381 }
382 String []metas = GSXML.getMetaNameList(content);
383 String []ids = GSXML.getResourceNameList(content);
384 for (int j=0; j<ids.length; j++) { // for each document
385 Element doc = GSXML.createResourceElement(doc_, ids[j]);
386 Element list = GSXML.addMetaList(doc_, doc);
387 DBInfo info = gdbm_src_.getInfo(ids[j]);
388 for (int m=0; m<metas.length; m++) {
389 String value = info.getInfo(metas[m]);
390 GSXML.addMetadata(doc_, list, metas[m], value);
391 }
392 resource_list.appendChild(doc);
393 }
394 return result;
395 }
396
397 // should probably use a list rather than map
398 protected boolean setQueryParams(HashMap params) {
399
400 Set entries = params.entrySet();
401 Iterator i = entries.iterator();
402 while (i.hasNext()) {
403 Map.Entry m = (Map.Entry)i.next();
404 String name = (String)m.getKey();
405 String value = (String)m.getValue();
406
407 if (name.equals("case")) {
408 boolean val = (value.equals("1")?true:false);
409 mgpp_src_.setCase(val);
410 } else if (name.equals("stem")) {
411 boolean val = (value.equals("1")?true:false);
412 mgpp_src_.setStem(val);
413 } else if (name.equals("maxDocs")) {
414 int docs = Integer.parseInt(value);
415 mgpp_src_.setMaxDocs(docs);
416 } else if (name.equals("queryLevel")) {
417 mgpp_src_.setQueryLevel(value);
418 } else if (name.equals("resultLevel")) {
419 mgpp_src_.setReturnLevel(value);
420 } else if (name.equals("matchMode")) {
421 int mode;
422 if (value.equals("all")) mode=1;
423 else mode=0;
424 mgpp_src_.setMatchMode(mode);
425 } else if (name.equals("sortBy")) {
426 if (value.equals("rank")) {
427 mgpp_src_.setSortByRank(true);
428 } else if (value.equals("natural")) {
429 mgpp_src_.setSortByRank(false);
430 }
431 } else if (name.equals("termInfo")) {
432 boolean val = (value.equals("1")?true:false);
433 mgpp_src_.setReturnTerms(val);
434 } // ignore any others
435 }
436 return true;
437 }
438
439
440}
441
442
Note: See TracBrowser for help on using the repository browser.