source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/MGPPGDBMService.java@ 3471

Last change on this file since 3471 was 3471, checked in by kjdon, 22 years ago

Service modules now belong to a serviceCluster or collection - collection_name has been changed to the more general cluster_name. A service module can't configure itself from a file, because we no longer know where the appropriate file is, so it must be configured by passing the XML node to the configure method.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1/*
2 * MGPPGDBMService.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.mgpp.*;
22import org.greenstone.gdbm.*;
23import org.greenstone.gsdl3.util.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Node;
27import org.w3c.dom.Text;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31import java.util.HashMap;
32import java.util.Vector;
33import java.util.Set;
34import java.util.Map;
35import java.util.Iterator;
36/**
37 * A Service class for MGPP+GDBM
38 *
39 * @author <a href="mailto:[email protected]">Katherine Don</a>
40 * @version $Revision: 3471 $
41 */
42
43
44public class MGPPGDBMService
45 extends ServiceModule {
46
47 private MGPPWrapper mgpp_src_=null;
48 private GDBMWrapper gdbm_src_=null;
49
50 private String default_index_=null;
51 private String default_level_=null;
52
53 public MGPPGDBMService() {
54 mgpp_src_ = new MGPPWrapper();
55 gdbm_src_ = new GDBMWrapper();
56
57 }
58
59
60 /** configure this service */
61 public boolean configure(Element info) {
62
63 System.out.println("configuring MGPPGDBMService");
64
65 // get the default index from <defaultIndex>
66 Element def = (Element)GSXML.getChildByTagName(info, "defaultIndex");
67 if (def !=null) {
68 default_index_ = def.getAttribute("name");
69 }
70 if (default_index_==null||default_index_.equals("")) {
71 System.err.println("Error: default index not specified!");
72 return false;
73 }
74
75 System.out.println("default index="+default_index_);
76
77 // get the default level out of <defaultLevel>
78 def = (Element)GSXML.getChildByTagName(info, "defaultLevel");
79 if (def !=null) {
80 default_level_ = def.getAttribute("name");
81 }
82 if (default_level_==null||default_level_.equals("")) {
83 System.err.println("Error: default level not specified!");
84 return false;
85 }
86 // the default level is also the level which gdbm is expecting
87 mgpp_src_.setReturnLevel(default_level_);
88 System.out.println("default level="+default_level_);
89
90 // set up which services are available for this collection
91 Element e = null;
92 // these entries should reflect the build config file - some services may not be available depending on how the collection was built.
93
94 // set up short_service_info_ - for now just has name and type
95 e = doc_.createElement("service");
96 e.setAttribute("type", "query");
97 e.setAttribute("name", "TextQuery");
98 short_service_info_.appendChild(e);
99
100 e = doc_.createElement("service");
101 e.setAttribute("type", "query");
102 e.setAttribute("name", "FieldQuery");
103 short_service_info_.appendChild(e);
104
105 e = doc_.createElement("service");
106 e.setAttribute("type", "query");
107 e.setAttribute("name", "ResourceRetrieve");
108 short_service_info_.appendChild(e);
109
110 e = doc_.createElement("service");
111 e.setAttribute("type", "query");
112 e.setAttribute("name", "MetadataRetrieve");
113 short_service_info_.appendChild(e);
114
115 // set up service_info_map_ - for now, just has the same elements as above
116 // should have full details about each service incl params lists etc.
117 // do the text query one - for now a static list. later use buildcfg.xml values to dynamically change this
118 e = doc_.createElement("service");
119 e.setAttribute("type", "query");
120 e.setAttribute("name", "TextQuery");
121 Element paramList = doc_.createElement("paramList");
122 Element param;
123 // first do index and level params
124
125 // the index info - read from config file
126 Element index_list = (Element)GSXML.getChildByTagName(info, "indexList");
127 NodeList indexes = index_list.getElementsByTagName("index");
128 int len = indexes.getLength();
129 if (len > 1) {
130 // add level param to list
131 String [] inds = new String[len];
132 for (int i=0; i<len; i++) {
133 inds[i] = ((Element)indexes.item(i)).getAttribute("name");
134 }
135 param = GSXML.createParameter(doc_, "index", GSXML.PARAM_TYPE_ENUM, default_index_, inds);
136 paramList.appendChild(param);
137 } // else dont need to add it
138
139 // the level info - read from config file
140 Element level_list = (Element)GSXML.getChildByTagName(info, "levelList");
141 NodeList levels = level_list.getElementsByTagName("level");
142 len = levels.getLength();
143 if (len > 1) {
144 // add level param to list
145 String [] levs = new String[len];
146 for (int i=0; i<len; i++) {
147 levs[i] = ((Element)levels.item(i)).getAttribute("name");
148 }
149 param = GSXML.createParameter(doc_, "queryLevel", GSXML.PARAM_TYPE_ENUM, default_level_, levs);
150 paramList.appendChild(param);
151 } // else dont need to add it
152
153 // boolean params
154 param = GSXML.createParameter(doc_, "case", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
155 paramList.appendChild(param);
156 param = GSXML.createParameter(doc_, "stem", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
157 paramList.appendChild(param);
158 // enum params
159 String []vals = {"all", "some"};
160 param = GSXML.createParameter(doc_, "matchMode", GSXML.PARAM_TYPE_ENUM, "all", vals);
161 paramList.appendChild(param);
162 String [] vals1 = {"rank", "natural"};
163 param = GSXML.createParameter(doc_, "sortBy", GSXML.PARAM_TYPE_ENUM, "rank", vals1 );
164 paramList.appendChild(param);
165
166 // Integer params
167 param = GSXML.createParameter(doc_, "maxDocs", GSXML.PARAM_TYPE_INTEGER, "10", null);
168 paramList.appendChild(param);
169
170
171 e.appendChild(paramList);
172
173 service_info_map_.put("TextQuery", e);
174
175
176 e = doc_.createElement("service");
177 e.setAttribute("type", "query");
178 e.setAttribute("name", "FieldQuery");
179 service_info_map_.put("FieldQuery", e);
180
181 e = doc_.createElement("service");
182 e.setAttribute("type", "query");
183 e.setAttribute("name", "ResourceRetrieve");
184 service_info_map_.put("ResourceRetrieve", e);
185
186 e = doc_.createElement("service");
187 e.setAttribute("type", "query");
188 e.setAttribute("name", "MetadataRetrieve");
189 service_info_map_.put("MetadataRetrieve", e);
190
191 if (gdbm_src_.openDatabase(GSFile.GDBMDatabaseFile(site_home_, cluster_name_), GDBMWrapper.READER)) {
192 return true;
193 }
194 else {
195 System.err.println("couldn't open gdbm database!");
196 return false;
197 }
198 }
199
200
201 /** passes the request Element to the appropriate service function*/
202 protected Element processService(String name, Element request) {
203
204 // dont need to check that the service name is supported for this particular object, because that has been checked by ServiceModule
205 if (name.equals("TextQuery")) {
206 return processTextQuery(request);
207 } else if(name.equals("FieldQuery")) {
208 return processFieldQuery(request);
209 } else if (name.equals("ResourceRetrieve")) {
210 return processResourceRetrieve(request);
211 } else if (name.equals("MetadataRetrieve")) {
212 return processMetadataRetrieve(request);
213 }
214
215 System.err.println("MGPPGDBMService:should never get here. service type wrong:"+name);
216 return null;
217
218
219 }
220
221 /** process a text query */
222 protected Element processTextQuery(Element request) {
223
224 Element result = doc_.createElement("response");
225 String from = GSPath.appendLink(cluster_name_, "TextQuery");
226 result.setAttribute("from", from);
227 result.setAttribute("type", "query");
228
229 // get param list and content
230 Element param_elem=null;
231 Element content_elem=null;
232 Node n = request.getFirstChild();
233 while (n!=null) {
234 String node_name = n.getNodeName();
235 if (node_name.equals("paramList")) {
236 param_elem = (Element)n;
237 } else if (node_name.equals("content")) {
238 content_elem = (Element)n;
239 }
240 n = n.getNextSibling();
241 }
242
243 if (param_elem==null || content_elem==null) {
244 System.out.println("bad query request");
245 return result; // empty result
246 }
247
248 // get the query string
249 String query = GSXML.getNodeText(content_elem);
250 if (query==null) {
251 System.out.println("nothing to query");
252 return result;
253 }
254
255 HashMap params = GSXML.extractParams(param_elem);
256 String index = (String)params.get("index");
257 if (index==null) { // if its not present, use the default index
258 index=default_index_;
259 }
260
261 // now set up the mgpp stuff
262 String basedir = GSFile.collectionBaseDir(site_home_,
263 cluster_name_);
264 String indexdir = GSFile.collectionIndexPath(cluster_name_, index);
265
266 mgpp_src_.loadIndexData(basedir, indexdir);
267 setQueryParams(params);
268
269 mgpp_src_.runQuery(query);
270 MGPPQueryResult mqr= mgpp_src_.getQueryResult();
271
272 // get the docnums out, and convert to HASH ids
273 Vector docs = mqr.getDocs();
274 if (docs.size()==0) {
275 // no docs found
276 return result;
277 }
278 // create a resourceList element
279 Element c = doc_.createElement("content");
280 result.appendChild(c);
281 Element resource_list = doc_.createElement("resourceList");
282 c.appendChild(resource_list);
283 // add each resource
284 for (int d=0; d<docs.size(); d++) {
285 long docnum = ((MGPPDocInfo)docs.elementAt(d)).num_;
286 String id = gdbm_src_.docnum2Oid(docnum);
287 Node no = GSXML.createResourceElement(doc_, id);
288 resource_list.appendChild(no);
289 }
290 return result;
291 }//processTextQuery
292
293 /** process a fielded query */
294 protected Element processFieldQuery(Element request) {
295 Element result = doc_.createElement("response");
296 String from = GSPath.appendLink(cluster_name_, "FieldQuery");
297 result.setAttribute("from", from);
298 result.setAttribute("type", "query");
299
300
301 // dummy result
302 Text t = null;
303 t = doc_.createTextNode("Fieldquery result... ");
304 result.appendChild(t);
305 return result;
306 }
307
308 /** retrieve a document */
309 protected Element processResourceRetrieve(Element request) {
310
311 // where the mgpp text files are
312 String basedir = GSFile.collectionBaseDir(site_home_,
313 cluster_name_);
314 String textdir = GSFile.collectionTextPath(cluster_name_);
315
316 // an empty result
317 Element result = doc_.createElement("response");
318 String from = GSPath.appendLink(cluster_name_, "ResourceRetrieve");
319 result.setAttribute("from", from);
320 result.setAttribute("type", "query");
321 Element result_doc = doc_.createElement("content");
322 result.appendChild(result_doc);
323
324 // get param list and content - this code same as for TextQuery - put
325 // somewhere else?
326 Element param_elem=null;
327 Element content_elem=null;
328 Node n = request.getFirstChild();
329 while (n!=null) {
330 String node_name = n.getNodeName();
331 if (node_name.equals("paramList")) {
332 param_elem = (Element)n;
333 } else if (node_name.equals("content")) {
334 content_elem = (Element)n;
335 }
336 n = n.getNextSibling();
337 }
338
339 if (param_elem==null || content_elem==null) {
340 System.out.println("bad query request");
341 return result; // empty result
342 }
343
344 HashMap params = GSXML.extractParams(param_elem);
345 String resource_type = (String)params.get("resourceType");
346 if (resource_type==null) {
347 System.out.println("no resourceType defined, assume 'core'");
348 // have error? or assume that its core?
349 resource_type="core";
350 }
351
352 if (resource_type.equals("core")) {
353 // get docs from mgpp, and possibly associated files too
354 String level = (String)params.get("level"); // level at which to
355 // retrieve a doc
356 if (level==null) {
357 level=default_level_;
358 }
359 // whether to just get the doc, or all assoc files too.
360 String retrieve_extent = (String)params.get("retrieveExtent");
361 if (retrieve_extent==null) {
362 // assume just the doc
363 retrieve_extent = "docOnly";
364 }
365
366 // always get the doc (for now - this may change later)
367 // get the doc text, and if retrieveExtent = allFiles, get assoc files too
368
369 String []ids = GSXML.getResourceNameList(content_elem);
370 for (int j=0; j<ids.length; j++) {
371 long real_num = gdbm_src_.oid2Docnum(ids[j]);
372 System.out.println("getting doc text, basedir="+basedir+", textdir="+textdir+", level="+level+", num="+real_num);
373 String document = mgpp_src_.getDocument(basedir, textdir, level, real_num);
374 // for now, stick it in a text node - eventually should be parsed as xml??
375
376 // something funny with the doc -
377 Element new_doc = GSXML.createResourceElement(doc_, ids[j]);
378 GSXML.addDocText(doc_, new_doc, document);
379 if (retrieve_extent.equals("allFiles")) {
380 // get the assoc files too
381 }
382 result_doc.appendChild(new_doc);
383 }
384
385 } else if (resource_type.equals("assoc")) {
386 // get associated files
387 String assoc_file_dir = (String)params.get("assocfilepath");
388 if (assoc_file_dir==null) {
389 System.err.println("need assocfilepath to find files");
390
391 } else {
392 String []files = GSXML.getResourceNameList(content_elem);
393 for (int j=0; j<files.length; j++) {
394 Element new_doc = GSXML.createResourceElement(doc_,
395 files[j]);
396 // add the file
397 result_doc.appendChild(new_doc);
398 }
399 }
400 } else {
401 System.out.println("error: resource Type is not legal");
402 }
403
404 return result;
405
406 }
407
408 /** retrieve metadata */
409 protected Element processMetadataRetrieve(Element request) {
410 Element result = doc_.createElement("response");
411 String from = GSPath.appendLink(cluster_name_, "MetadataRetrieve");
412 result.setAttribute("from", from);
413 result.setAttribute("type", "query");
414 Element result_content = doc_.createElement("content");
415 result.appendChild(result_content);
416 Element resource_list = doc_.createElement("resourceList");
417 result_content.appendChild(resource_list);
418 // get the metadata
419
420 Element content = (Element)request.getElementsByTagName("content").item(0);
421 if (content==null) {
422 // error: query had no content!! - should make an error message
423 return result;
424 }
425 String []metas = GSXML.getMetaNameList(content);
426 String []ids = GSXML.getResourceNameList(content);
427 for (int j=0; j<ids.length; j++) { // for each document
428 Element doc = GSXML.createResourceElement(doc_, ids[j]);
429 Element list = GSXML.addMetaList(doc_, doc);
430 DBInfo info = gdbm_src_.getInfo(ids[j]);
431 for (int m=0; m<metas.length; m++) {
432 String value = info.getInfo(metas[m]);
433 GSXML.addMetadata(doc_, list, metas[m], value);
434 }
435 resource_list.appendChild(doc);
436 }
437 return result;
438 }
439
440 // should probably use a list rather than map
441 protected boolean setQueryParams(HashMap params) {
442
443 Set entries = params.entrySet();
444 Iterator i = entries.iterator();
445 while (i.hasNext()) {
446 Map.Entry m = (Map.Entry)i.next();
447 String name = (String)m.getKey();
448 String value = (String)m.getValue();
449
450 if (name.equals("case")) {
451 boolean val = (value.equals("1")?true:false);
452 mgpp_src_.setCase(val);
453 } else if (name.equals("stem")) {
454 boolean val = (value.equals("1")?true:false);
455 mgpp_src_.setStem(val);
456 } else if (name.equals("maxDocs")) {
457 int docs = Integer.parseInt(value);
458 mgpp_src_.setMaxDocs(docs);
459 } else if (name.equals("queryLevel")) {
460 mgpp_src_.setQueryLevel(value);
461 } else if (name.equals("resultLevel")) {
462 mgpp_src_.setReturnLevel(value);
463 } else if (name.equals("matchMode")) {
464 int mode;
465 if (value.equals("all")) mode=1;
466 else mode=0;
467 mgpp_src_.setMatchMode(mode);
468 } else if (name.equals("sortBy")) {
469 if (value.equals("rank")) {
470 mgpp_src_.setSortByRank(true);
471 } else if (value.equals("natural")) {
472 mgpp_src_.setSortByRank(false);
473 }
474 } else if (name.equals("termInfo")) {
475 boolean val = (value.equals("1")?true:false);
476 mgpp_src_.setReturnTerms(val);
477 } // ignore any others
478 }
479 return true;
480 }
481
482
483}
484
485
Note: See TracBrowser for help on using the repository browser.