source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/MGPPGDBMServices.java@ 3492

Last change on this file since 3492 was 3492, checked in by kjdon, 22 years ago

ServiceModule renamed to ServicesImpl, each class renamed to xxxServices eg
MGPPGDBMService --> MGPPGDBMServices, cos its a collection of services :-)

  • Property svn:keywords set to Author Date Id Revision
File size: 15.5 KB
Line 
1/*
2 * MGPPGDBMServices.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.mgpp.*;
22import org.greenstone.gdbm.*;
23import org.greenstone.gsdl3.util.*;
24
25import org.w3c.dom.Document;
26import org.w3c.dom.Node;
27import org.w3c.dom.Text;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31import java.util.HashMap;
32import java.util.Vector;
33import java.util.Set;
34import java.util.Map;
35import java.util.Iterator;
36/**
37 * A ServicesImpl class for MGPP+GDBM
38 *
39 * @author <a href="mailto:[email protected]">Katherine Don</a>
40 * @version $Revision: 3492 $
41 * @see ServicesImpl
42 */
43
44
45public class MGPPGDBMServices
46 extends ServicesImpl {
47
48 private MGPPWrapper mgpp_src_=null;
49 private GDBMWrapper gdbm_src_=null;
50
51 private String default_index_=null;
52 private String default_level_=null;
53
54 public MGPPGDBMServices() {
55 mgpp_src_ = new MGPPWrapper();
56 gdbm_src_ = new GDBMWrapper();
57
58 }
59
60
61 /** configure this service */
62 public boolean configure(Element info) {
63
64 System.out.println("configuring MGPPGDBMServices");
65
66 // get the default index from <defaultIndex>
67 Element def = (Element)GSXML.getChildByTagName(info, "defaultIndex");
68 if (def !=null) {
69 default_index_ = def.getAttribute("name");
70 }
71 if (default_index_==null||default_index_.equals("")) {
72 System.err.println("Error: default index not specified!");
73 return false;
74 }
75
76 System.out.println("default index="+default_index_);
77
78 // get the default level out of <defaultLevel>
79 def = (Element)GSXML.getChildByTagName(info, "defaultLevel");
80 if (def !=null) {
81 default_level_ = def.getAttribute("name");
82 }
83 if (default_level_==null||default_level_.equals("")) {
84 System.err.println("Error: default level not specified!");
85 return false;
86 }
87 // the default level is also the level which gdbm is expecting
88 mgpp_src_.setReturnLevel(default_level_);
89 System.out.println("default level="+default_level_);
90
91 // set up which services are available for this collection
92 Element e = null;
93 // these entries should reflect the build config file - some services may not be available depending on how the collection was built.
94
95 // set up short_service_info_ - for now just has name and type
96 e = doc_.createElement("service");
97 e.setAttribute("type", "query");
98 e.setAttribute("name", "TextQuery");
99 short_service_info_.appendChild(e);
100
101 e = doc_.createElement("service");
102 e.setAttribute("type", "query");
103 e.setAttribute("name", "FieldQuery");
104 short_service_info_.appendChild(e);
105
106 e = doc_.createElement("service");
107 e.setAttribute("type", "query");
108 e.setAttribute("name", "ResourceRetrieve");
109 short_service_info_.appendChild(e);
110
111 e = doc_.createElement("service");
112 e.setAttribute("type", "query");
113 e.setAttribute("name", "MetadataRetrieve");
114 short_service_info_.appendChild(e);
115
116 // set up service_info_map_ - for now, just has the same elements as above
117 // should have full details about each service incl params lists etc.
118 // do the text query one - for now a static list. later use buildcfg.xml values to dynamically change this
119 e = doc_.createElement("service");
120 e.setAttribute("type", "query");
121 e.setAttribute("name", "TextQuery");
122 Element paramList = doc_.createElement("paramList");
123 Element param;
124 // first do index and level params
125
126 // the index info - read from config file
127 Element index_list = (Element)GSXML.getChildByTagName(info, "indexList");
128 NodeList indexes = index_list.getElementsByTagName("index");
129 int len = indexes.getLength();
130 if (len > 1) {
131 // add level param to list
132 String [] inds = new String[len];
133 for (int i=0; i<len; i++) {
134 inds[i] = ((Element)indexes.item(i)).getAttribute("name");
135 }
136 param = GSXML.createParameter(doc_, "index", GSXML.PARAM_TYPE_ENUM, default_index_, inds);
137 paramList.appendChild(param);
138 } // else dont need to add it
139
140 // the level info - read from config file
141 Element level_list = (Element)GSXML.getChildByTagName(info, "levelList");
142 NodeList levels = level_list.getElementsByTagName("level");
143 len = levels.getLength();
144 if (len > 1) {
145 // add level param to list
146 String [] levs = new String[len];
147 for (int i=0; i<len; i++) {
148 levs[i] = ((Element)levels.item(i)).getAttribute("name");
149 }
150 param = GSXML.createParameter(doc_, "queryLevel", GSXML.PARAM_TYPE_ENUM, default_level_, levs);
151 paramList.appendChild(param);
152 } // else dont need to add it
153
154 // boolean params
155 param = GSXML.createParameter(doc_, "case", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
156 paramList.appendChild(param);
157 param = GSXML.createParameter(doc_, "stem", GSXML.PARAM_TYPE_BOOLEAN, "1", null);
158 paramList.appendChild(param);
159 // enum params
160 String []vals = {"all", "some"};
161 param = GSXML.createParameter(doc_, "matchMode", GSXML.PARAM_TYPE_ENUM, "all", vals);
162 paramList.appendChild(param);
163 String [] vals1 = {"rank", "natural"};
164 param = GSXML.createParameter(doc_, "sortBy", GSXML.PARAM_TYPE_ENUM, "rank", vals1 );
165 paramList.appendChild(param);
166
167 // Integer params
168 param = GSXML.createParameter(doc_, "maxDocs", GSXML.PARAM_TYPE_INTEGER, "10", null);
169 paramList.appendChild(param);
170
171
172 e.appendChild(paramList);
173
174 service_info_map_.put("TextQuery", e);
175
176
177 e = doc_.createElement("service");
178 e.setAttribute("type", "query");
179 e.setAttribute("name", "FieldQuery");
180 service_info_map_.put("FieldQuery", e);
181
182 e = doc_.createElement("service");
183 e.setAttribute("type", "query");
184 e.setAttribute("name", "ResourceRetrieve");
185 service_info_map_.put("ResourceRetrieve", e);
186
187 e = doc_.createElement("service");
188 e.setAttribute("type", "query");
189 e.setAttribute("name", "MetadataRetrieve");
190 service_info_map_.put("MetadataRetrieve", e);
191
192 if (gdbm_src_.openDatabase(GSFile.GDBMDatabaseFile(site_home_, cluster_name_), GDBMWrapper.READER)) {
193 return true;
194 }
195 else {
196 System.err.println("couldn't open gdbm database!");
197 return false;
198 }
199 }
200
201
202 /** passes the request Element to the appropriate service function*/
203 protected Element processService(String name, Element request) {
204
205 // dont need to check that the service name is supported for this particular object, because that has been checked by ServicesImpl
206 if (name.equals("TextQuery")) {
207 return processTextQuery(request);
208 } else if(name.equals("FieldQuery")) {
209 return processFieldQuery(request);
210 } else if (name.equals("ResourceRetrieve")) {
211 return processResourceRetrieve(request);
212 } else if (name.equals("MetadataRetrieve")) {
213 return processMetadataRetrieve(request);
214 }
215
216 System.err.println("MGPPGDBMServices:should never get here. service type wrong:"+name);
217 return null;
218
219
220 }
221
222 /** process a text query */
223 protected Element processTextQuery(Element request) {
224
225 Element result = doc_.createElement("response");
226 String from = GSPath.appendLink(cluster_name_, "TextQuery");
227 result.setAttribute("from", from);
228 result.setAttribute("type", "query");
229
230 // get param list and content
231 Element param_elem=null;
232 Element content_elem=null;
233 Node n = request.getFirstChild();
234 while (n!=null) {
235 String node_name = n.getNodeName();
236 if (node_name.equals("paramList")) {
237 param_elem = (Element)n;
238 } else if (node_name.equals("content")) {
239 content_elem = (Element)n;
240 }
241 n = n.getNextSibling();
242 }
243
244 if (param_elem==null || content_elem==null) {
245 System.out.println("bad query request");
246 return result; // empty result
247 }
248
249 // get the query string
250 String query = GSXML.getNodeText(content_elem);
251 if (query==null) {
252 System.out.println("nothing to query");
253 return result;
254 }
255
256 HashMap params = GSXML.extractParams(param_elem);
257 String index = (String)params.get("index");
258 if (index==null) { // if its not present, use the default index
259 index=default_index_;
260 }
261
262 // now set up the mgpp stuff
263 String basedir = GSFile.collectionBaseDir(site_home_,
264 cluster_name_);
265 String indexdir = GSFile.collectionIndexPath(cluster_name_, index);
266
267 mgpp_src_.loadIndexData(basedir, indexdir);
268 setQueryParams(params);
269
270 mgpp_src_.runQuery(query);
271 MGPPQueryResult mqr= mgpp_src_.getQueryResult();
272
273 // get the docnums out, and convert to HASH ids
274 Vector docs = mqr.getDocs();
275 if (docs.size()==0) {
276 // no docs found
277 return result;
278 }
279 // create a resourceList element
280 Element c = doc_.createElement("content");
281 result.appendChild(c);
282 Element resource_list = doc_.createElement("resourceList");
283 c.appendChild(resource_list);
284 // add each resource
285 for (int d=0; d<docs.size(); d++) {
286 long docnum = ((MGPPDocInfo)docs.elementAt(d)).num_;
287 String id = gdbm_src_.docnum2Oid(docnum);
288 Node no = GSXML.createResourceElement(doc_, id);
289 resource_list.appendChild(no);
290 }
291 return result;
292 }//processTextQuery
293
294 /** process a fielded query */
295 protected Element processFieldQuery(Element request) {
296 Element result = doc_.createElement("response");
297 String from = GSPath.appendLink(cluster_name_, "FieldQuery");
298 result.setAttribute("from", from);
299 result.setAttribute("type", "query");
300
301
302 // dummy result
303 Text t = null;
304 t = doc_.createTextNode("Fieldquery result... ");
305 result.appendChild(t);
306 return result;
307 }
308
309 /** retrieve a document */
310 protected Element processResourceRetrieve(Element request) {
311
312 // where the mgpp text files are
313 String basedir = GSFile.collectionBaseDir(site_home_,
314 cluster_name_);
315 String textdir = GSFile.collectionTextPath(cluster_name_);
316
317 // an empty result
318 Element result = doc_.createElement("response");
319 String from = GSPath.appendLink(cluster_name_, "ResourceRetrieve");
320 result.setAttribute("from", from);
321 result.setAttribute("type", "query");
322 Element result_doc = doc_.createElement("content");
323 result.appendChild(result_doc);
324
325 // get param list and content - this code same as for TextQuery - put
326 // somewhere else?
327 Element param_elem=null;
328 Element content_elem=null;
329 Node n = request.getFirstChild();
330 while (n!=null) {
331 String node_name = n.getNodeName();
332 if (node_name.equals("paramList")) {
333 param_elem = (Element)n;
334 } else if (node_name.equals("content")) {
335 content_elem = (Element)n;
336 }
337 n = n.getNextSibling();
338 }
339
340 if (param_elem==null || content_elem==null) {
341 System.out.println("bad query request");
342 return result; // empty result
343 }
344
345 HashMap params = GSXML.extractParams(param_elem);
346 String resource_type = (String)params.get("resourceType");
347 if (resource_type==null) {
348 System.out.println("no resourceType defined, assume 'core'");
349 // have error? or assume that its core?
350 resource_type="core";
351 }
352
353 if (resource_type.equals("core")) {
354 // get docs from mgpp, and possibly associated files too
355 String level = (String)params.get("level"); // level at which to
356 // retrieve a doc
357 if (level==null) {
358 level=default_level_;
359 }
360 // whether to just get the doc, or all assoc files too.
361 String retrieve_extent = (String)params.get("retrieveExtent");
362 if (retrieve_extent==null) {
363 // assume just the doc
364 retrieve_extent = "docOnly";
365 }
366
367 // always get the doc (for now - this may change later)
368 // get the doc text, and if retrieveExtent = allFiles, get assoc files too
369
370 String []ids = GSXML.getResourceNameList(content_elem);
371 for (int j=0; j<ids.length; j++) {
372 long real_num = gdbm_src_.oid2Docnum(ids[j]);
373 System.out.println("getting doc text, basedir="+basedir+", textdir="+textdir+", level="+level+", num="+real_num);
374 String document = mgpp_src_.getDocument(basedir, textdir, level, real_num);
375 // for now, stick it in a text node - eventually should be parsed as xml??
376
377 // something funny with the doc -
378 Element new_doc = GSXML.createResourceElement(doc_, ids[j]);
379 GSXML.addDocText(doc_, new_doc, document);
380 if (retrieve_extent.equals("allFiles")) {
381 // get the assoc files too
382 }
383 result_doc.appendChild(new_doc);
384 }
385
386 } else if (resource_type.equals("assoc")) {
387 // get associated files
388 String assoc_file_dir = (String)params.get("assocfilepath");
389 if (assoc_file_dir==null) {
390 System.err.println("need assocfilepath to find files");
391
392 } else {
393 String []files = GSXML.getResourceNameList(content_elem);
394 for (int j=0; j<files.length; j++) {
395 Element new_doc = GSXML.createResourceElement(doc_,
396 files[j]);
397 // add the file
398 result_doc.appendChild(new_doc);
399 }
400 }
401 } else {
402 System.out.println("error: resource Type is not legal");
403 }
404
405 return result;
406
407 }
408
409 /** retrieve metadata */
410 protected Element processMetadataRetrieve(Element request) {
411 Element result = doc_.createElement("response");
412 String from = GSPath.appendLink(cluster_name_, "MetadataRetrieve");
413 result.setAttribute("from", from);
414 result.setAttribute("type", "query");
415 Element result_content = doc_.createElement("content");
416 result.appendChild(result_content);
417 Element resource_list = doc_.createElement("resourceList");
418 result_content.appendChild(resource_list);
419 // get the metadata
420
421 Element content = (Element)request.getElementsByTagName("content").item(0);
422 if (content==null) {
423 // error: query had no content!! - should make an error message
424 return result;
425 }
426 String []metas = GSXML.getMetaNameList(content);
427 String []ids = GSXML.getResourceNameList(content);
428 for (int j=0; j<ids.length; j++) { // for each document
429 Element doc = GSXML.createResourceElement(doc_, ids[j]);
430 Element list = GSXML.addMetaList(doc_, doc);
431 DBInfo info = gdbm_src_.getInfo(ids[j]);
432 for (int m=0; m<metas.length; m++) {
433 String value = info.getInfo(metas[m]);
434 GSXML.addMetadata(doc_, list, metas[m], value);
435 }
436 resource_list.appendChild(doc);
437 }
438 return result;
439 }
440
441 // should probably use a list rather than map
442 protected boolean setQueryParams(HashMap params) {
443
444 Set entries = params.entrySet();
445 Iterator i = entries.iterator();
446 while (i.hasNext()) {
447 Map.Entry m = (Map.Entry)i.next();
448 String name = (String)m.getKey();
449 String value = (String)m.getValue();
450
451 if (name.equals("case")) {
452 boolean val = (value.equals("1")?true:false);
453 mgpp_src_.setCase(val);
454 } else if (name.equals("stem")) {
455 boolean val = (value.equals("1")?true:false);
456 mgpp_src_.setStem(val);
457 } else if (name.equals("maxDocs")) {
458 int docs = Integer.parseInt(value);
459 mgpp_src_.setMaxDocs(docs);
460 } else if (name.equals("queryLevel")) {
461 mgpp_src_.setQueryLevel(value);
462 } else if (name.equals("resultLevel")) {
463 mgpp_src_.setReturnLevel(value);
464 } else if (name.equals("matchMode")) {
465 int mode;
466 if (value.equals("all")) mode=1;
467 else mode=0;
468 mgpp_src_.setMatchMode(mode);
469 } else if (name.equals("sortBy")) {
470 if (value.equals("rank")) {
471 mgpp_src_.setSortByRank(true);
472 } else if (value.equals("natural")) {
473 mgpp_src_.setSortByRank(false);
474 }
475 } else if (name.equals("termInfo")) {
476 boolean val = (value.equals("1")?true:false);
477 mgpp_src_.setReturnTerms(val);
478 } // ignore any others
479 }
480 return true;
481 }
482
483
484}
485
486
Note: See TracBrowser for help on using the repository browser.