source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2Retrieve.java@ 4861

Last change on this file since 4861 was 4861, checked in by kjdon, 21 years ago

not using orientation and documentInterleave for classifiers any more. also added in support for parent and ancestor metadata

  • Property svn:keywords set to Author Date Id Revision
File size: 29.2 KB
Line 
1/*
2 * GS2Retrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gdbm.*;
24import org.greenstone.gsdl3.util.*;
25
26// XML classes
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31// General Java classes
32import java.io.File;
33import java.util.StringTokenizer;
34import java.util.Vector;
35import java.util.Set;
36import java.util.Iterator;
37
38/** Implements the generic retrieval and classifier services for GS2
39 * collections.
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
43 * @version $Revision: 4861 $
44 */
45
46public abstract class GS2Retrieve
47 extends ServiceRack {
48
// Names of the document retrieval services on offer.
// These strings must match what is found in the properties file.
protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";


// The browsing services - now in here. These are only advertised by
// configure() if classifiers have been specified in the config file.
private static final String CLASSIFIER_SERVICE = "ClassifierBrowse";
private static final String CLASSIFIER_METADATA_SERVICE = "ClassifierBrowseMetadataRetrieve";

// Names of the request parameters understood by the structure retrieval:
// "structure" selects which related nodes to return, "info" selects which
// counts/positions to return.
protected static final String STRUCT_PARAM = "structure";
protected static final String INFO_PARAM = "info";

// Accepted values of the "structure" parameter.
protected static final String STRUCT_ANCESTORS = "ancestors";
protected static final String STRUCT_PARENT = "parent";
protected static final String STRUCT_SIBS = "siblings";
protected static final String STRUCT_CHILDREN = "children";
protected static final String STRUCT_DESCENDS = "descendants";

// Values of the "info" parameter that getInfo() knows how to answer.
protected static final String INFO_NUM_SIBS = "numSiblings";
protected static final String INFO_NUM_CHILDREN = "numChildren";
protected static final String INFO_SIB_POS = "siblingPosition";

// Selects whether the generic structure/metadata retrieval works on
// document nodes or on classifier nodes.
protected static final int DOCUMENT=1;
protected static final int CLASSIFIER=2;

// Wrapper around the collection's GDBM database; created in the
// constructor and opened for reading in configure().
protected GDBMWrapper gdbm_src_ = null;

protected Element config_info_ = null; // the xml from the config file
79
/**
 * Constructor: creates the GDBM wrapper. The database itself is not
 * opened here - that happens in configure().
 */
protected GS2Retrieve()
{
    this.gdbm_src_ = new GDBMWrapper();
}
85
86
87 /** configure this service */
88 public boolean configure(Element info, Element extra_info)
89 {
90 System.out.println("Configuring GS2Retrieve...");
91 config_info_ = info;
92
93 // set up short_service_info_ - for now just has name and type
94 Element dsr_service = doc_.createElement(GSXML.SERVICE_ELEM);
95 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
96 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
97 short_service_info_.appendChild(dsr_service);
98
99 Element dmr_service = doc_.createElement(GSXML.SERVICE_ELEM);
100 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
101 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
102 short_service_info_.appendChild(dmr_service);
103
104 Element dcr_service = doc_.createElement(GSXML.SERVICE_ELEM);
105 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
106 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
107 short_service_info_.appendChild(dcr_service);
108
109 // set up service_info_map_ - for now, just has the same elements as above
110 // should have full details about each service incl params lists etc.
111 service_info_map_.put(DOCUMENT_STRUCTURE_RETRIEVE_SERVICE, dsr_service);
112 service_info_map_.put(DOCUMENT_METADATA_RETRIEVE_SERVICE, dmr_service);
113 service_info_map_.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, dcr_service);
114
115 // Open GDBM database for querying
116 String gdbm_db_file = GSFile.GDBMDatabaseFile(site_home_, cluster_name_);
117 if (!gdbm_src_.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
118 System.err.println("Error: Could not open GDBM database!");
119 return false;
120 }
121
122 // now do the classifier browse service
123
124 // check that there are classifiers specified
125 Element class_list = (Element)GSXML.getChildByTagName(info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
126 if (class_list == null) {
127 // no classifiers specified
128 return true;
129 }
130
131 // get the display and format elements from the coll config file for
132 // the classifiers
133 extractExtraClassifierInfo(info, extra_info);
134 config_info_ = info;
135
136 // short_service_info_ - the browse one
137 Element cb_service = doc_.createElement(GSXML.SERVICE_ELEM);
138 cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
139 cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
140 short_service_info_.appendChild(cb_service);
141
142 // metadata retrieval for the browsing
143 Element cbmr_service = doc_.createElement(GSXML.SERVICE_ELEM);
144 cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
145 cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
146 short_service_info_.appendChild(cbmr_service);
147
148 // service_info_map_
149
150 // the metadata one has the same info for now - should this advertise what metadata is available??
151 Element cbmr_service_info = (Element) cbmr_service.cloneNode(true);
152 service_info_map_.put(CLASSIFIER_METADATA_SERVICE, cbmr_service_info);
153
154 //the browse one
155 Element cb_service_info = (Element)cb_service.cloneNode(true);
156 Element cl_list = doc_.createElement(GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
157 cb_service_info.appendChild(cl_list);
158
159 // the format info
160 Element cb_format_info = doc_.createElement(GSXML.FORMAT_ELEM);
161 boolean format_found = false;
162 // add in to the description a simplified list of classifiers
163 NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
164 for(int i=0; i<classifiers.getLength(); i++) {
165 Element cl = (Element)classifiers.item(i);
166 Element new_cl = (Element)doc_.importNode(cl, false); // just import this node, not the children
167
168 cl_list.appendChild(new_cl);
169
170 // get the format info out, and put inside a classifier element
171 Element format_cl = (Element)new_cl.cloneNode(false);
172 Element format = (Element)GSXML.getChildByTagName(cl, GSXML.FORMAT_ELEM);
173 if (format != null) {
174
175 //copy all the children
176 NodeList elems = format.getChildNodes();
177 for (int j=0; j<elems.getLength();j++) {
178 format_cl.appendChild(doc_.importNode(elems.item(j), true));
179 }
180 cb_format_info.appendChild(format_cl);
181 format_found = true;
182 }
183
184
185 }
186
187
188 service_info_map_.put(CLASSIFIER_SERVICE, cb_service_info);
189 if (format_found) {
190 format_info_map_.put(CLASSIFIER_SERVICE, cb_format_info);
191 }
192
193 // look for document display format
194 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
195 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
196 if (display_format != null) {
197 System.out.println("display format not null!");
198 format_info_map_.put(DOCUMENT_METADATA_RETRIEVE_SERVICE, doc_.importNode(display_format, true));
199 // shoudl we make a copy?
200 }
201 return true;
202 }
203
204 /** this looks for any classifier specific display or format info from extra_info and adds it in to the correct place in info */
205 protected boolean extractExtraClassifierInfo(Element info, Element extra_info) {
206
207 if (extra_info == null) {
208 return false;
209 }
210
211 Document owner = info.getOwnerDocument();
212 // so far we have display and format elements that we need for classifiers
213 NodeList classifiers = info.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
214 Element config_browse = (Element)GSXML.getChildByTagName(extra_info, GSXML.BROWSE_ELEM);
215
216 for (int i=0; i<classifiers.getLength();i++) {
217 Element cl = (Element)classifiers.item(i);
218 String name = cl.getAttribute(GSXML.NAME_ATT);
219 Element node_extra = GSXML.getNamedElement(config_browse,
220 GSXML.CLASSIFIER_ELEM,
221 GSXML.NAME_ATT,
222 name);
223 if (node_extra == null) {
224 System.err.println("GS2REtrieve: haven't found extra info for classifier named "+name);
225 continue;
226 }
227
228 // get the display elements if any - displayName
229 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAYNAME_ELEM);
230 if (display_names !=null) {
231 Element display = owner.createElement(GSXML.DISPLAY_ELEM);
232 for (int j=0; j<display_names.getLength(); j++) {
233 Element e = (Element)display_names.item(j);
234
235 Element display_name = GSXML.createTextElement(owner, GSXML.DISPLAY_NAME_ELEM, GSXML.getNodeText(e));
236 display_name.setAttribute(GSXML.LANG_ATT, e.getAttribute(GSXML.LANG_ATT));
237 display.appendChild(display_name);
238 }
239 cl.appendChild(display);
240 }
241
242 // get the format element if any
243 Element format = (Element)GSXML.getChildByTagName(node_extra, GSXML.FORMAT_ELEM);
244 if (format==null) { // try a generic one that applies to all classifiers
245 format = (Element)GSXML.getChildByTagName(extra_info,
246 GSXML.FORMAT_ELEM);
247 }
248 if (format!=null) { // append to index info
249 cl.appendChild(owner.importNode(format, true));
250 }
251 } // for each classifier
252 return true;
253 }
254
255
256 /** creates a display element containing all the text strings needed to display the service page, in the language specified
257 * the retrieval services dont get displayed to the users - they are only used internally by the actions. so this returns an empty display element
258 * for those services. CLASSIFIER_BROWSE service returns some info */
259 protected Element createServiceDisplay(String service, String lang)
260 {
261 Element display = doc_.createElement(GSXML.DISPLAY_ELEM);
262 if (!service.equals(CLASSIFIER_SERVICE)) {
263 return display;
264 }
265
266 // CLASSIFIER_SERVICE
267 display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM, getTextString(service+".name", lang)));
268 //display.appendChild(GSXML.createTextElement(doc_, GSXML.DISPLAY_SUBMIT_ELEM, getTextString(service+".submit", lang)));
269
270 // need to add in the classifier name info
271 // add <classifier name="CL1">text name</classifier> to the
272 // display node
273 NodeList classifiers = config_info_.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
274 for (int i=0; i<classifiers.getLength(); i++) {
275 Element cl = (Element)classifiers.item(i);
276 Element disp = (Element)GSXML.getChildByTagName(cl, GSXML.DISPLAY_ELEM);
277 String text = null;
278 if (disp !=null) {
279 text = GSXML.getDisplayText(disp,
280 GSXML.DISPLAY_NAME_ELEM,
281 lang, "en");
282 }
283 if (text == null || text.equals("")) {
284 // no display element was specified, use the metadata name
285 // for now this looks in the class properties file
286 // this needs to use a general metadata thing instead
287 text = getTextString(cl.getAttribute(GSXML.CLASSIFIER_CONTENT_ATT), lang);
288 }
289
290 Element cl_elem = doc_.createElement(GSXML.CLASSIFIER_ELEM);
291 cl_elem.setAttribute(GSXML.NAME_ATT, cl.getAttribute(GSXML.NAME_ATT));
292 Element cl_name = GSXML.createTextElement(doc_, GSXML.DISPLAY_NAME_ELEM, text);
293 cl_elem.appendChild(cl_name);
294
295 display.appendChild(cl_elem);
296
297 }
298
299 return display;
300
301 }
302
303 /** parent is true if this node is definitely the parent of something,
304 * child is true is it definitely is a child of something - just for efficiency purposes */
305 protected Element createDocNode(String node_id, boolean parent, boolean child) {
306
307 // create this here or pass it in?
308 DBInfo info = gdbm_src_.getInfo(node_id);
309 Element node;
310 if (isClassifier(node_id)) {
311 node = doc_.createElement(GSXML.CLASS_NODE_ELEM);
312 //String childtype = info.getInfo("childtype");
313 //String orientation="";
314 //if (childtype.equals("HList")) {
315 // orientation = "horizontal";
316 //} else { // assume vertical
317 // orientation = "vertical";
318 //}
319 //node.setAttribute(GSXML.CLASS_NODE_ORIENTATION_ATT, orientation);
320 } else {
321
322 node = doc_.createElement(GSXML.DOC_NODE_ELEM);
323
324 String top_id = OID.getTop(node_id);
325 boolean is_top = (top_id.equals(node_id) ? true : false);
326
327 String children = info.getInfo("contains");
328 boolean is_leaf = (children.equals("") ? true : false);
329
330 // set teh node type att
331 if (is_top) {
332 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
333 } else if (is_leaf) {
334 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
335 } else {
336 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
337 }
338
339 // set teh doc type att
340 if (is_top && is_leaf) { // a single section document
341 node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
342
343 } else {
344
345 if (!is_top) { // we need to look at the top info
346 info = gdbm_src_.getInfo(top_id);
347 }
348
349 String childtype = info.getInfo("childtype");
350 if (childtype.equals("Paged")) {
351 node.setAttribute(GSXML.DOC_TYPE_ATT, "paged");
352 } else {
353 node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
354 }
355 }
356
357 }
358 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
359 return node;
360
361 }
362 /** Returns the parent of a specified documentID, or null if none exists */
363 protected Element getParent(String doc_id)
364 {
365 String parent_id = OID.getParent(doc_id);
366 if (parent_id.equals(doc_id))
367 return null;
368
369 return createDocNode(parent_id, true, false);
370 }
371
372
373 /** adds all the children of doc_id the the doc element,
374 * and if recursive=true, adds all their children as well*/
375 protected void addDescendants(Element doc, String doc_id,
376 boolean recursive)
377 {
378 DBInfo info = gdbm_src_.getInfo(doc_id);
379 String contains = info.getInfo("contains");
380
381 StringTokenizer st = new StringTokenizer(contains, ";");
382 while (st.hasMoreTokens()) {
383 String child_id = st.nextToken().replaceAll("\"", doc_id);
384 Element child = createDocNode(child_id, false, true);
385 doc.appendChild(child);
386
387 // Apply recursively, if desired
388 if (recursive) {
389 addDescendants(child, child_id, recursive);
390 }
391
392 }
393 }
394
395 /** adds all the siblings of current_id to the parent element. */
396 protected Element addSiblings(Element parent, String parent_id, String current_id) {
397 Element current_node = (Element)parent.getFirstChild();
398 if (current_node ==null) {
399 // create a sensible error message
400 System.err.println("Error: there should be a first child.");
401 return null;
402 }
403 // remove the current child,- will add it in later in its correct place
404 parent.removeChild(current_node);
405
406 // add in all the siblings,
407 addDescendants(parent, parent_id, false);
408
409 // find the node that is now the current node
410 // this assumes that the new node that was created is the same as
411 // the old one that was removed - we may want to replace the new one
412 // with the old one.
413 Element new_current = GSXML.getNamedElement(parent, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
414 return new_current;
415
416 }
417 /** Returns true if the OID specifies a leaf node, false otherwise
418 Note: this makes a request to the GDBM database so it may not be
419 a particularly cheap operation */
420 protected boolean isLeafNode(String oid)
421 {
422 DBInfo info = gdbm_src_.getInfo(oid);
423 String children = info.getInfo("contains");
424 return (children.equals(""));
425 }
426
427 // for now just use CL for classifiers - should have a type? in teh gdbm
428 // database.
429 protected boolean isClassifier(String oid) {
430 if (oid.startsWith("CL")) {
431 return true;
432 }
433 return false;
434 }
435
436 protected Element processDocumentStructureRetrieve(Element request) {
437 return genericStructureRetrieve(request, DOCUMENT);
438 }
439
440 protected Element processClassifierBrowse(Element request) {
441 return genericStructureRetrieve(request, CLASSIFIER);
442 }
443
444 /** Retrieve the structure of a document */
445 protected Element genericStructureRetrieve(Element request, int type)
446 {
447 // Create a new (empty) result message
448 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
449
450 String node_name;
451 String service_name;
452 if (type==DOCUMENT) {
453 service_name = DOCUMENT_STRUCTURE_RETRIEVE_SERVICE;
454 node_name = GSXML.DOC_NODE_ELEM;
455 } else {
456 service_name = CLASSIFIER_SERVICE;
457 node_name = GSXML.CLASS_NODE_ELEM;
458 }
459
460 result.setAttribute(GSXML.FROM_ATT, service_name);
461 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
462
463 // Get the parameters of the request
464 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
465 if (param_list == null) {
466 System.err.println("Error: DocumentStructureRetrieve request had no paramList.");
467 return result; // Return the empty result
468 }
469
470 // the type of info required
471 boolean want_structure = false;
472 boolean want_info = false;
473
474 Vector info_types=new Vector();
475 // The document structure information desired
476 boolean want_ancestors = false;
477 boolean want_parent = false;
478 boolean want_siblings = false;
479 boolean want_children = false;
480 boolean want_descendants = false;
481
482 // Process the request parameters
483 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
484 for (int i=0; i<params.getLength();i++) {
485
486 Element param = (Element)params.item(i);
487 String p_name = param.getAttribute(GSXML.NAME_ATT);
488 String p_value = GSXML.getValue(param);
489 // Identify the structure information desired
490 if (p_name.equals(STRUCT_PARAM)) {
491 want_structure = true;
492
493 // This is NOT locale sensitive
494 if (p_value.equals(STRUCT_ANCESTORS))
495 want_ancestors = true;
496 else if (p_value.equals(STRUCT_PARENT))
497 want_parent = true;
498 else if (p_value.equals(STRUCT_SIBS))
499 want_siblings = true;
500 else if (p_value.equals(STRUCT_CHILDREN))
501 want_children = true;
502 else if (p_value.equals(STRUCT_DESCENDS))
503 want_descendants = true;
504 else
505 System.err.println("Warning: Unknown value \"" + p_value + "\".");
506 } else if (p_name.equals(INFO_PARAM)) {
507 want_info = true;
508 info_types.add(p_value);
509 }
510 }
511
512 // Make sure there is no repeated information
513 if (want_ancestors)
514 want_parent = false;
515 if (want_descendants)
516 want_children = false;
517
518
519 Element query_doc_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
520 if (query_doc_list == null) {
521 System.err.println("Error: DocumentStructureRetrieve request specified no doc nodes.\n");
522 return result;
523 }
524
525 Element doc_list = doc_.createElement(node_name+GSXML.LIST_MODIFIER);
526 result.appendChild(doc_list);
527
528 // Get the documents
529 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
530 GSXML.NODE_ID_ATT);
531 for (int i = 0; i < doc_ids.length; i++) {
532 String doc_id = doc_ids[i];
533
534 System.out.println("doc_id = "+doc_id);
535 if (OID.needsTranslating(doc_id)) {
536 doc_id = gdbm_src_.translateOID(doc_id);
537 System.out.println("translated doc_id = "+doc_id);
538 }
539
540 // Add the document to the list
541 Element doc = doc_.createElement(node_name);
542 doc_list.appendChild(doc);
543 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
544
545
546 if (want_info) {
547
548 Element info_elem = doc_.createElement("nodeStructureInfo");
549 doc.appendChild(info_elem);
550
551 for (int j=0; j<info_types.size(); j++) {
552 String info_type = (String)info_types.get(j);
553 Element inf = getInfo(doc_id, info_type);
554 if (inf != null) {
555 info_elem.appendChild(inf);
556 }
557 }
558 }
559 if (want_structure) {
560 // all structure info goes into a nodeStructure elem
561 Element structure_elem = doc_.createElement(GSXML.NODE_STRUCTURE_ELEM);
562 doc.appendChild(structure_elem);
563
564 // Add the requested structure information
565 Element current = createDocNode(doc_id, false, false);
566
567 //Ancestors: continually add parent nodes until the root is reached
568 Element top_node = current; // the top node so far
569 if (want_ancestors) {
570 String current_id = doc_id;
571 while (true) {
572 Element parent = getParent(current_id);
573 if (parent == null)
574 break;
575
576 parent.appendChild(top_node);
577 current_id = parent.getAttribute(GSXML.NODE_ID_ATT);
578 top_node = parent;
579 }
580 }
581 // Parent: get the parent of the selected node
582 if (want_parent) {
583 Element parent = getParent(doc_id);
584 if (parent != null) {
585 parent.appendChild(current);
586 top_node = parent;
587 }
588 }
589
590
591 // now the top node is the root of the structure
592 structure_elem.appendChild(top_node);
593
594 //Siblings: get the other descendants of the selected node's parent
595 if (want_siblings) {
596 Element parent = (Element)current.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
597 String parent_id = OID.getParent(doc_id);
598
599 // add siblings, - returns a pointer to the new current node
600 current = addSiblings(parent, parent_id, doc_id);
601 }
602
603 // Children: get the descendants, but only one level deep
604 if (want_children)
605 addDescendants(current, doc_id, false);
606 // Descendants: recursively get every descendant of the selected node
607 if (want_descendants)
608 addDescendants(current, doc_id, true);
609 } // if want structure
610 } // for each doc
611 return result;
612 }
613
614
615 protected Element processDocumentMetadataRetrieve(Element request) {
616 return genericMetadataRetrieve(request, DOCUMENT);
617 }
618
619 protected Element processClassifierBrowseMetadataRetrieve(Element request) {
620 return genericMetadataRetrieve(request, CLASSIFIER);
621 }
622
623
624 /** Retrieve metadata associated with a document or classifier node*/
625 protected Element genericMetadataRetrieve(Element request, int type)
626 {
627 // Create a new (empty) result message
628 Element result = doc_.createElement(GSXML.RESPONSE_ELEM);
629
630 String node_name;
631
632 String service_name;
633 if (type==DOCUMENT) {
634 service_name = DOCUMENT_METADATA_RETRIEVE_SERVICE;
635 node_name = GSXML.DOC_NODE_ELEM;
636 } else {
637 service_name = CLASSIFIER_METADATA_SERVICE;
638 node_name = GSXML.CLASS_NODE_ELEM;
639 }
640 result.setAttribute(GSXML.FROM_ATT, service_name);
641 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
642
643 // Get the parameters of the request
644 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
645 if (param_list == null) {
646 System.err.println("GS2Retrieve, DocumentMetadataRetrieve Error: missing paramList.\n");
647 return result; // Return the empty result
648 }
649
650 // The metadata information required
651 Vector metadata_list = new Vector();
652 boolean all_metadata = false;
653 // Process the request parameters
654 Element param = (Element) param_list.getFirstChild();
655 while (param != null) {
656 // Identify the metadata information desired
657 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
658 String metadata = GSXML.getValue(param);
659 if (metadata.equals("all")) {
660 all_metadata = true;
661 break;
662 }
663 metadata_list.add(metadata);
664 }
665 param = (Element) param.getNextSibling();
666 }
667
668 Element node_list = doc_.createElement(node_name+GSXML.LIST_MODIFIER);
669 result.appendChild(node_list);
670
671 // Get the documents
672 Element request_node_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
673 if (request_node_list == null) {
674 System.err.println("Error: DocumentMetadataRetrieve request had no "+node_name+"List.\n");
675 return result;
676 }
677
678 NodeList request_nodes = request_node_list.getChildNodes();
679 for (int i = 0; i < request_nodes.getLength(); i++) {
680 Element request_node = (Element) request_nodes.item(i);
681 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
682
683 if (OID.needsTranslating(node_id)) {
684 node_id = gdbm_src_.translateOID(node_id);
685 }
686
687 // Add the document to the list
688 Element new_node = (Element)doc_.importNode(request_node, false);
689 node_list.appendChild(new_node);
690
691 // Add the requested metadata information
692 Element node_meta_list = doc_.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
693 new_node.appendChild(node_meta_list);
694 DBInfo info = gdbm_src_.getInfo(node_id);
695 if (info == null) {// I have had a case where it is null!
696 continue;
697 }
698 System.out.println("found the info");
699 if (all_metadata) {
700 System.out.println("trying to get all the metadata");
701 // return everything out of the database
702 Set keys = info.getKeys();
703 Iterator it = keys.iterator();
704 while(it.hasNext()) {
705 String key = (String)it.next();
706 System.out.println("getting metadata "+key);
707 String value = info.getInfo(key);
708 GSXML.addMetadata(doc_, node_meta_list, key, value);
709 }
710 } else { // just get the selected ones
711
712 for (int m = 0; m < metadata_list.size(); m++) {
713 String metadata = (String) metadata_list.get(m);
714 String value = getMetadata(node_id, info, metadata);
715 GSXML.addMetadata(doc_, node_meta_list, metadata, value);
716 }
717 }
718 //String v = getMetadata (node_id, info, "parent_Title");
719 //System.out.println("parent title = "+v);
720 //v = getMetadata (node_id, info, "ancestors': '_Title");
721 //System.out.println("ancestortitle = "+v);
722 }
723
724 return result;
725 }
726
727 protected final char RELATION_SEP_CHAR = '_';
728 protected final String SEPARATOR_SEP_STRING = "'";
729
730 protected String getMetadata(String node_id, DBInfo info,
731 String metadata) {
732 int pos = metadata.indexOf(RELATION_SEP_CHAR);
733 if (pos ==-1) {
734 return info.getInfo(metadata);
735 }
736
737 String relation = metadata.substring(0, pos);
738 if (relation.equals("parent") || relation.startsWith("ancestors")) {
739 String parent_id = OID.getParent(node_id);
740 if (parent_id.equals(node_id)){
741 // no parents or ancestors
742 return "";
743 }
744 DBInfo parent_info = gdbm_src_.getInfo(parent_id);
745 if (parent_info == null) return "";
746 String new_meta = metadata.substring(pos+1);
747 if (relation.equals("parent")) {
748 return parent_info.getInfo(new_meta);
749 }
750 // do ancestor stuff
751 // get teh separating string
752 String items [] = relation.split(SEPARATOR_SEP_STRING);
753 String separator = "";
754 if (items.length > 1) {
755 separator = items[1];
756 relation = items[0];
757 }
758
759 StringBuffer value = new StringBuffer();
760 value.append(parent_info.getInfo(new_meta));
761 String current_id = parent_id;
762 parent_id = OID.getParent(current_id);
763 while (!parent_id.equals(current_id)) {
764 parent_info = gdbm_src_.getInfo(parent_id);
765 if (parent_info == null) return value.toString();
766 value.insert(0, separator);
767 value.insert(0, parent_info.getInfo(new_meta));
768
769 current_id = parent_id;
770 parent_id = OID.getParent(current_id);
771 }
772
773 return value.toString();
774 }
775
776
777 // its not a relation after all
778 return info.getInfo(metadata);
779
780 }
781
/**
 * Retrieve the content of a document - implemented by concrete subclasses.
 *
 * @param request the request element to process
 * @return the response element
 *         NOTE(review): presumably shaped like the responses built by the
 *         other process* methods of this class - confirm against the
 *         concrete subclasses
 */
protected abstract Element processDocumentContentRetrieve(Element request);
784
785 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
786 protected String resolveImages(String doc_content, String doc_id)
787 {
788 String top_doc_id = OID.getTop(doc_id);
789 DBInfo info = gdbm_src_.getInfo(top_doc_id);
790 String archivedir = info.getInfo("archivedir");
791 String image_dir = site_http_address_ + "/collect/"+cluster_name_+"/index/assoc/"+archivedir;
792
793 // Resolve all "_httpdocimg_"s
794 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
795 return doc_content;
796 }
797
798 protected Element getInfo(String doc_id, String info_type) {
799
800 String value="";
801 if (info_type.equals(INFO_NUM_SIBS)) {
802 String parent_id = OID.getParent(doc_id);
803 if (parent_id.equals(doc_id)) {
804 value="0";
805 } else {
806 value = String.valueOf(getNumChildren(parent_id));
807 }
808 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
809 value = String.valueOf(getNumChildren(doc_id));
810 } else if (info_type.equals(INFO_SIB_POS)) {
811 String parent_id = OID.getParent(doc_id);
812 if (parent_id.equals(doc_id)) {
813 value="-1";
814 } else {
815 DBInfo info = gdbm_src_.getInfo(parent_id);
816 String contains = info.getInfo("contains");
817 contains = contains.replaceAll("\"", parent_id);
818 String [] children = contains.split(";");
819 for (int i=0;i<children.length;i++) {
820 String child_id = children[i];
821 System.out.println("child="+child_id+" doc="+doc_id);
822 if (child_id.equals(doc_id)) {
823 value = String.valueOf(i+1); // make it from 1 to length
824 break;
825 }
826 }
827 }
828 } else {
829 return null;
830 }
831 Element info_elem = doc_.createElement("info");
832 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
833 info_elem.setAttribute(GSXML.VALUE_ATT, value);
834 return info_elem;
835 }
836
837 protected int getNumChildren(String doc_id) {
838 DBInfo info = gdbm_src_.getInfo(doc_id);
839 String contains = info.getInfo("contains");
840 if (contains.equals("")) {
841 return 0;
842 }
843 String [] children = contains.split(";");
844 return children.length;
845 }
846
847}
Note: See TracBrowser for help on using the repository browser.