source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2Retrieve.java@ 6247

Last change on this file since 6247 was 6247, checked in by kjdon, 20 years ago

changed the metadata handling to use metadata names in the form all_relation_'sep'_meta, where all means to get multiple values for each node, relation can be parent, ancestors etc, sep is the separator string to put between values. All the preceding bits are optional.

  • Property svn:keywords set to Author Date Id Revision
File size: 32.0 KB
Line 
1/*
2 * GS2Retrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gdbm.*;
24import org.greenstone.gsdl3.util.*;
25
26// XML classes
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31// General Java classes
32import java.io.File;
33import java.util.StringTokenizer;
34import java.util.Vector;
35import java.util.Set;
36import java.util.Iterator;
37
38/** Implements the generic retrieval and classifier services for GS2
39 * collections.
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
43 * @version $Revision: 6247 $
44 */
45
46public abstract class GS2Retrieve
47 extends ServiceRack {
48
49 // the services on offer
50 // these strings must match what is found in the properties file
51 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
52 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
53 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
54
55
56 // the browsing services - now in here, these will only be advertised if classifiers have been specified in the config file
57 private static final String CLASSIFIER_SERVICE = "ClassifierBrowse";
58 private static final String CLASSIFIER_METADATA_SERVICE = "ClassifierBrowseMetadataRetrieve";
59
60 protected static final String STRUCT_PARAM = "structure";
61 protected static final String INFO_PARAM = "info";
62
63 protected static final String STRUCT_ANCESTORS = "ancestors";
64 protected static final String STRUCT_PARENT = "parent";
65 protected static final String STRUCT_SIBS = "siblings";
66 protected static final String STRUCT_CHILDREN = "children";
67 protected static final String STRUCT_DESCENDS = "descendants";
68 protected static final String STRUCT_ENTIRE = "entire";
69
70 protected static final String INFO_NUM_SIBS = "numSiblings";
71 protected static final String INFO_NUM_CHILDREN = "numChildren";
72 protected static final String INFO_SIB_POS = "siblingPosition";
73
74 protected static final int DOCUMENT=1;
75 protected static final int CLASSIFIER=2;
76
77 protected GDBMWrapper gdbm_src = null;
78 protected Element config_info = null; // the xml from the config file
79
/** Creates the service rack together with the GDBM wrapper it will query;
 * the database itself is only opened later, in configure(). */
protected GS2Retrieve()
{
    gdbm_src = new GDBMWrapper();
}
85
86
87 /** configure this service */
88 public boolean configure(Element info, Element extra_info)
89 {
90 System.out.println("Configuring GS2Retrieve...");
91 this.config_info = info;
92
93 // set up short_service_info_ - for now just has name and type
94 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
95 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
96 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
97 this.short_service_info.appendChild(dsr_service);
98
99 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
100 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
101 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
102 this.short_service_info.appendChild(dmr_service);
103
104 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
105 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
106 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
107 this.short_service_info.appendChild(dcr_service);
108
109 // Open GDBM database for querying
110 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
111 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
112 System.err.println("GS2Retrieve Error: Could not open GDBM database!");
113 return false;
114 }
115
116 // now do the classifier browse service
117
118 // check that there are classifiers specified
119 Element class_list = (Element)GSXML.getChildByTagName(info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
120 if (class_list == null) {
121 // no classifiers specified
122 return true;
123 }
124
125 // get the display and format elements from the coll config file for
126 // the classifiers
127 extractExtraClassifierInfo(info, extra_info);
128
129 this.config_info = info;
130
131 // short_service_info_ - the browse one
132 Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
133 cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
134 cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
135 this.short_service_info.appendChild(cb_service);
136
137 // metadata retrieval for the browsing
138 Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
139 cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
140 cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
141 this.short_service_info.appendChild(cbmr_service);
142
143 // the format info
144 Element cb_format_info = this.doc.createElement(GSXML.FORMAT_ELEM);
145 boolean format_found = false;
146
147 // try the default format first
148 Element def_format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
149 if (def_format != null) {
150 cb_format_info.appendChild(GSXML.duplicateWithNewName(this.doc, def_format, GSXML.DEFAULT_ELEM, true));
151 format_found = true;
152 }
153
154 // add in to the description a simplified list of classifiers
155 NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
156 for(int i=0; i<classifiers.getLength(); i++) {
157 Element cl = (Element)classifiers.item(i);
158 Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
159
160 // get the format info out, and put inside a classifier element
161 Element format_cl = (Element)new_cl.cloneNode(false);
162 Element format = (Element)GSXML.getChildByTagName(cl, GSXML.FORMAT_ELEM);
163 if (format != null) {
164
165 //copy all the children
166 NodeList elems = format.getChildNodes();
167 for (int j=0; j<elems.getLength();j++) {
168 format_cl.appendChild(this.doc.importNode(elems.item(j), true));
169 }
170 cb_format_info.appendChild(format_cl);
171 format_found = true;
172 }
173
174
175 }
176
177 if (format_found) {
178 this.format_info_map.put(CLASSIFIER_SERVICE, cb_format_info);
179 }
180
181 // look for document display format
182 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
183 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
184 if (display_format != null) {
185 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
186 // shoudl we make a copy?
187 }
188 return true;
189 }
190
191 protected Element getServiceDescription(String service_id, String lang, String subset) {
192
193 if (service_id.equals(CLASSIFIER_SERVICE)) {
194
195 Element class_list = (Element)GSXML.getChildByTagName(this.config_info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
196 if (class_list == null) {
197 // no classifiers specified
198 return null;
199 }
200
201 Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
202 cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
203 cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
204 cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(CLASSIFIER_SERVICE+".name", lang)));
205 cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(CLASSIFIER_SERVICE+".description", lang)));
206
207 Element cl_list = this.doc.createElement(GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
208 cb_service.appendChild(cl_list);
209 NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
210 for(int i=0; i<classifiers.getLength(); i++) {
211 Element cl = (Element)classifiers.item(i);
212 Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
213 String content = cl.getAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
214 cl_list.appendChild(new_cl);
215 String text = GSXML.getDisplayText(cl,
216 GSXML.DISPLAY_TEXT_NAME,
217 lang, "en");
218 if (text == null || text.equals("")) {
219 // no display element was specified, use the metadata name
220 // for now this looks in the class properties file
221 // this needs to use a general metadata thing instead
222 text = getMetadataNameText(content+".buttonname", lang);
223 }
224 if (text == null) {
225 text = content;
226 }
227
228 Element cl_name = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, text);
229 new_cl.appendChild(cl_name);
230
231 // description
232
233 String meta_name = getMetadataNameText(content, lang);
234 if (meta_name==null) {
235 meta_name = content;
236 }
237 String [] array = {meta_name};
238 String description = getTextString("ClassifierBrowse.classifier_help", array, lang);
239 Element cl_desc = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, description);
240 new_cl.appendChild(cl_desc);
241
242 }
243 return cb_service;
244 }
245
246 // these ones are probably never called, but put them here just in case
247
248 if (service_id.equals(CLASSIFIER_METADATA_SERVICE)) {
249
250 Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
251 cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
252 cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
253 return cbmr_service;
254 }
255
256 if (service_id.equals(DOCUMENT_STRUCTURE_RETRIEVE_SERVICE)) {
257 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
258 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
259 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
260 return dsr_service;
261 }
262 if (service_id.equals(DOCUMENT_METADATA_RETRIEVE_SERVICE)) {
263 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
264 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
265 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
266 return dmr_service;
267 }
268
269 if (service_id.equals(DOCUMENT_CONTENT_RETRIEVE_SERVICE)) {
270 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
271 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
272 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
273 return dcr_service;
274 }
275
276 return null;
277 }
278
279 /** this looks for any classifier specific display or format info from extra_info and adds it in to the correct place in info */
280 protected boolean extractExtraClassifierInfo(Element info, Element extra_info) {
281
282 if (extra_info == null) {
283 return false;
284 }
285
286 Document owner = info.getOwnerDocument();
287 // so far we have display and format elements that we need for classifiers
288 NodeList classifiers = info.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
289 Element config_browse = (Element)GSXML.getChildByTagName(extra_info, GSXML.BROWSE_ELEM);
290
291 for (int i=0; i<classifiers.getLength();i++) {
292 Element cl = (Element)classifiers.item(i);
293 String name = cl.getAttribute(GSXML.NAME_ATT);
294 Element node_extra = GSXML.getNamedElement(config_browse,
295 GSXML.CLASSIFIER_ELEM,
296 GSXML.NAME_ATT,
297 name);
298 if (node_extra == null) {
299 System.err.println("GS2REtrieve: haven't found extra info for classifier named "+name);
300 continue;
301 }
302
303 // get the display elements if any - displayName
304 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
305 if (display_names !=null) {
306 Element display = owner.createElement(GSXML.DISPLAY_ELEM);
307 for (int j=0; j<display_names.getLength(); j++) {
308 Element e = (Element)display_names.item(j);
309 cl.appendChild(owner.importNode(e, true));
310
311 }
312 }
313
314 // get the format element if any
315 Element format = (Element)GSXML.getChildByTagName(node_extra, GSXML.FORMAT_ELEM);
316 if (format==null) { // try a generic one that applies to all classifiers
317 format = (Element)GSXML.getChildByTagName(extra_info,
318 GSXML.FORMAT_ELEM);
319 }
320 if (format!=null) { // append to index info
321 cl.appendChild(owner.importNode(format, true));
322 }
323 } // for each classifier
324
325 // now check for default format info
326 Element default_format = (Element)GSXML.getChildByTagName(config_browse, GSXML.FORMAT_ELEM);
327 if (default_format!=null) { // append to info
328 info.appendChild(owner.importNode(default_format, true));
329 }
330
331 return true;
332 }
333
334
335 /** parent is true if this node is definitely the parent of something,
336 * child is true is it definitely is a child of something - just for efficiency purposes */
337 protected Element createDocNode(String node_id, boolean parent, boolean child) {
338
339 // create this here or pass it in?
340 DBInfo info = this.gdbm_src.getInfo(node_id);
341 Element node;
342 if (isClassifier(node_id)) {
343 node = this.doc.createElement(GSXML.CLASS_NODE_ELEM);
344 //String childtype = info.getInfo("childtype");
345 //String orientation="";
346 //if (childtype.equals("HList")) {
347 // orientation = "horizontal";
348 //} else { // assume vertical
349 // orientation = "vertical";
350 //}
351 //node.setAttribute(GSXML.CLASS_NODE_ORIENTATION_ATT, orientation);
352 } else {
353
354 node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
355
356 String top_id = OID.getTop(node_id);
357 boolean is_top = (top_id.equals(node_id) ? true : false);
358
359 String children = info.getInfo("contains");
360 boolean is_leaf = (children.equals("") ? true : false);
361
362 // set teh node type att
363 if (is_top) {
364 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
365 } else if (is_leaf) {
366 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
367 } else {
368 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
369 }
370
371 // set teh doc type att
372 if (is_top && is_leaf) { // a single section document
373 node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
374
375 } else {
376
377 if (!is_top) { // we need to look at the top info
378 info = this.gdbm_src.getInfo(top_id);
379 }
380
381 String childtype = info.getInfo("childtype");
382 if (childtype.equals("Paged")) {
383 node.setAttribute(GSXML.DOC_TYPE_ATT, "paged");
384 } else {
385 node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
386 }
387 }
388
389 }
390 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
391 return node;
392
393 }
394 /** Returns the parent of a specified documentID, or null if none exists */
395 protected Element getParent(String doc_id)
396 {
397 String parent_id = OID.getParent(doc_id);
398 if (parent_id.equals(doc_id))
399 return null;
400
401 return createDocNode(parent_id, true, false);
402 }
403
404
405 /** adds all the children of doc_id the the doc element,
406 * and if recursive=true, adds all their children as well*/
407 protected void addDescendants(Element doc, String doc_id,
408 boolean recursive)
409 {
410 DBInfo info = this.gdbm_src.getInfo(doc_id);
411 String contains = info.getInfo("contains");
412
413 StringTokenizer st = new StringTokenizer(contains, ";");
414 while (st.hasMoreTokens()) {
415 String child_id = st.nextToken().replaceAll("\"", doc_id);
416 Element child = createDocNode(child_id, false, true);
417 doc.appendChild(child);
418
419 // Apply recursively, if desired
420 if (recursive) {
421 addDescendants(child, child_id, recursive);
422 }
423
424 }
425 }
426
427 /** adds all the siblings of current_id to the parent element. */
428 protected Element addSiblings(Element parent, String parent_id, String current_id) {
429 Element current_node = (Element)parent.getFirstChild();
430 if (current_node ==null) {
431 // create a sensible error message
432 System.err.println("GS2Retrieve Error: there should be a first child.");
433 return null;
434 }
435 // remove the current child,- will add it in later in its correct place
436 parent.removeChild(current_node);
437
438 // add in all the siblings,
439 addDescendants(parent, parent_id, false);
440
441 // find the node that is now the current node
442 // this assumes that the new node that was created is the same as
443 // the old one that was removed - we may want to replace the new one
444 // with the old one.
445 Element new_current = GSXML.getNamedElement(parent, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
446 return new_current;
447
448 }
449 /** Returns true if the OID specifies a leaf node, false otherwise
450 Note: this makes a request to the GDBM database so it may not be
451 a particularly cheap operation */
452 protected boolean isLeafNode(String oid)
453 {
454 DBInfo info = this.gdbm_src.getInfo(oid);
455 String children = info.getInfo("contains");
456 return (children.equals(""));
457 }
458
459 // for now just use CL for classifiers - should have a type? in teh gdbm
460 // database.
461 protected boolean isClassifier(String oid) {
462 if (oid.startsWith("CL")) {
463 return true;
464 }
465 return false;
466 }
467
468 protected Element processDocumentStructureRetrieve(Element request) {
469 return genericStructureRetrieve(request, DOCUMENT);
470 }
471
472 protected Element processClassifierBrowse(Element request) {
473 return genericStructureRetrieve(request, CLASSIFIER);
474 }
475
476 /** Retrieve the structure of a document */
477 protected Element genericStructureRetrieve(Element request, int type)
478 {
479 // Create a new (empty) result message
480 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
481
482 String node_name;
483 String service_name;
484 if (type==DOCUMENT) {
485 service_name = DOCUMENT_STRUCTURE_RETRIEVE_SERVICE;
486 node_name = GSXML.DOC_NODE_ELEM;
487 } else {
488 service_name = CLASSIFIER_SERVICE;
489 node_name = GSXML.CLASS_NODE_ELEM;
490 }
491
492 result.setAttribute(GSXML.FROM_ATT, service_name);
493 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
494
495
496 Element query_doc_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
497 if (query_doc_list == null) {
498 System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request specified no doc nodes.\n");
499 return result;
500 }
501
502 // Get the parameters of the request
503 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
504 if (param_list == null) {
505 System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request had no paramList.");
506 return result; // Return the empty result
507 }
508
509 // the type of info required
510 boolean want_structure = false;
511 boolean want_info = false;
512
513 Vector info_types=new Vector();
514 // The document structure information desired
515 boolean want_ancestors = false;
516 boolean want_parent = false;
517 boolean want_siblings = false;
518 boolean want_children = false;
519 boolean want_descendants = false;
520
521 boolean want_entire_structure = false;
522 // Process the request parameters
523 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
524 for (int i=0; i<params.getLength();i++) {
525
526 Element param = (Element)params.item(i);
527 String p_name = param.getAttribute(GSXML.NAME_ATT);
528 String p_value = GSXML.getValue(param);
529 // Identify the structure information desired
530 if (p_name.equals(STRUCT_PARAM)) {
531 want_structure = true;
532
533 // This is NOT locale sensitive
534 if (p_value.equals(STRUCT_ANCESTORS))
535 want_ancestors = true;
536 else if (p_value.equals(STRUCT_PARENT))
537 want_parent = true;
538 else if (p_value.equals(STRUCT_SIBS))
539 want_siblings = true;
540 else if (p_value.equals(STRUCT_CHILDREN))
541 want_children = true;
542 else if (p_value.equals(STRUCT_DESCENDS))
543 want_descendants = true;
544 else if (p_value.equals(STRUCT_ENTIRE))
545 want_entire_structure = true;
546 else
547 System.err.println("GS2Retrieve Warning: Unknown value \"" + p_value + "\".");
548 } else if (p_name.equals(INFO_PARAM)) {
549 want_info = true;
550 info_types.add(p_value);
551 }
552 }
553
554 // Make sure there is no repeated information
555 if (want_ancestors)
556 want_parent = false;
557 if (want_descendants)
558 want_children = false;
559
560
561
562 Element doc_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
563 result.appendChild(doc_list);
564
565 // Get the documents
566 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
567 GSXML.NODE_ID_ATT);
568 for (int i = 0; i < doc_ids.length; i++) {
569 String doc_id = doc_ids[i];
570
571 if (OID.needsTranslating(doc_id)) {
572 doc_id = this.gdbm_src.translateOID(doc_id);
573 }
574
575 // Add the document to the list
576 Element doc = this.doc.createElement(node_name);
577 doc_list.appendChild(doc);
578 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
579
580
581 if (want_info) {
582
583 Element info_elem = this.doc.createElement("nodeStructureInfo");
584 doc.appendChild(info_elem);
585
586 for (int j=0; j<info_types.size(); j++) {
587 String info_type = (String)info_types.get(j);
588 Element inf = getInfo(doc_id, info_type);
589 if (inf != null) {
590 info_elem.appendChild(inf);
591 }
592 }
593 }
594 if (want_structure) {
595 // all structure info goes into a nodeStructure elem
596 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
597 doc.appendChild(structure_elem);
598
599 if (want_entire_structure) {
600 String top_id = OID.getTop(doc_id);
601 Element top_node = createDocNode(top_id, true, false);
602 addDescendants(top_node, top_id, true);
603 structure_elem.appendChild(top_node);
604 continue; // with the next document, we dont need to do any more here
605 }
606
607 // Add the requested structure information
608 Element current = createDocNode(doc_id, false, false);
609
610 //Ancestors: continually add parent nodes until the root is reached
611 Element top_node = current; // the top node so far
612 if (want_ancestors) {
613 String current_id = doc_id;
614 while (true) {
615 Element parent = getParent(current_id);
616 if (parent == null)
617 break;
618
619 parent.appendChild(top_node);
620 current_id = parent.getAttribute(GSXML.NODE_ID_ATT);
621 top_node = parent;
622 }
623 }
624 // Parent: get the parent of the selected node
625 if (want_parent) {
626 Element parent = getParent(doc_id);
627 if (parent != null) {
628 parent.appendChild(current);
629 top_node = parent;
630 }
631 }
632
633
634 // now the top node is the root of the structure
635 structure_elem.appendChild(top_node);
636
637 //Siblings: get the other descendants of the selected node's parent
638 if (want_siblings) {
639 Element parent = (Element)current.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
640 String parent_id = OID.getParent(doc_id);
641
642 // add siblings, - returns a pointer to the new current node
643 current = addSiblings(parent, parent_id, doc_id);
644 }
645
646 // Children: get the descendants, but only one level deep
647 if (want_children)
648 addDescendants(current, doc_id, false);
649 // Descendants: recursively get every descendant of the selected node
650 if (want_descendants)
651 addDescendants(current, doc_id, true);
652 } // if want structure
653 } // for each doc
654 return result;
655 }
656
657
658 protected Element processDocumentMetadataRetrieve(Element request) {
659 return genericMetadataRetrieve(request, DOCUMENT);
660 }
661
662 protected Element processClassifierBrowseMetadataRetrieve(Element request) {
663 return genericMetadataRetrieve(request, CLASSIFIER);
664 }
665
666
667 /** Retrieve metadata associated with a document or classifier node*/
668 protected Element genericMetadataRetrieve(Element request, int type)
669 {
670 // Create a new (empty) result message
671 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
672
673 String node_name;
674
675 String service_name;
676 if (type==DOCUMENT) {
677 service_name = DOCUMENT_METADATA_RETRIEVE_SERVICE;
678 node_name = GSXML.DOC_NODE_ELEM;
679 } else {
680 service_name = CLASSIFIER_METADATA_SERVICE;
681 node_name = GSXML.CLASS_NODE_ELEM;
682 }
683 result.setAttribute(GSXML.FROM_ATT, service_name);
684 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
685
686 // Get the parameters of the request
687 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
688 if (param_list == null) {
689 System.err.println("GS2Retrieve, DocumentMetadataRetrieve Error: missing paramList.\n");
690 return result; // Return the empty result
691 }
692
693 // The metadata information required
694 Vector metadata_list = new Vector();
695 boolean all_metadata = false;
696 // Process the request parameters
697 Element param = (Element) param_list.getFirstChild();
698 while (param != null) {
699 // Identify the metadata information desired
700 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
701 String metadata = GSXML.getValue(param);
702 if (metadata.equals("all")) {
703 all_metadata = true;
704 break;
705 }
706 metadata_list.add(metadata);
707 }
708 param = (Element) param.getNextSibling();
709 }
710
711 Element node_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
712 result.appendChild(node_list);
713
714 // Get the documents
715 Element request_node_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
716 if (request_node_list == null) {
717 System.err.println("Error: DocumentMetadataRetrieve request had no "+node_name+"List.\n");
718 return result;
719 }
720
721 NodeList request_nodes = request_node_list.getChildNodes();
722 for (int i = 0; i < request_nodes.getLength(); i++) {
723 Element request_node = (Element) request_nodes.item(i);
724 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
725
726 if (OID.needsTranslating(node_id)) {
727 node_id = this.gdbm_src.translateOID(node_id);
728 }
729
730 // Add the document to the list
731 Element new_node = (Element)this.doc.importNode(request_node, false);
732 node_list.appendChild(new_node);
733
734 // Add the requested metadata information
735 Element node_meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
736 new_node.appendChild(node_meta_list);
737 DBInfo info = this.gdbm_src.getInfo(node_id);
738 if (info == null) {// I have had a case where it is null!
739 continue;
740 }
741 if (all_metadata) {
742 // return everything out of the database
743 Set keys = info.getKeys();
744 Iterator it = keys.iterator();
745 while(it.hasNext()) {
746 String key = (String)it.next();
747 String value = info.getInfo(key);
748 GSXML.addMetadata(this.doc, node_meta_list, key, value);
749 }
750 } else { // just get the selected ones
751
752 for (int m = 0; m < metadata_list.size(); m++) {
753 String metadata = (String) metadata_list.get(m);
754 String value = getMetadata(node_id, info, metadata);
755 GSXML.addMetadata(this.doc, node_meta_list, metadata, value);
756 }
757 }
758 }
759
760 return result;
761 }
762
763 protected final char RELATION_SEP_CHAR = '_';
764 protected final String SEPARATOR_SEP_STRING = "'";
765
766 protected String getMetadata(String node_id, DBInfo info,
767 String metadata) {
768 boolean multiple = false;
769 String relation = "";
770 String separator = ", ";
771 int pos = metadata.indexOf(RELATION_SEP_CHAR);
772 if (pos ==-1) {
773 // just a plain meta entry eg dc.Title
774 return info.getInfo(metadata);
775 }
776
777 String temp = metadata.substring(0, pos);
778 metadata = metadata.substring(pos+1);
779 // check for all on the front
780 if (temp.equals("all")) {
781 multiple=true;
782 pos = metadata.indexOf(RELATION_SEP_CHAR);
783 if (pos ==-1) {
784 temp = "";
785 } else {
786 temp = metadata.substring(0, pos);
787 metadata = metadata.substring(pos+1);
788 }
789 }
790
791 // now check for relational info
792 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
793 relation = temp;
794 pos = metadata.indexOf(RELATION_SEP_CHAR);
795 if (pos == -1) {
796 temp = "";
797 } else {
798 temp = metadata.substring(0, pos);
799 metadata = metadata.substring(pos+1);
800 }
801 }
802
803 // now look for separator info
804 if (temp.startsWith(SEPARATOR_SEP_STRING) && temp.endsWith(SEPARATOR_SEP_STRING)) {
805 separator = temp.substring(1, temp.length()-1);
806
807 }
808
809 String relation_id = node_id;
810 if (relation.equals("parent") || relation.equals("ancestors")) {
811 relation_id = OID.getParent(node_id);
812 // parent or ancestor does not include self
813 if (relation_id.equals(node_id)){
814 return "";
815 }
816 } else if (relation.equals("root")) {
817 relation_id = OID.getTop(node_id);
818 }
819
820 // now we either have a single node, or we have ancestors
821 DBInfo relation_info;
822 if (relation_id.equals(node_id)) {
823 relation_info = info;
824 } else {
825 relation_info = this.gdbm_src.getInfo(relation_id);
826 }
827 if (relation_info == null) {
828 return "";
829 }
830
831 StringBuffer result = new StringBuffer();
832
833 if (!multiple) {
834 result.append(relation_info.getInfo(metadata));
835 } else {
836 // we have multiple meta
837 Vector values = relation_info.getMultiInfo(metadata);
838 if (values != null) {
839 boolean first = true;
840 for (int i=0; i<values.size(); i++) {
841 if (first) {
842 first = false;
843 } else {
844 result.append(separator);
845 }
846 result.append(values.elementAt(i));
847 }
848 }
849 }
850 // if not ancestors, then this is all we do
851 if (!relation.equals("ancestors")) {
852 return result.toString();
853 }
854
855 // now do the ancestors
856 String current_id = relation_id;
857 relation_id = OID.getParent(current_id);
858 while (!relation_id.equals(current_id)) {
859 relation_info = this.gdbm_src.getInfo(relation_id);
860 if (relation_info == null) return result.toString();
861 if (!multiple) {
862 result.insert(0, separator);
863 result.insert(0, relation_info.getInfo(metadata));
864 } else {
865 Vector values = relation_info.getMultiInfo(metadata);
866 if (values != null) {
867 for (int i=values.size()-1; i>=0; i--) {
868 result.insert(0, separator);
869 result.insert(0, values.elementAt(i));
870 }
871 }
872
873 }
874 current_id = relation_id;
875 relation_id = OID.getParent(current_id);
876 }
877
878 return result.toString();
879 }
880
881 /** Retrieve the content of a document - implemented by concrete subclasses */
882 protected abstract Element processDocumentContentRetrieve(Element request);
883
884 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
885 protected String resolveImages(String doc_content, String doc_id)
886 {
887 String top_doc_id = OID.getTop(doc_id);
888 DBInfo info = this.gdbm_src.getInfo(top_doc_id);
889 String archivedir = info.getInfo("archivedir");
890 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
891
892 // Resolve all "_httpdocimg_"s
893 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
894 return doc_content;
895 }
896
897 protected Element getInfo(String doc_id, String info_type) {
898
899 String value="";
900 if (info_type.equals(INFO_NUM_SIBS)) {
901 String parent_id = OID.getParent(doc_id);
902 if (parent_id.equals(doc_id)) {
903 value="0";
904 } else {
905 value = String.valueOf(getNumChildren(parent_id));
906 }
907 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
908 value = String.valueOf(getNumChildren(doc_id));
909 } else if (info_type.equals(INFO_SIB_POS)) {
910 String parent_id = OID.getParent(doc_id);
911 if (parent_id.equals(doc_id)) {
912 value="-1";
913 } else {
914 DBInfo info = this.gdbm_src.getInfo(parent_id);
915 String contains = info.getInfo("contains");
916 contains = contains.replaceAll("\"", parent_id);
917 String [] children = contains.split(";");
918 for (int i=0;i<children.length;i++) {
919 String child_id = children[i];
920 if (child_id.equals(doc_id)) {
921 value = String.valueOf(i+1); // make it from 1 to length
922 break;
923 }
924 }
925 }
926 } else {
927 return null;
928 }
929 Element info_elem = this.doc.createElement("info");
930 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
931 info_elem.setAttribute(GSXML.VALUE_ATT, value);
932 return info_elem;
933 }
934
935 protected int getNumChildren(String doc_id) {
936 DBInfo info = this.gdbm_src.getInfo(doc_id);
937 String contains = info.getInfo("contains");
938 if (contains.equals("")) {
939 return 0;
940 }
941 String [] children = contains.split(";");
942 return children.length;
943 }
944
945}
Note: See TracBrowser for help on using the repository browser.