source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2Retrieve.java@ 6490

Last change on this file since 6490 was 6275, checked in by kjdon, 20 years ago

now checks for empty metadata before adding it

  • Property svn:keywords set to Author Date Id Revision
File size: 32.1 KB
Line 
1/*
2 * GS2Retrieve.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.gdbm.*;
24import org.greenstone.gsdl3.util.*;
25
26// XML classes
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31// General Java classes
32import java.io.File;
33import java.util.StringTokenizer;
34import java.util.Vector;
35import java.util.Set;
36import java.util.Iterator;
37
38/** Implements the generic retrieval and classifier services for GS2
39 * collections.
40 *
41 * @author <a href="mailto:[email protected]">Katherine Don</a>
42 * @author <a href="mailto:[email protected]">Michael Dewsnip</a>
43 * @version $Revision: 6275 $
44 */
45
46public abstract class GS2Retrieve
47 extends ServiceRack {
48
    // the services on offer
    // these strings must match what is found in the properties file
    // (they are also the names advertised in short_service_info by configure())
    protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
    protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
    protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";


    // the browsing services - now in here, these will only be advertised if classifiers have been specified in the config file
    private static final String CLASSIFIER_SERVICE = "ClassifierBrowse";
    private static final String CLASSIFIER_METADATA_SERVICE = "ClassifierBrowseMetadataRetrieve";

    // names of the request parameters understood by the structure retrieval services
    protected static final String STRUCT_PARAM = "structure";
    protected static final String INFO_PARAM = "info";

    // accepted values for the "structure" parameter
    protected static final String STRUCT_ANCESTORS = "ancestors";
    protected static final String STRUCT_PARENT = "parent";
    protected static final String STRUCT_SIBS = "siblings";
    protected static final String STRUCT_CHILDREN = "children";
    protected static final String STRUCT_DESCENDS = "descendants";
    protected static final String STRUCT_ENTIRE = "entire";

    // accepted values for the "info" parameter (see getInfo())
    protected static final String INFO_NUM_SIBS = "numSiblings";
    protected static final String INFO_NUM_CHILDREN = "numChildren";
    protected static final String INFO_SIB_POS = "siblingPosition";

    // node kinds passed to the generic*Retrieve methods to select
    // document or classifier element/service names
    protected static final int DOCUMENT=1;
    protected static final int CLASSIFIER=2;

    // wrapper around the collection's GDBM database; opened for reading in configure()
    protected GDBMWrapper gdbm_src = null;
    protected Element config_info = null; // the xml from the config file
79
80 /** constructor */
81 protected GS2Retrieve()
82 {
83 this.gdbm_src = new GDBMWrapper();
84 }
85
86
87 /** configure this service */
88 public boolean configure(Element info, Element extra_info)
89 {
90 System.out.println("Configuring GS2Retrieve...");
91 this.config_info = info;
92
93 // set up short_service_info_ - for now just has name and type
94 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
95 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
96 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
97 this.short_service_info.appendChild(dsr_service);
98
99 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
100 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
101 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
102 this.short_service_info.appendChild(dmr_service);
103
104 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
105 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
106 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
107 this.short_service_info.appendChild(dcr_service);
108
109 // Open GDBM database for querying
110 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name);
111 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
112 System.err.println("GS2Retrieve Error: Could not open GDBM database!");
113 return false;
114 }
115
116 // now do the classifier browse service
117
118 // check that there are classifiers specified
119 Element class_list = (Element)GSXML.getChildByTagName(info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
120 if (class_list == null) {
121 // no classifiers specified
122 return true;
123 }
124
125 // get the display and format elements from the coll config file for
126 // the classifiers
127 extractExtraClassifierInfo(info, extra_info);
128
129 this.config_info = info;
130
131 // short_service_info_ - the browse one
132 Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
133 cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
134 cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
135 this.short_service_info.appendChild(cb_service);
136
137 // metadata retrieval for the browsing
138 Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
139 cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
140 cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
141 this.short_service_info.appendChild(cbmr_service);
142
143 // the format info
144 Element cb_format_info = this.doc.createElement(GSXML.FORMAT_ELEM);
145 boolean format_found = false;
146
147 // try the default format first
148 Element def_format = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
149 if (def_format != null) {
150 cb_format_info.appendChild(GSXML.duplicateWithNewName(this.doc, def_format, GSXML.DEFAULT_ELEM, true));
151 format_found = true;
152 }
153
154 // add in to the description a simplified list of classifiers
155 NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
156 for(int i=0; i<classifiers.getLength(); i++) {
157 Element cl = (Element)classifiers.item(i);
158 Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
159
160 // get the format info out, and put inside a classifier element
161 Element format_cl = (Element)new_cl.cloneNode(false);
162 Element format = (Element)GSXML.getChildByTagName(cl, GSXML.FORMAT_ELEM);
163 if (format != null) {
164
165 //copy all the children
166 NodeList elems = format.getChildNodes();
167 for (int j=0; j<elems.getLength();j++) {
168 format_cl.appendChild(this.doc.importNode(elems.item(j), true));
169 }
170 cb_format_info.appendChild(format_cl);
171 format_found = true;
172 }
173
174
175 }
176
177 if (format_found) {
178 this.format_info_map.put(CLASSIFIER_SERVICE, cb_format_info);
179 }
180
181 // look for document display format
182 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
183 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
184 if (display_format != null) {
185 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
186 // shoudl we make a copy?
187 }
188 return true;
189 }
190
191 protected Element getServiceDescription(String service_id, String lang, String subset) {
192
193 if (service_id.equals(CLASSIFIER_SERVICE)) {
194
195 Element class_list = (Element)GSXML.getChildByTagName(this.config_info, GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
196 if (class_list == null) {
197 // no classifiers specified
198 return null;
199 }
200
201 Element cb_service = this.doc.createElement(GSXML.SERVICE_ELEM);
202 cb_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_BROWSE);
203 cb_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_SERVICE);
204 cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(CLASSIFIER_SERVICE+".name", lang)));
205 cb_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(CLASSIFIER_SERVICE+".description", lang)));
206
207 Element cl_list = this.doc.createElement(GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
208 cb_service.appendChild(cl_list);
209 NodeList classifiers = class_list.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
210 for(int i=0; i<classifiers.getLength(); i++) {
211 Element cl = (Element)classifiers.item(i);
212 Element new_cl = (Element)this.doc.importNode(cl, false); // just import this node, not the children
213 String content = cl.getAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
214 cl_list.appendChild(new_cl);
215 String text = GSXML.getDisplayText(cl,
216 GSXML.DISPLAY_TEXT_NAME,
217 lang, "en");
218 if (text == null || text.equals("")) {
219 // no display element was specified, use the metadata name
220 // for now this looks in the class properties file
221 // this needs to use a general metadata thing instead
222 text = getMetadataNameText(content+".buttonname", lang);
223 }
224 if (text == null) {
225 text = content;
226 }
227
228 Element cl_name = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, text);
229 new_cl.appendChild(cl_name);
230
231 // description
232
233 String meta_name = getMetadataNameText(content, lang);
234 if (meta_name==null) {
235 meta_name = content;
236 }
237 String [] array = {meta_name};
238 String description = getTextString("ClassifierBrowse.classifier_help", array, lang);
239 Element cl_desc = GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, description);
240 new_cl.appendChild(cl_desc);
241
242 }
243 return cb_service;
244 }
245
246 // these ones are probably never called, but put them here just in case
247
248 if (service_id.equals(CLASSIFIER_METADATA_SERVICE)) {
249
250 Element cbmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
251 cbmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
252 cbmr_service.setAttribute(GSXML.NAME_ATT, CLASSIFIER_METADATA_SERVICE);
253 return cbmr_service;
254 }
255
256 if (service_id.equals(DOCUMENT_STRUCTURE_RETRIEVE_SERVICE)) {
257 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
258 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
259 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
260 return dsr_service;
261 }
262 if (service_id.equals(DOCUMENT_METADATA_RETRIEVE_SERVICE)) {
263 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
264 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
265 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
266 return dmr_service;
267 }
268
269 if (service_id.equals(DOCUMENT_CONTENT_RETRIEVE_SERVICE)) {
270 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
271 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
272 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
273 return dcr_service;
274 }
275
276 return null;
277 }
278
279 /** this looks for any classifier specific display or format info from extra_info and adds it in to the correct place in info */
280 protected boolean extractExtraClassifierInfo(Element info, Element extra_info) {
281
282 if (extra_info == null) {
283 return false;
284 }
285
286 Document owner = info.getOwnerDocument();
287 // so far we have display and format elements that we need for classifiers
288 NodeList classifiers = info.getElementsByTagName(GSXML.CLASSIFIER_ELEM);
289 Element config_browse = (Element)GSXML.getChildByTagName(extra_info, GSXML.BROWSE_ELEM);
290
291 for (int i=0; i<classifiers.getLength();i++) {
292 Element cl = (Element)classifiers.item(i);
293 String name = cl.getAttribute(GSXML.NAME_ATT);
294 Element node_extra = GSXML.getNamedElement(config_browse,
295 GSXML.CLASSIFIER_ELEM,
296 GSXML.NAME_ATT,
297 name);
298 if (node_extra == null) {
299 System.err.println("GS2REtrieve: haven't found extra info for classifier named "+name);
300 continue;
301 }
302
303 // get the display elements if any - displayName
304 NodeList display_names = node_extra.getElementsByTagName(GSXML.DISPLAY_TEXT_ELEM);
305 if (display_names !=null) {
306 Element display = owner.createElement(GSXML.DISPLAY_ELEM);
307 for (int j=0; j<display_names.getLength(); j++) {
308 Element e = (Element)display_names.item(j);
309 cl.appendChild(owner.importNode(e, true));
310
311 }
312 }
313
314 // get the format element if any
315 Element format = (Element)GSXML.getChildByTagName(node_extra, GSXML.FORMAT_ELEM);
316 if (format==null) { // try a generic one that applies to all classifiers
317 format = (Element)GSXML.getChildByTagName(extra_info,
318 GSXML.FORMAT_ELEM);
319 }
320 if (format!=null) { // append to index info
321 cl.appendChild(owner.importNode(format, true));
322 }
323 } // for each classifier
324
325 // now check for default format info
326 Element default_format = (Element)GSXML.getChildByTagName(config_browse, GSXML.FORMAT_ELEM);
327 if (default_format!=null) { // append to info
328 info.appendChild(owner.importNode(default_format, true));
329 }
330
331 return true;
332 }
333
334
335 /** parent is true if this node is definitely the parent of something,
336 * child is true is it definitely is a child of something - just for efficiency purposes */
337 protected Element createDocNode(String node_id, boolean parent, boolean child) {
338
339 // create this here or pass it in?
340 DBInfo info = this.gdbm_src.getInfo(node_id);
341 Element node;
342 if (isClassifier(node_id)) {
343 node = this.doc.createElement(GSXML.CLASS_NODE_ELEM);
344 //String childtype = info.getInfo("childtype");
345 //String orientation="";
346 //if (childtype.equals("HList")) {
347 // orientation = "horizontal";
348 //} else { // assume vertical
349 // orientation = "vertical";
350 //}
351 //node.setAttribute(GSXML.CLASS_NODE_ORIENTATION_ATT, orientation);
352 } else {
353
354 node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
355
356 String top_id = OID.getTop(node_id);
357 boolean is_top = (top_id.equals(node_id) ? true : false);
358
359 String children = info.getInfo("contains");
360 boolean is_leaf = (children.equals("") ? true : false);
361
362 // set teh node type att
363 if (is_top) {
364 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
365 } else if (is_leaf) {
366 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
367 } else {
368 node.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERIOR);
369 }
370
371 // set teh doc type att
372 if (is_top && is_leaf) { // a single section document
373 node.setAttribute(GSXML.DOC_TYPE_ATT, "simple");
374
375 } else {
376
377 if (!is_top) { // we need to look at the top info
378 info = this.gdbm_src.getInfo(top_id);
379 }
380
381 String childtype = info.getInfo("childtype");
382 if (childtype.equals("Paged")) {
383 node.setAttribute(GSXML.DOC_TYPE_ATT, "paged");
384 } else {
385 node.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
386 }
387 }
388
389 }
390 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
391 return node;
392
393 }
394 /** Returns the parent of a specified documentID, or null if none exists */
395 protected Element getParent(String doc_id)
396 {
397 String parent_id = OID.getParent(doc_id);
398 if (parent_id.equals(doc_id))
399 return null;
400
401 return createDocNode(parent_id, true, false);
402 }
403
404
405 /** adds all the children of doc_id the the doc element,
406 * and if recursive=true, adds all their children as well*/
407 protected void addDescendants(Element doc, String doc_id,
408 boolean recursive)
409 {
410 DBInfo info = this.gdbm_src.getInfo(doc_id);
411 String contains = info.getInfo("contains");
412
413 StringTokenizer st = new StringTokenizer(contains, ";");
414 while (st.hasMoreTokens()) {
415 String child_id = st.nextToken().replaceAll("\"", doc_id);
416 Element child = createDocNode(child_id, false, true);
417 doc.appendChild(child);
418
419 // Apply recursively, if desired
420 if (recursive) {
421 addDescendants(child, child_id, recursive);
422 }
423
424 }
425 }
426
427 /** adds all the siblings of current_id to the parent element. */
428 protected Element addSiblings(Element parent, String parent_id, String current_id) {
429 Element current_node = (Element)parent.getFirstChild();
430 if (current_node ==null) {
431 // create a sensible error message
432 System.err.println("GS2Retrieve Error: there should be a first child.");
433 return null;
434 }
435 // remove the current child,- will add it in later in its correct place
436 parent.removeChild(current_node);
437
438 // add in all the siblings,
439 addDescendants(parent, parent_id, false);
440
441 // find the node that is now the current node
442 // this assumes that the new node that was created is the same as
443 // the old one that was removed - we may want to replace the new one
444 // with the old one.
445 Element new_current = GSXML.getNamedElement(parent, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
446 return new_current;
447
448 }
449 /** Returns true if the OID specifies a leaf node, false otherwise
450 Note: this makes a request to the GDBM database so it may not be
451 a particularly cheap operation */
452 protected boolean isLeafNode(String oid)
453 {
454 DBInfo info = this.gdbm_src.getInfo(oid);
455 String children = info.getInfo("contains");
456 return (children.equals(""));
457 }
458
459 // for now just use CL for classifiers - should have a type? in teh gdbm
460 // database.
461 protected boolean isClassifier(String oid) {
462 if (oid.startsWith("CL")) {
463 return true;
464 }
465 return false;
466 }
467
    /** Handles a DocumentStructureRetrieve request by delegating to
     * genericStructureRetrieve() with the DOCUMENT node kind. */
    protected Element processDocumentStructureRetrieve(Element request) {
        return genericStructureRetrieve(request, DOCUMENT);
    }
471
    /** Handles a ClassifierBrowse request by delegating to
     * genericStructureRetrieve() with the CLASSIFIER node kind. */
    protected Element processClassifierBrowse(Element request) {
        return genericStructureRetrieve(request, CLASSIFIER);
    }
475
476 /** Retrieve the structure of a document */
477 protected Element genericStructureRetrieve(Element request, int type)
478 {
479 // Create a new (empty) result message
480 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
481
482 String node_name;
483 String service_name;
484 if (type==DOCUMENT) {
485 service_name = DOCUMENT_STRUCTURE_RETRIEVE_SERVICE;
486 node_name = GSXML.DOC_NODE_ELEM;
487 } else {
488 service_name = CLASSIFIER_SERVICE;
489 node_name = GSXML.CLASS_NODE_ELEM;
490 }
491
492 result.setAttribute(GSXML.FROM_ATT, service_name);
493 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
494
495
496 Element query_doc_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
497 if (query_doc_list == null) {
498 System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request specified no doc nodes.\n");
499 return result;
500 }
501
502 // Get the parameters of the request
503 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
504 if (param_list == null) {
505 System.err.println("GS2Retrieve Error: DocumentStructureRetrieve request had no paramList.");
506 return result; // Return the empty result
507 }
508
509 // the type of info required
510 boolean want_structure = false;
511 boolean want_info = false;
512
513 Vector info_types=new Vector();
514 // The document structure information desired
515 boolean want_ancestors = false;
516 boolean want_parent = false;
517 boolean want_siblings = false;
518 boolean want_children = false;
519 boolean want_descendants = false;
520
521 boolean want_entire_structure = false;
522 // Process the request parameters
523 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
524 for (int i=0; i<params.getLength();i++) {
525
526 Element param = (Element)params.item(i);
527 String p_name = param.getAttribute(GSXML.NAME_ATT);
528 String p_value = GSXML.getValue(param);
529 // Identify the structure information desired
530 if (p_name.equals(STRUCT_PARAM)) {
531 want_structure = true;
532
533 // This is NOT locale sensitive
534 if (p_value.equals(STRUCT_ANCESTORS))
535 want_ancestors = true;
536 else if (p_value.equals(STRUCT_PARENT))
537 want_parent = true;
538 else if (p_value.equals(STRUCT_SIBS))
539 want_siblings = true;
540 else if (p_value.equals(STRUCT_CHILDREN))
541 want_children = true;
542 else if (p_value.equals(STRUCT_DESCENDS))
543 want_descendants = true;
544 else if (p_value.equals(STRUCT_ENTIRE))
545 want_entire_structure = true;
546 else
547 System.err.println("GS2Retrieve Warning: Unknown value \"" + p_value + "\".");
548 } else if (p_name.equals(INFO_PARAM)) {
549 want_info = true;
550 info_types.add(p_value);
551 }
552 }
553
554 // Make sure there is no repeated information
555 if (want_ancestors)
556 want_parent = false;
557 if (want_descendants)
558 want_children = false;
559
560
561
562 Element doc_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
563 result.appendChild(doc_list);
564
565 // Get the documents
566 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
567 GSXML.NODE_ID_ATT);
568 for (int i = 0; i < doc_ids.length; i++) {
569 String doc_id = doc_ids[i];
570
571 if (OID.needsTranslating(doc_id)) {
572 doc_id = this.gdbm_src.translateOID(doc_id);
573 }
574
575 // Add the document to the list
576 Element doc = this.doc.createElement(node_name);
577 doc_list.appendChild(doc);
578 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
579
580
581 if (want_info) {
582
583 Element info_elem = this.doc.createElement("nodeStructureInfo");
584 doc.appendChild(info_elem);
585
586 for (int j=0; j<info_types.size(); j++) {
587 String info_type = (String)info_types.get(j);
588 Element inf = getInfo(doc_id, info_type);
589 if (inf != null) {
590 info_elem.appendChild(inf);
591 }
592 }
593 }
594 if (want_structure) {
595 // all structure info goes into a nodeStructure elem
596 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
597 doc.appendChild(structure_elem);
598
599 if (want_entire_structure) {
600 String top_id = OID.getTop(doc_id);
601 Element top_node = createDocNode(top_id, true, false);
602 addDescendants(top_node, top_id, true);
603 structure_elem.appendChild(top_node);
604 continue; // with the next document, we dont need to do any more here
605 }
606
607 // Add the requested structure information
608 Element current = createDocNode(doc_id, false, false);
609
610 //Ancestors: continually add parent nodes until the root is reached
611 Element top_node = current; // the top node so far
612 if (want_ancestors) {
613 String current_id = doc_id;
614 while (true) {
615 Element parent = getParent(current_id);
616 if (parent == null)
617 break;
618
619 parent.appendChild(top_node);
620 current_id = parent.getAttribute(GSXML.NODE_ID_ATT);
621 top_node = parent;
622 }
623 }
624 // Parent: get the parent of the selected node
625 if (want_parent) {
626 Element parent = getParent(doc_id);
627 if (parent != null) {
628 parent.appendChild(current);
629 top_node = parent;
630 }
631 }
632
633
634 // now the top node is the root of the structure
635 structure_elem.appendChild(top_node);
636
637 //Siblings: get the other descendants of the selected node's parent
638 if (want_siblings) {
639 Element parent = (Element)current.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
640 String parent_id = OID.getParent(doc_id);
641
642 // add siblings, - returns a pointer to the new current node
643 current = addSiblings(parent, parent_id, doc_id);
644 }
645
646 // Children: get the descendants, but only one level deep
647 if (want_children)
648 addDescendants(current, doc_id, false);
649 // Descendants: recursively get every descendant of the selected node
650 if (want_descendants)
651 addDescendants(current, doc_id, true);
652 } // if want structure
653 } // for each doc
654 return result;
655 }
656
657
    /** Handles a DocumentMetadataRetrieve request by delegating to
     * genericMetadataRetrieve() with the DOCUMENT node kind. */
    protected Element processDocumentMetadataRetrieve(Element request) {
        return genericMetadataRetrieve(request, DOCUMENT);
    }
661
    /** Handles a ClassifierBrowseMetadataRetrieve request by delegating to
     * genericMetadataRetrieve() with the CLASSIFIER node kind. */
    protected Element processClassifierBrowseMetadataRetrieve(Element request) {
        return genericMetadataRetrieve(request, CLASSIFIER);
    }
665
666
667 /** Retrieve metadata associated with a document or classifier node*/
668 protected Element genericMetadataRetrieve(Element request, int type)
669 {
670 // Create a new (empty) result message
671 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
672
673 String node_name;
674
675 String service_name;
676 if (type==DOCUMENT) {
677 service_name = DOCUMENT_METADATA_RETRIEVE_SERVICE;
678 node_name = GSXML.DOC_NODE_ELEM;
679 } else {
680 service_name = CLASSIFIER_METADATA_SERVICE;
681 node_name = GSXML.CLASS_NODE_ELEM;
682 }
683 result.setAttribute(GSXML.FROM_ATT, service_name);
684 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
685
686 // Get the parameters of the request
687 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
688 if (param_list == null) {
689 System.err.println("GS2Retrieve, DocumentMetadataRetrieve Error: missing paramList.\n");
690 return result; // Return the empty result
691 }
692
693 // The metadata information required
694 Vector metadata_list = new Vector();
695 boolean all_metadata = false;
696 // Process the request parameters
697 Element param = (Element) param_list.getFirstChild();
698 while (param != null) {
699 // Identify the metadata information desired
700 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
701 String metadata = GSXML.getValue(param);
702 if (metadata.equals("all")) {
703 all_metadata = true;
704 break;
705 }
706 metadata_list.add(metadata);
707 }
708 param = (Element) param.getNextSibling();
709 }
710
711 Element node_list = this.doc.createElement(node_name+GSXML.LIST_MODIFIER);
712 result.appendChild(node_list);
713
714 // Get the documents
715 Element request_node_list = (Element) GSXML.getChildByTagName(request, node_name+GSXML.LIST_MODIFIER);
716 if (request_node_list == null) {
717 System.err.println("Error: DocumentMetadataRetrieve request had no "+node_name+"List.\n");
718 return result;
719 }
720
721 NodeList request_nodes = request_node_list.getChildNodes();
722 for (int i = 0; i < request_nodes.getLength(); i++) {
723 Element request_node = (Element) request_nodes.item(i);
724 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
725
726 if (OID.needsTranslating(node_id)) {
727 node_id = this.gdbm_src.translateOID(node_id);
728 }
729
730 // Add the document to the list
731 Element new_node = (Element)this.doc.importNode(request_node, false);
732 node_list.appendChild(new_node);
733
734 // Add the requested metadata information
735 Element node_meta_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
736 new_node.appendChild(node_meta_list);
737 DBInfo info = this.gdbm_src.getInfo(node_id);
738 if (info == null) {// I have had a case where it is null!
739 continue;
740 }
741 if (all_metadata) {
742 // return everything out of the database
743 Set keys = info.getKeys();
744 Iterator it = keys.iterator();
745 while(it.hasNext()) {
746 String key = (String)it.next();
747 String value = info.getInfo(key);
748 GSXML.addMetadata(this.doc, node_meta_list, key, value);
749 }
750 } else { // just get the selected ones
751
752 for (int m = 0; m < metadata_list.size(); m++) {
753 String metadata = (String) metadata_list.get(m);
754 String value = getMetadata(node_id, info, metadata);
755 if (!value.equals("")) {
756 GSXML.addMetadata(this.doc, node_meta_list, metadata, value);
757 }
758 }
759 }
760 }
761
762 return result;
763 }
764
    // separates the optional prefixes (all, relation, separator) from each
    // other and from the metadata name in a metadata request, see getMetadata()
    protected final char RELATION_SEP_CHAR = '_';
    // a separator prefix is recognised by starting and ending with this string
    protected final String SEPARATOR_SEP_STRING = "'";
767
768 protected String getMetadata(String node_id, DBInfo info,
769 String metadata) {
770 boolean multiple = false;
771 String relation = "";
772 String separator = ", ";
773 int pos = metadata.indexOf(RELATION_SEP_CHAR);
774 if (pos ==-1) {
775 // just a plain meta entry eg dc.Title
776 return info.getInfo(metadata);
777 }
778
779 String temp = metadata.substring(0, pos);
780 metadata = metadata.substring(pos+1);
781 // check for all on the front
782 if (temp.equals("all")) {
783 multiple=true;
784 pos = metadata.indexOf(RELATION_SEP_CHAR);
785 if (pos ==-1) {
786 temp = "";
787 } else {
788 temp = metadata.substring(0, pos);
789 metadata = metadata.substring(pos+1);
790 }
791 }
792
793 // now check for relational info
794 if (temp.equals("parent") || temp.equals("root") || temp.equals( "ancestors")) { // "current" "siblings" "children" "descendents"
795 relation = temp;
796 pos = metadata.indexOf(RELATION_SEP_CHAR);
797 if (pos == -1) {
798 temp = "";
799 } else {
800 temp = metadata.substring(0, pos);
801 metadata = metadata.substring(pos+1);
802 }
803 }
804
805 // now look for separator info
806 if (temp.startsWith(SEPARATOR_SEP_STRING) && temp.endsWith(SEPARATOR_SEP_STRING)) {
807 separator = temp.substring(1, temp.length()-1);
808
809 }
810
811 String relation_id = node_id;
812 if (relation.equals("parent") || relation.equals("ancestors")) {
813 relation_id = OID.getParent(node_id);
814 // parent or ancestor does not include self
815 if (relation_id.equals(node_id)){
816 return "";
817 }
818 } else if (relation.equals("root")) {
819 relation_id = OID.getTop(node_id);
820 }
821
822 // now we either have a single node, or we have ancestors
823 DBInfo relation_info;
824 if (relation_id.equals(node_id)) {
825 relation_info = info;
826 } else {
827 relation_info = this.gdbm_src.getInfo(relation_id);
828 }
829 if (relation_info == null) {
830 return "";
831 }
832
833 StringBuffer result = new StringBuffer();
834
835 if (!multiple) {
836 result.append(relation_info.getInfo(metadata));
837 } else {
838 // we have multiple meta
839 Vector values = relation_info.getMultiInfo(metadata);
840 if (values != null) {
841 boolean first = true;
842 for (int i=0; i<values.size(); i++) {
843 if (first) {
844 first = false;
845 } else {
846 result.append(separator);
847 }
848 result.append(values.elementAt(i));
849 }
850 }
851 }
852 // if not ancestors, then this is all we do
853 if (!relation.equals("ancestors")) {
854 return result.toString();
855 }
856
857 // now do the ancestors
858 String current_id = relation_id;
859 relation_id = OID.getParent(current_id);
860 while (!relation_id.equals(current_id)) {
861 relation_info = this.gdbm_src.getInfo(relation_id);
862 if (relation_info == null) return result.toString();
863 if (!multiple) {
864 result.insert(0, separator);
865 result.insert(0, relation_info.getInfo(metadata));
866 } else {
867 Vector values = relation_info.getMultiInfo(metadata);
868 if (values != null) {
869 for (int i=values.size()-1; i>=0; i--) {
870 result.insert(0, separator);
871 result.insert(0, values.elementAt(i));
872 }
873 }
874
875 }
876 current_id = relation_id;
877 relation_id = OID.getParent(current_id);
878 }
879
880 return result.toString();
881 }
882
    /** Retrieve the content of a document - implemented by concrete subclasses.
     *
     * @param request the DocumentContentRetrieve request element
     * @return the response element
     */
    protected abstract Element processDocumentContentRetrieve(Element request);
885
886 /** needs to get info from gdbm database - if the calling code gets it already it may pay to pass it in instead */
887 protected String resolveImages(String doc_content, String doc_id)
888 {
889 String top_doc_id = OID.getTop(doc_id);
890 DBInfo info = this.gdbm_src.getInfo(top_doc_id);
891 String archivedir = info.getInfo("archivedir");
892 String image_dir = this.site_http_address + "/collect/"+this.cluster_name+"/index/assoc/"+archivedir;
893
894 // Resolve all "_httpdocimg_"s
895 doc_content = doc_content.replaceAll("_httpdocimg_", image_dir);
896 return doc_content;
897 }
898
899 protected Element getInfo(String doc_id, String info_type) {
900
901 String value="";
902 if (info_type.equals(INFO_NUM_SIBS)) {
903 String parent_id = OID.getParent(doc_id);
904 if (parent_id.equals(doc_id)) {
905 value="0";
906 } else {
907 value = String.valueOf(getNumChildren(parent_id));
908 }
909 } else if (info_type.equals(INFO_NUM_CHILDREN)) {
910 value = String.valueOf(getNumChildren(doc_id));
911 } else if (info_type.equals(INFO_SIB_POS)) {
912 String parent_id = OID.getParent(doc_id);
913 if (parent_id.equals(doc_id)) {
914 value="-1";
915 } else {
916 DBInfo info = this.gdbm_src.getInfo(parent_id);
917 String contains = info.getInfo("contains");
918 contains = contains.replaceAll("\"", parent_id);
919 String [] children = contains.split(";");
920 for (int i=0;i<children.length;i++) {
921 String child_id = children[i];
922 if (child_id.equals(doc_id)) {
923 value = String.valueOf(i+1); // make it from 1 to length
924 break;
925 }
926 }
927 }
928 } else {
929 return null;
930 }
931 Element info_elem = this.doc.createElement("info");
932 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
933 info_elem.setAttribute(GSXML.VALUE_ATT, value);
934 return info_elem;
935 }
936
937 protected int getNumChildren(String doc_id) {
938 DBInfo info = this.gdbm_src.getInfo(doc_id);
939 String contains = info.getInfo("contains");
940 if (contains.equals("")) {
941 return 0;
942 }
943 String [] children = contains.split(";");
944 return children.length;
945 }
946
947}
Note: See TracBrowser for help on using the repository browser.