source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 24981

Last change on this file since 24981 was 24981, checked in by sjm84, 12 years ago

In paged collections, pages are leaf nodes rather than internal nodes, which prevents calls like 'expand document' from working correctly. For now I have added a (possibly temporary) fix.

  • Property svn:keywords set to Author Date Id Revision
File size: 27.7 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24import org.greenstone.util.GlobalProperties;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GSConstants;
31
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/**
47 * Abstract class for Document Retrieval Services
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 */
51
52public abstract class AbstractDocumentRetrieve extends ServiceRack
53{
54 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
55
56 // the services on offer
57 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
58 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
59 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
60
61 protected static final String STRUCT_PARAM = "structure";
62 protected static final String INFO_PARAM = "info";
63
64 protected static final String STRUCT_ANCESTORS = "ancestors";
65 protected static final String STRUCT_PARENT = "parent";
66 protected static final String STRUCT_SIBS = "siblings";
67 protected static final String STRUCT_CHILDREN = "children";
68 protected static final String STRUCT_DESCENDS = "descendants";
69 protected static final String STRUCT_ENTIRE = "entire";
70
71 protected static final String INFO_NUM_SIBS = "numSiblings";
72 protected static final String INFO_NUM_CHILDREN = "numChildren";
73 protected static final String INFO_SIB_POS = "siblingPosition";
74
75 // means the id is not a greenstone id and needs translating
76 protected static final String EXTID_PARAM = "ext";
77
78 protected Element config_info = null; // the xml from the config file
79
80 protected String default_document_type = null;
81 protected MacroResolver macro_resolver = null;
82
83 /** does this class provide the service?? */
84 protected boolean does_metadata = true;
85 protected boolean does_content = true;
86 protected boolean does_structure = true;
87
88 /** constructor */
89 public AbstractDocumentRetrieve()
90 {
91 }
92
93 /** configure this service */
94 public boolean configure(Element info, Element extra_info)
95 {
96 if (!super.configure(info, extra_info))
97 {
98 return false;
99 }
100
101 logger.info("Configuring AbstractDocumentRetrieve...");
102 this.config_info = info;
103
104 // set up short_service_info_ - for now just has name and type
105 if (does_structure)
106 {
107 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
108 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
109 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
110 this.short_service_info.appendChild(dsr_service);
111 }
112
113 if (does_metadata)
114 {
115 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
116 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
117 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
118 this.short_service_info.appendChild(dmr_service);
119 }
120
121 if (does_content)
122 {
123 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
124 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
125 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
126 this.short_service_info.appendChild(dcr_service);
127 }
128
129 // look for document display format
130 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
131 Element display_format = (Element) GSXML.getNodeByPath(extra_info, path);
132 if (display_format != null)
133 {
134 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
135 // should we keep a copy?
136 // check for docType option.
137 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
138 if (doc_type_opt != null)
139 {
140 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
141 if (!value.equals(""))
142 {
143 this.default_document_type = value;
144 }
145 }
146 }
147
148 if (macro_resolver != null)
149 {
150 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
151 // set up the macro resolver
152 Element replacement_elem = (Element) GSXML.getChildByTagName(extra_info, "replaceList");
153 if (replacement_elem != null)
154 {
155 macro_resolver.addMacros(replacement_elem);
156 }
157 // look for any refs to global replace lists
158 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
159 for (int i = 0; i < replace_refs_elems.getLength(); i++)
160 {
161 String id = ((Element) replace_refs_elems.item(i)).getAttribute("id");
162 if (!id.equals(""))
163 {
164 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
165 if (replace_list != null)
166 {
167 macro_resolver.addMacros(replace_list);
168 }
169 }
170 }
171 }
172
173 return true;
174 }
175
176 protected Element getServiceDescription(String service_id, String lang, String subset)
177 {
178
179 // these ones are probably never called, but put them here just in case
180 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
181 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
182 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
183 return service_elem;
184 }
185
186 protected Element processDocumentMetadataRetrieve(Element request)
187 {
188
189 // Create a new (empty) result message
190 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
191 String lang = request.getAttribute(GSXML.LANG_ATT);
192 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
193 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
194
195 if (!does_metadata)
196 {
197 // shouldn't get here
198 return result;
199 }
200 // Get the parameters of the request
201 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
202 if (param_list == null)
203 {
204 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
205 return result;
206 }
207
208 boolean external_id = false;
209 // The metadata information required
210 ArrayList metadata_names_list = new ArrayList();
211 boolean all_metadata = false;
212 // Process the request parameters
213 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
214 while (param != null)
215 {
216 // Identify the metadata information desired
217 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata"))
218 {
219 String metadata = GSXML.getValue(param);
220 if (metadata.equals("all"))
221 {
222 all_metadata = true;
223 break;
224 }
225 metadata_names_list.add(metadata);
226 }
227 else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM) && GSXML.getValue(param).equals("1"))
228 {
229 external_id = true;
230 }
231 param = (Element) param.getNextSibling();
232 }
233
234 // check that there has been some metadata specified
235 if (!all_metadata && metadata_names_list.size() == 0)
236 {
237 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
238 return result;
239 }
240
241 // Get the documents
242 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
243 if (request_node_list == null)
244 {
245 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
246 return result;
247 }
248
249 // copy the request doc node list to the response
250 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
251 result.appendChild(response_node_list);
252
253 // use the copied list so that we add the metadata into the copy
254 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
255 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
256 if (request_nodes.getLength() == 0)
257 {
258 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no " + GSXML.DOC_NODE_ELEM + " found in the " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
259 return result;
260 }
261
262 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
263 for (int i = 0; i < request_nodes.getLength(); i++)
264 {
265 Element request_node = (Element) request_nodes.item(i);
266 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
267
268 boolean is_external_link = false;
269 if (!node_id.startsWith("HASH") && !node_id.startsWith("D"))
270 {
271 if (node_id.endsWith(".rt"))
272 {
273 node_id = getHrefOID(node_id.substring(0, node_id.length() - 3));
274 if (node_id != null)
275 {
276 node_id += ".rt";
277 }
278 else
279 {
280 is_external_link = true;
281 }
282 }
283 else
284 {
285 node_id = getHrefOID(node_id);
286 if (node_id == null)
287 {
288 is_external_link = true;
289 }
290 }
291 }
292 if (!is_external_link)
293 {
294 if (external_id)
295 {
296 // can we have .pr etc extensions with external ids?
297 node_id = translateExternalId(node_id);
298 }
299 else if (idNeedsTranslating(node_id))
300 {
301 node_id = translateId(node_id);
302 }
303 }
304
305 if (node_id == null)
306 {
307 continue;
308 }
309 if (!is_external_link)
310 {
311 try
312 {
313 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
314 request_node.appendChild(metadata_list);
315 }
316 catch (GSException e)
317 {
318 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
319 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM))
320 {
321 // there is no point trying any others
322 return result;
323 }
324 }
325 }
326 else
327 {
328 request_node.setAttribute("external_link", request_node.getAttribute(GSXML.NODE_ID_ATT));
329 }
330 }
331 return result;
332 }
333
334 protected Element processDocumentStructureRetrieve(Element request)
335 {
336
337 // Create a new (empty) result message
338 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
339 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
340 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
341
342 if (!does_structure)
343 {
344 // shouldn't get here
345 return result;
346 }
347
348 String lang = request.getAttribute(GSXML.LANG_ATT);
349
350 // Get the parameters of the request
351 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
352 if (param_list == null)
353 {
354 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
355 return result;
356 }
357
358 // get the documents of the request
359 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
360 if (query_doc_list == null)
361 {
362 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
363 return result;
364 }
365
366 // copy the doc_list to the response
367 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
368 result.appendChild(response_node_list);
369
370 // check that we have some doc nodes specified
371 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
372 if (node_list.getLength() == 0)
373 {
374 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no " + GSXML.DOC_NODE_ELEM + " found in the " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
375 return result;
376 }
377
378 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
379 boolean external_id = false;
380 if (extid_param != null && GSXML.getValue(extid_param).equals("1"))
381 {
382 external_id = true;
383 }
384
385 // the type of info required
386 boolean want_structure = false;
387 boolean want_info = false;
388
389 ArrayList info_types = new ArrayList();
390 // The document structure information desired
391 boolean want_ancestors = false;
392 boolean want_parent = false;
393 boolean want_siblings = false;
394 boolean want_children = false;
395 boolean want_descendants = false;
396
397 boolean want_entire_structure = false;
398 // Process the request parameters
399 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
400 for (int i = 0; i < params.getLength(); i++)
401 {
402
403 Element param = (Element) params.item(i);
404 String p_name = param.getAttribute(GSXML.NAME_ATT);
405 String p_value = GSXML.getValue(param);
406 // Identify the structure information desired
407 if (p_name.equals(STRUCT_PARAM))
408 {
409 want_structure = true;
410
411 // This is NOT locale sensitive
412 if (p_value.equals(STRUCT_ANCESTORS))
413 want_ancestors = true;
414 else if (p_value.equals(STRUCT_PARENT))
415 want_parent = true;
416 else if (p_value.equals(STRUCT_SIBS))
417 want_siblings = true;
418 else if (p_value.equals(STRUCT_CHILDREN))
419 want_children = true;
420 else if (p_value.equals(STRUCT_DESCENDS))
421 want_descendants = true;
422 else if (p_value.equals(STRUCT_ENTIRE))
423 want_entire_structure = true;
424 else
425 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
426 }
427 else if (p_name.equals(INFO_PARAM))
428 {
429 want_info = true;
430 info_types.add(p_value);
431 }
432 }
433
434 // Make sure there is no repeated information
435 if (want_ancestors)
436 want_parent = false;
437 if (want_descendants)
438 want_children = false;
439
440 for (int i = 0; i < node_list.getLength(); i++)
441 {
442 Element doc = (Element) node_list.item(i);
443
444 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
445 String is_external = doc.getAttribute("externalURL");
446
447 boolean is_external_link = false;
448 if (is_external.equals("0"))
449 {
450 is_external_link = true;
451 }
452 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link)
453 {
454 if (doc_id.endsWith(".rt"))
455 {
456 doc_id = getHrefOID(doc_id.substring(0, doc_id.length() - 3));
457 if (doc_id != null)
458 {
459 doc_id += ".rt";
460 }
461 else
462 {
463 is_external_link = true;
464 }
465 }
466 else
467 {
468 doc_id = getHrefOID(doc_id);
469 if (doc_id == null)
470 {
471 is_external_link = true;
472 }
473 }
474 }
475 if (!is_external_link)
476 {
477 if (external_id)
478 {
479 doc_id = translateExternalId(doc_id);
480 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
481 }
482 else if (idNeedsTranslating(doc_id))
483 {
484 doc_id = translateId(doc_id);
485 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
486 }
487
488 if (doc_id == null)
489 {
490 continue;
491 }
492
493 if (want_info)
494 {
495 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
496 doc.appendChild(node_info_elem);
497
498 for (int j = 0; j < info_types.size(); j++)
499 {
500 String info_type = (String) info_types.get(j);
501 String info_value = getStructureInfo(doc_id, info_type);
502 if (info_value != null)
503 {
504 Element info_elem = this.doc.createElement("info");
505 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
506 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
507 node_info_elem.appendChild(info_elem);
508 }
509 }
510 }
511
512 if (want_structure)
513 {
514 // all structure info goes into a nodeStructure elem
515 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
516 doc.appendChild(structure_elem);
517
518 if (want_entire_structure)
519 {
520 String root_id = getRootId(doc_id);
521 Element root_node = createDocNode(root_id); //, true, false);
522 addDescendants(root_node, root_id, true);
523 structure_elem.appendChild(root_node);
524 continue; // with the next document, we dont need to do any more here
525 }
526
527 // Add the requested structure information
528 Element base_node = createDocNode(doc_id); //, false, false);
529
530 //Ancestors: continually add parent nodes until the root is reached
531 Element top_node = base_node; // the top node so far
532 if (want_ancestors)
533 {
534 String current_id = doc_id;
535 while (true)
536 {
537 String parent_id = getParentId(current_id);
538 //Element parent = getParent(current_id);
539 if (parent_id == null)
540 break; // no parent
541 Element parent_node = createDocNode(parent_id);
542 parent_node.appendChild(top_node);
543 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
544 top_node = parent_node;
545 }
546 }
547 // Parent: get the parent of the selected node
548 else if (want_parent)
549 {
550 String parent_id = getParentId(doc_id);
551 if (parent_id != null)
552 {
553 Element parent_node = createDocNode(parent_id);
554 parent_node.appendChild(base_node);
555 top_node = parent_node;
556 }
557 }
558
559 // now the top node is the root of the structure
560 structure_elem.appendChild(top_node);
561
562 //Siblings: get the other descendants of the selected node's parent
563 if (want_siblings)
564 {
565 String parent_id = getParentId(doc_id);
566 if (parent_id != null)
567 {
568 // if parent == current id, then we are at the top
569 // and can't get siblings
570 Element parent_node = (Element) base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
571
572 // add siblings, - returns a pointer to the new current node
573 base_node = addSiblings(parent_node, parent_id, doc_id);
574 }
575
576 }
577
578 // Children: get the descendants, but only one level deep
579 if (want_children)
580 {
581 addDescendants(base_node, doc_id, false);
582 }
583 // Descendants: recursively get every descendant
584 else if (want_descendants)
585 {
586 addDescendants(base_node, doc_id, true);
587 }
588 } // if want structure
589
590 }
591 else
592 {
593 Element external_link_elem = this.doc.createElement("external");
594 external_link_elem.setAttribute("external_link", doc.getAttribute(GSXML.NODE_ID_ATT));
595 doc.appendChild(external_link_elem);
596 }// if is_external_link
597 } // for each doc
598 return result;
599 }
600
601 /** Retrieve the content of a document */
602 protected Element processDocumentContentRetrieve(Element request)
603 {
604 // Create a new (empty) result message
605 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
606 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
607 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
608
609 if (!does_content)
610 {
611 // shouldn't get here
612 return result;
613 }
614
615 // Get the parameters of the request
616 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
617 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
618 boolean external_id = false;
619 if (extid_param != null && GSXML.getValue(extid_param).equals("1"))
620 {
621 external_id = true;
622 }
623 // Get the request content
624 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
625 if (query_doc_list == null)
626 {
627 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
628 return result;
629 }
630
631 String lang = request.getAttribute(GSXML.LANG_ATT);
632 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
633 result.appendChild(doc_list);
634
635 // set up the retrieval??
636
637 // Get the documents
638 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list, GSXML.NODE_ID_ATT);
639 String[] is_externals = GSXML.getAttributeValuesFromList(query_doc_list, "externalURL");
640
641 for (int i = 0; i < doc_ids.length; i++)
642 {
643 String doc_id = doc_ids[i];
644 String is_external = is_externals[i];
645
646 boolean is_external_link = false;
647 if (is_external.equals("0"))
648 {
649 is_external_link = true;
650 }
651 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link)
652 {
653 //if (!doc_id.startsWith("HASH")){
654 if (doc_id.endsWith(".rt"))
655 {
656 String find_doc_id = getHrefOID(doc_id.substring(0, doc_id.length() - 3));
657 if (find_doc_id != null)
658 {
659 doc_id = doc_id + ".rt";
660 }
661 else
662 {
663 is_external_link = true;
664 }
665
666 }
667 else
668 {
669 String find_doc_id = getHrefOID(doc_id);
670 if (find_doc_id == null)
671 {
672 is_external_link = true;
673 }
674 else
675 {
676 doc_id = find_doc_id;
677 }
678 }
679 }
680
681 if (!is_external_link)
682 {
683 // Create the document node
684 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
685 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
686 doc_list.appendChild(doc);
687
688 if (external_id)
689 {
690 doc_id = translateExternalId(doc_id);
691 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
692 }
693 else if (idNeedsTranslating(doc_id))
694 {
695 doc_id = translateId(doc_id);
696 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
697 }
698 if (doc_id == null)
699 {
700 continue;
701 }
702 try
703 {
704 Element node_content = getNodeContent(doc_id, lang);
705 doc.appendChild(node_content);
706 }
707 catch (GSException e)
708 {
709 GSXML.addError(this.doc, result, e.getMessage());
710 return result;
711
712 }
713 }
714 else
715 {
716 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
717 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
718 //doc.setAttribute("external_link", doc_id);
719 Element external_link_elem = this.doc.createElement("external");
720 external_link_elem.setAttribute("external_link", doc_id);
721 doc.appendChild(external_link_elem);
722
723 doc_list.appendChild(doc);
724 }
725 }
726 return result;
727 }
728
729 /**
730 * create an element to go into the structure. A node element has the form
731 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
732 */
733 protected Element createDocNode(String node_id)
734 {
735 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
736 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
737
738 String doc_type = null;
739 if (default_document_type != null)
740 {
741 doc_type = default_document_type;
742 }
743 else
744 {
745 doc_type = getDocType(node_id);
746 }
747 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
748 String node_type = getNodeType(node_id, doc_type);
749 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
750 return node;
751 }
752
753 /**
754 * adds all the children of doc_id the the doc element, and if
755 * recursive=true, adds all their children as well
756 */
757 protected void addDescendants(Element doc, String doc_id, boolean recursive)
758 {
759 ArrayList child_ids = getChildrenIds(doc_id);
760 if (child_ids == null)
761 return;
762 for (int i = 0; i < child_ids.size(); i++)
763 {
764 String child_id = (String) child_ids.get(i);
765 Element child_elem = createDocNode(child_id);
766 doc.appendChild(child_elem);
767 if (recursive && (!child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF) || child_elem.getAttribute(GSXML.DOC_TYPE_ATT).equals(GSXML.DOC_TYPE_PAGED)))
768 {
769 addDescendants(child_elem, child_id, recursive);
770 }
771 }
772 }
773
774 /**
775 * adds all the siblings of current_id to the parent element. returns the
776 * new current element
777 */
778 protected Element addSiblings(Element parent_node, String parent_id, String current_id)
779 {
780 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
781 if (current_node == null)
782 {
783 // create a sensible error message
784 logger.error(" there should be a first child.");
785 return null;
786 }
787 // remove the current child,- will add it in later in its correct place
788 parent_node.removeChild(current_node);
789
790 // add in all the siblings,
791 addDescendants(parent_node, parent_id, false);
792
793 // find the node that is now the current node
794 // this assumes that the new node that was created is the same as
795 // the old one that was removed - we may want to replace the new one
796 // with the old one.
797 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
798 return new_current;
799 }
800
801 /**
802 * returns true if oid ends in .fc (firstchild), .lc (lastchild), .pr
803 * (parent), .ns (next sibling), .ps (prev sibling), .rt (root) .ss
804 * (specified sibling), false otherwise
805 */
806 protected boolean idNeedsTranslating(String id)
807 {
808 return OID.needsTranslating(id);
809 }
810
811 /** returns the list of sibling ids, including the specified node_id */
812 protected ArrayList getSiblingIds(String node_id)
813 {
814 String parent_id = getParentId(node_id);
815 if (parent_id == null)
816 {
817 return null;
818 }
819 return getChildrenIds(parent_id);
820
821 }
822
823 /**
824 * returns the node type of the specified node. should be one of
825 * GSXML.NODE_TYPE_LEAF, GSXML.NODE_TYPE_INTERNAL, GSXML.NODE_TYPE_ROOT
826 */
827 protected String getNodeType(String node_id, String doc_type)
828 {
829 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE))
830 {
831 return GSXML.NODE_TYPE_LEAF;
832 }
833
834 if (getParentId(node_id) == null)
835 {
836 return GSXML.NODE_TYPE_ROOT;
837 }
838 if (doc_type.equals(GSXML.DOC_TYPE_PAGED))
839 {
840 return GSXML.NODE_TYPE_LEAF;
841 }
842 if (getChildrenIds(node_id) == null)
843 {
844 return GSXML.NODE_TYPE_LEAF;
845 }
846 return GSXML.NODE_TYPE_INTERNAL;
847
848 }
849
850 /**
851 * if id ends in .fc, .pc etc, then translate it to the correct id default
852 * implementation: just remove the suffix
853 */
854 protected String translateId(String id)
855 {
856 return id.substring(0, id.length());
857 }
858
859 /**
860 * if an id is not a greenstone id (an external id) then translate it to a
861 * greenstone one default implementation: return the id
862 */
863 protected String translateExternalId(String id)
864 {
865 return id;
866 }
867
868 /**
869 * returns the document type of the doc that the specified node belongs to.
870 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
871 * GSXML.DOC_TYPE_HIERARCHY default implementation: return DOC_TYPE_SIMPLE
872 */
873 protected String getDocType(String node_id)
874 {
875 return GSXML.DOC_TYPE_SIMPLE;
876 }
877
878 /**
879 * returns the id of the root node of the document containing node node_id.
880 * may be the same as node_id default implemntation: return node_id
881 */
882 protected String getRootId(String node_id)
883 {
884 return node_id;
885 }
886
887 /**
888 * returns a list of the child ids in order, null if no children default
889 * implementation: return null
890 */
891 protected ArrayList getChildrenIds(String node_id)
892 {
893 return null;
894 }
895
896 /**
897 * returns the node id of the parent node, null if no parent default
898 * implementation: return null
899 */
900 protected String getParentId(String node_id)
901 {
902 return null;
903 }
904
905 /**
906 * get the metadata for the doc node doc_id returns a metadataList element:
907 * <metadataList><metadata name="xxx">value</metadata></metadataList>
908 */
909 abstract protected Element getMetadataList(String doc_id, boolean all_metadata, ArrayList metadata_names) throws GSException;
910
911 /**
912 * returns the content of a node should return a nodeContent element:
913 * <nodeContent>text content or other elements</nodeContent> can return
914 */
915 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
916
917 /**
918 * returns the structural information asked for. info_type may be one of
919 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
920 */
921 abstract protected String getStructureInfo(String doc_id, String info_type);
922
923 protected String getHrefOID(String href_url)
924 {
925 return null;
926 }
927
928}
Note: See TracBrowser for help on using the repository browser.