source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 24980

Last change on this file since 24980 was 24980, checked in by sjm84, 12 years ago

Reformatting this file ahead of some changes

  • Property svn:keywords set to Author Date Id Revision
File size: 27.6 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24import org.greenstone.util.GlobalProperties;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GSConstants;
31
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44import org.apache.log4j.*;
45
46/**
47 * Abstract class for Document Retrieval Services
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 */
51
52public abstract class AbstractDocumentRetrieve extends ServiceRack
53{
54
55 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
56
57 // the services on offer
58 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
59 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
60 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
61
62 protected static final String STRUCT_PARAM = "structure";
63 protected static final String INFO_PARAM = "info";
64
65 protected static final String STRUCT_ANCESTORS = "ancestors";
66 protected static final String STRUCT_PARENT = "parent";
67 protected static final String STRUCT_SIBS = "siblings";
68 protected static final String STRUCT_CHILDREN = "children";
69 protected static final String STRUCT_DESCENDS = "descendants";
70 protected static final String STRUCT_ENTIRE = "entire";
71
72 protected static final String INFO_NUM_SIBS = "numSiblings";
73 protected static final String INFO_NUM_CHILDREN = "numChildren";
74 protected static final String INFO_SIB_POS = "siblingPosition";
75
76 // means the id is not a greenstone id and needs translating
77 protected static final String EXTID_PARAM = "ext";
78
79 protected Element config_info = null; // the xml from the config file
80
81 protected String default_document_type = null;
82 protected MacroResolver macro_resolver = null;
83
84 /** does this class provide the service?? */
85 protected boolean does_metadata = true;
86 protected boolean does_content = true;
87 protected boolean does_structure = true;
88
89 /** constructor */
90 public AbstractDocumentRetrieve()
91 {
92 }
93
94 /** configure this service */
95 public boolean configure(Element info, Element extra_info)
96 {
97 if (!super.configure(info, extra_info))
98 {
99 return false;
100 }
101
102 logger.info("Configuring AbstractDocumentRetrieve...");
103 this.config_info = info;
104
105 // set up short_service_info_ - for now just has name and type
106 if (does_structure)
107 {
108 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
109 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
110 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
111 this.short_service_info.appendChild(dsr_service);
112 }
113
114 if (does_metadata)
115 {
116 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
117 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
118 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
119 this.short_service_info.appendChild(dmr_service);
120 }
121
122 if (does_content)
123 {
124 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
125 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
126 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
127 this.short_service_info.appendChild(dcr_service);
128 }
129
130 // look for document display format
131 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
132 Element display_format = (Element) GSXML.getNodeByPath(extra_info, path);
133 if (display_format != null)
134 {
135 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
136 // should we keep a copy?
137 // check for docType option.
138 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
139 if (doc_type_opt != null)
140 {
141 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
142 if (!value.equals(""))
143 {
144 this.default_document_type = value;
145 }
146 }
147 }
148
149 if (macro_resolver != null)
150 {
151 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
152 // set up the macro resolver
153 Element replacement_elem = (Element) GSXML.getChildByTagName(extra_info, "replaceList");
154 if (replacement_elem != null)
155 {
156 macro_resolver.addMacros(replacement_elem);
157 }
158 // look for any refs to global replace lists
159 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
160 for (int i = 0; i < replace_refs_elems.getLength(); i++)
161 {
162 String id = ((Element) replace_refs_elems.item(i)).getAttribute("id");
163 if (!id.equals(""))
164 {
165 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
166 if (replace_list != null)
167 {
168 macro_resolver.addMacros(replace_list);
169 }
170 }
171 }
172 }
173
174 return true;
175 }
176
177 protected Element getServiceDescription(String service_id, String lang, String subset)
178 {
179
180 // these ones are probably never called, but put them here just in case
181 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
182 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
183 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
184 return service_elem;
185 }
186
187 protected Element processDocumentMetadataRetrieve(Element request)
188 {
189
190 // Create a new (empty) result message
191 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
192 String lang = request.getAttribute(GSXML.LANG_ATT);
193 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
194 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
195
196 if (!does_metadata)
197 {
198 // shouldn't get here
199 return result;
200 }
201 // Get the parameters of the request
202 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
203 if (param_list == null)
204 {
205 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
206 return result;
207 }
208
209 boolean external_id = false;
210 // The metadata information required
211 ArrayList metadata_names_list = new ArrayList();
212 boolean all_metadata = false;
213 // Process the request parameters
214 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
215 while (param != null)
216 {
217 // Identify the metadata information desired
218 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata"))
219 {
220 String metadata = GSXML.getValue(param);
221 if (metadata.equals("all"))
222 {
223 all_metadata = true;
224 break;
225 }
226 metadata_names_list.add(metadata);
227 }
228 else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM) && GSXML.getValue(param).equals("1"))
229 {
230 external_id = true;
231 }
232 param = (Element) param.getNextSibling();
233 }
234
235 // check that there has been some metadata specified
236 if (!all_metadata && metadata_names_list.size() == 0)
237 {
238 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
239 return result;
240 }
241
242 // Get the documents
243 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
244 if (request_node_list == null)
245 {
246 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
247 return result;
248 }
249
250 // copy the request doc node list to the response
251 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
252 result.appendChild(response_node_list);
253
254 // use the copied list so that we add the metadata into the copy
255 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
256 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
257 if (request_nodes.getLength() == 0)
258 {
259 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no " + GSXML.DOC_NODE_ELEM + " found in the " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
260 return result;
261 }
262
263 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
264 for (int i = 0; i < request_nodes.getLength(); i++)
265 {
266 Element request_node = (Element) request_nodes.item(i);
267 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
268
269 boolean is_external_link = false;
270 if (!node_id.startsWith("HASH") && !node_id.startsWith("D"))
271 {
272 if (node_id.endsWith(".rt"))
273 {
274 node_id = getHrefOID(node_id.substring(0, node_id.length() - 3));
275 if (node_id != null)
276 {
277 node_id += ".rt";
278 }
279 else
280 {
281 is_external_link = true;
282 }
283 }
284 else
285 {
286 node_id = getHrefOID(node_id);
287 if (node_id == null)
288 {
289 is_external_link = true;
290 }
291 }
292 }
293 if (!is_external_link)
294 {
295 if (external_id)
296 {
297 // can we have .pr etc extensions with external ids?
298 node_id = translateExternalId(node_id);
299 }
300 else if (idNeedsTranslating(node_id))
301 {
302 node_id = translateId(node_id);
303 }
304 }
305
306 if (node_id == null)
307 {
308 continue;
309 }
310 if (!is_external_link)
311 {
312 try
313 {
314 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
315 request_node.appendChild(metadata_list);
316 }
317 catch (GSException e)
318 {
319 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
320 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM))
321 {
322 // there is no point trying any others
323 return result;
324 }
325 }
326 }
327 else
328 {
329 request_node.setAttribute("external_link", request_node.getAttribute(GSXML.NODE_ID_ATT));
330 }
331 }
332 return result;
333 }
334
335 protected Element processDocumentStructureRetrieve(Element request)
336 {
337
338 // Create a new (empty) result message
339 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
340 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
341 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
342
343 if (!does_structure)
344 {
345 // shouldn't get here
346 return result;
347 }
348
349 String lang = request.getAttribute(GSXML.LANG_ATT);
350
351 // Get the parameters of the request
352 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
353 if (param_list == null)
354 {
355 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " + GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
356 return result;
357 }
358
359 // get the documents of the request
360 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
361 if (query_doc_list == null)
362 {
363 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
364 return result;
365 }
366
367 // copy the doc_list to the response
368 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
369 result.appendChild(response_node_list);
370
371 // check that we have some doc nodes specified
372 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
373 if (node_list.getLength() == 0)
374 {
375 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no " + GSXML.DOC_NODE_ELEM + " found in the " + GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
376 return result;
377 }
378
379 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
380 boolean external_id = false;
381 if (extid_param != null && GSXML.getValue(extid_param).equals("1"))
382 {
383 external_id = true;
384 }
385
386 // the type of info required
387 boolean want_structure = false;
388 boolean want_info = false;
389
390 ArrayList info_types = new ArrayList();
391 // The document structure information desired
392 boolean want_ancestors = false;
393 boolean want_parent = false;
394 boolean want_siblings = false;
395 boolean want_children = false;
396 boolean want_descendants = false;
397
398 boolean want_entire_structure = false;
399 // Process the request parameters
400 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
401 for (int i = 0; i < params.getLength(); i++)
402 {
403
404 Element param = (Element) params.item(i);
405 String p_name = param.getAttribute(GSXML.NAME_ATT);
406 String p_value = GSXML.getValue(param);
407 // Identify the structure information desired
408 if (p_name.equals(STRUCT_PARAM))
409 {
410 want_structure = true;
411
412 // This is NOT locale sensitive
413 if (p_value.equals(STRUCT_ANCESTORS))
414 want_ancestors = true;
415 else if (p_value.equals(STRUCT_PARENT))
416 want_parent = true;
417 else if (p_value.equals(STRUCT_SIBS))
418 want_siblings = true;
419 else if (p_value.equals(STRUCT_CHILDREN))
420 want_children = true;
421 else if (p_value.equals(STRUCT_DESCENDS))
422 want_descendants = true;
423 else if (p_value.equals(STRUCT_ENTIRE))
424 want_entire_structure = true;
425 else
426 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
427 }
428 else if (p_name.equals(INFO_PARAM))
429 {
430 want_info = true;
431 info_types.add(p_value);
432 }
433 }
434
435 // Make sure there is no repeated information
436 if (want_ancestors)
437 want_parent = false;
438 if (want_descendants)
439 want_children = false;
440
441 for (int i = 0; i < node_list.getLength(); i++)
442 {
443 Element doc = (Element) node_list.item(i);
444 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
445 String is_external = doc.getAttribute("externalURL");
446
447 boolean is_external_link = false;
448 if (is_external.equals("0"))
449 {
450 is_external_link = true;
451 }
452 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link)
453 {
454 if (doc_id.endsWith(".rt"))
455 {
456 doc_id = getHrefOID(doc_id.substring(0, doc_id.length() - 3));
457 if (doc_id != null)
458 {
459 doc_id += ".rt";
460 }
461 else
462 {
463 is_external_link = true;
464 }
465 }
466 else
467 {
468 doc_id = getHrefOID(doc_id);
469 if (doc_id == null)
470 {
471 is_external_link = true;
472 }
473 }
474 }
475 if (!is_external_link)
476 {
477 if (external_id)
478 {
479 doc_id = translateExternalId(doc_id);
480 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
481 }
482 else if (idNeedsTranslating(doc_id))
483 {
484 doc_id = translateId(doc_id);
485 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
486 }
487
488 if (doc_id == null)
489 {
490 continue;
491 }
492
493 if (want_info)
494 {
495
496 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
497 doc.appendChild(node_info_elem);
498
499 for (int j = 0; j < info_types.size(); j++)
500 {
501 String info_type = (String) info_types.get(j);
502 String info_value = getStructureInfo(doc_id, info_type);
503 if (info_value != null)
504 {
505 Element info_elem = this.doc.createElement("info");
506 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
507 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
508 node_info_elem.appendChild(info_elem);
509 }
510 }
511 }
512
513 if (want_structure)
514 {
515 // all structure info goes into a nodeStructure elem
516 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
517 doc.appendChild(structure_elem);
518
519 if (want_entire_structure)
520 {
521 String root_id = getRootId(doc_id);
522 Element root_node = createDocNode(root_id); //, true, false);
523 addDescendants(root_node, root_id, true);
524 structure_elem.appendChild(root_node);
525 continue; // with the next document, we dont need to do any more here
526 }
527
528 // Add the requested structure information
529 Element base_node = createDocNode(doc_id); //, false, false);
530
531 //Ancestors: continually add parent nodes until the root is reached
532 Element top_node = base_node; // the top node so far
533 if (want_ancestors)
534 {
535 String current_id = doc_id;
536 while (true)
537 {
538 String parent_id = getParentId(current_id);
539 //Element parent = getParent(current_id);
540 if (parent_id == null)
541 break; // no parent
542 Element parent_node = createDocNode(parent_id);
543 parent_node.appendChild(top_node);
544 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
545 top_node = parent_node;
546 }
547 }
548 // Parent: get the parent of the selected node
549 else if (want_parent)
550 {
551 String parent_id = getParentId(doc_id);
552 if (parent_id != null)
553 {
554 Element parent_node = createDocNode(parent_id);
555 parent_node.appendChild(base_node);
556 top_node = parent_node;
557 }
558 }
559
560 // now the top node is the root of the structure
561 structure_elem.appendChild(top_node);
562
563 //Siblings: get the other descendants of the selected node's parent
564 if (want_siblings)
565 {
566 String parent_id = getParentId(doc_id);
567 if (parent_id != null)
568 {
569 // if parent == current id, then we are at the top
570 // and can't get siblings
571 Element parent_node = (Element) base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
572
573 // add siblings, - returns a pointer to the new current node
574 base_node = addSiblings(parent_node, parent_id, doc_id);
575 }
576
577 }
578
579 // Children: get the descendants, but only one level deep
580 if (want_children)
581 {
582 addDescendants(base_node, doc_id, false);
583 }
584 // Descendants: recursively get every descendant
585 else if (want_descendants)
586 {
587 addDescendants(base_node, doc_id, true);
588 }
589 } // if want structure
590
591 }
592 else
593 {
594 Element external_link_elem = this.doc.createElement("external");
595 external_link_elem.setAttribute("external_link", doc.getAttribute(GSXML.NODE_ID_ATT));
596 doc.appendChild(external_link_elem);
597 }// if is_external_link
598 } // for each doc
599 return result;
600 }
601
602 /** Retrieve the content of a document */
603 protected Element processDocumentContentRetrieve(Element request)
604 {
605 // Create a new (empty) result message
606 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
607 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
608 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
609
610 if (!does_content)
611 {
612 // shouldn't get here
613 return result;
614 }
615
616 // Get the parameters of the request
617 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
618 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
619 boolean external_id = false;
620 if (extid_param != null && GSXML.getValue(extid_param).equals("1"))
621 {
622 external_id = true;
623 }
624 // Get the request content
625 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
626 if (query_doc_list == null)
627 {
628 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
629 return result;
630 }
631
632 String lang = request.getAttribute(GSXML.LANG_ATT);
633 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
634 result.appendChild(doc_list);
635
636 // set up the retrieval??
637
638 // Get the documents
639 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list, GSXML.NODE_ID_ATT);
640 String[] is_externals = GSXML.getAttributeValuesFromList(query_doc_list, "externalURL");
641
642 for (int i = 0; i < doc_ids.length; i++)
643 {
644 String doc_id = doc_ids[i];
645 String is_external = is_externals[i];
646
647 boolean is_external_link = false;
648 if (is_external.equals("0"))
649 {
650 is_external_link = true;
651 }
652 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link)
653 {
654 //if (!doc_id.startsWith("HASH")){
655 if (doc_id.endsWith(".rt"))
656 {
657 String find_doc_id = getHrefOID(doc_id.substring(0, doc_id.length() - 3));
658 if (find_doc_id != null)
659 {
660 doc_id = doc_id + ".rt";
661 }
662 else
663 {
664 is_external_link = true;
665 }
666
667 }
668 else
669 {
670 String find_doc_id = getHrefOID(doc_id);
671 if (find_doc_id == null)
672 {
673 is_external_link = true;
674 }
675 else
676 {
677 doc_id = find_doc_id;
678 }
679 }
680 }
681
682 if (!is_external_link)
683 {
684 // Create the document node
685 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
686 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
687 doc_list.appendChild(doc);
688
689 if (external_id)
690 {
691 doc_id = translateExternalId(doc_id);
692 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
693 }
694 else if (idNeedsTranslating(doc_id))
695 {
696 doc_id = translateId(doc_id);
697 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
698 }
699 if (doc_id == null)
700 {
701 continue;
702 }
703 try
704 {
705 Element node_content = getNodeContent(doc_id, lang);
706 doc.appendChild(node_content);
707 }
708 catch (GSException e)
709 {
710 GSXML.addError(this.doc, result, e.getMessage());
711 return result;
712
713 }
714 }
715 else
716 {
717 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
718 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
719 //doc.setAttribute("external_link", doc_id);
720 Element external_link_elem = this.doc.createElement("external");
721 external_link_elem.setAttribute("external_link", doc_id);
722 doc.appendChild(external_link_elem);
723
724 doc_list.appendChild(doc);
725 }
726 }
727 return result;
728 }
729
730 /**
731 * create an element to go into the structure. A node element has the form
732 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
733 */
734 protected Element createDocNode(String node_id)
735 {
736 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
737 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
738
739 String doc_type = null;
740 if (default_document_type != null)
741 {
742 doc_type = default_document_type;
743 }
744 else
745 {
746 doc_type = getDocType(node_id);
747 }
748 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
749 String node_type = getNodeType(node_id, doc_type);
750 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
751 return node;
752 }
753
754 /**
755 * adds all the children of doc_id the the doc element, and if
756 * recursive=true, adds all their children as well
757 */
758 protected void addDescendants(Element doc, String doc_id, boolean recursive)
759 {
760 ArrayList child_ids = getChildrenIds(doc_id);
761 if (child_ids == null)
762 return;
763 for (int i = 0; i < child_ids.size(); i++)
764 {
765 String child_id = (String) child_ids.get(i);
766 Element child_elem = createDocNode(child_id);
767 doc.appendChild(child_elem);
768 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF))
769 {
770 addDescendants(child_elem, child_id, recursive);
771 }
772 }
773 }
774
775 /**
776 * adds all the siblings of current_id to the parent element. returns the
777 * new current element
778 */
779 protected Element addSiblings(Element parent_node, String parent_id, String current_id)
780 {
781 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
782 if (current_node == null)
783 {
784 // create a sensible error message
785 logger.error(" there should be a first child.");
786 return null;
787 }
788 // remove the current child,- will add it in later in its correct place
789 parent_node.removeChild(current_node);
790
791 // add in all the siblings,
792 addDescendants(parent_node, parent_id, false);
793
794 // find the node that is now the current node
795 // this assumes that the new node that was created is the same as
796 // the old one that was removed - we may want to replace the new one
797 // with the old one.
798 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
799 return new_current;
800 }
801
802 /**
803 * returns true if oid ends in .fc (firstchild), .lc (lastchild), .pr
804 * (parent), .ns (next sibling), .ps (prev sibling), .rt (root) .ss
805 * (specified sibling), false otherwise
806 */
807 protected boolean idNeedsTranslating(String id)
808 {
809 return OID.needsTranslating(id);
810 }
811
812 /** returns the list of sibling ids, including the specified node_id */
813 protected ArrayList getSiblingIds(String node_id)
814 {
815 String parent_id = getParentId(node_id);
816 if (parent_id == null)
817 {
818 return null;
819 }
820 return getChildrenIds(parent_id);
821
822 }
823
824 /**
825 * returns the node type of the specified node. should be one of
826 * GSXML.NODE_TYPE_LEAF, GSXML.NODE_TYPE_INTERNAL, GSXML.NODE_TYPE_ROOT
827 */
828 protected String getNodeType(String node_id, String doc_type)
829 {
830 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE))
831 {
832 return GSXML.NODE_TYPE_LEAF;
833 }
834
835 if (getParentId(node_id) == null)
836 {
837 return GSXML.NODE_TYPE_ROOT;
838 }
839 if (doc_type.equals(GSXML.DOC_TYPE_PAGED))
840 {
841 return GSXML.NODE_TYPE_LEAF;
842 }
843 if (getChildrenIds(node_id) == null)
844 {
845 return GSXML.NODE_TYPE_LEAF;
846 }
847 return GSXML.NODE_TYPE_INTERNAL;
848
849 }
850
851 /**
852 * if id ends in .fc, .pc etc, then translate it to the correct id default
853 * implementation: just remove the suffix
854 */
855 protected String translateId(String id)
856 {
857 return id.substring(0, id.length());
858 }
859
860 /**
861 * if an id is not a greenstone id (an external id) then translate it to a
862 * greenstone one default implementation: return the id
863 */
864 protected String translateExternalId(String id)
865 {
866 return id;
867 }
868
869 /**
870 * returns the document type of the doc that the specified node belongs to.
871 * should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED,
872 * GSXML.DOC_TYPE_HIERARCHY default implementation: return DOC_TYPE_SIMPLE
873 */
874 protected String getDocType(String node_id)
875 {
876 return GSXML.DOC_TYPE_SIMPLE;
877 }
878
879 /**
880 * returns the id of the root node of the document containing node node_id.
881 * may be the same as node_id default implemntation: return node_id
882 */
883 protected String getRootId(String node_id)
884 {
885 return node_id;
886 }
887
888 /**
889 * returns a list of the child ids in order, null if no children default
890 * implementation: return null
891 */
892 protected ArrayList getChildrenIds(String node_id)
893 {
894 return null;
895 }
896
897 /**
898 * returns the node id of the parent node, null if no parent default
899 * implementation: return null
900 */
901 protected String getParentId(String node_id)
902 {
903 return null;
904 }
905
906 /**
907 * get the metadata for the doc node doc_id returns a metadataList element:
908 * <metadataList><metadata name="xxx">value</metadata></metadataList>
909 */
910 abstract protected Element getMetadataList(String doc_id, boolean all_metadata, ArrayList metadata_names) throws GSException;
911
912 /**
913 * returns the content of a node should return a nodeContent element:
914 * <nodeContent>text content or other elements</nodeContent> can return
915 */
916 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
917
918 /**
919 * returns the structural information asked for. info_type may be one of
920 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
921 */
922 abstract protected String getStructureInfo(String doc_id, String info_type);
923
924 protected String getHrefOID(String href_url)
925 {
926 return null;
927 }
928
929}
Note: See TracBrowser for help on using the repository browser.