source: greenstone3/branches/customizingGreenstone3/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 15787

Last change on this file since 15787 was 15787, checked in by oranfry, 16 years ago

updating from trunk: brought in trunk changes from r15191 to r15785

  • Property svn:keywords set to Author Date Id Revision
File size: 27.3 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24import org.greenstone.gsdl3.core.GSException;
25import org.greenstone.gsdl3.util.GSXML;
26import org.greenstone.gsdl3.util.GSPath;
27import org.greenstone.gsdl3.util.MacroResolver;
28import org.greenstone.gsdl3.util.OID;
29import org.greenstone.gsdl3.util.GlobalProperties;
30import org.greenstone.gsdl3.util.GSConstants;
31
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44
45import org.apache.log4j.*;
46
47/** Abstract class for Document Retrieval Services
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 */
51
52public abstract class AbstractDocumentRetrieve
53 extends ServiceRack {
54
55 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
56
57 // the services on offer
58 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
59 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
60 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
61
62 protected static final String STRUCT_PARAM = "structure";
63 protected static final String INFO_PARAM = "info";
64
65 protected static final String STRUCT_ANCESTORS = "ancestors";
66 protected static final String STRUCT_PARENT = "parent";
67 protected static final String STRUCT_SIBS = "siblings";
68 protected static final String STRUCT_CHILDREN = "children";
69 protected static final String STRUCT_DESCENDS = "descendants";
70 protected static final String STRUCT_ENTIRE = "entire";
71
72 protected static final String INFO_NUM_SIBS = "numSiblings";
73 protected static final String INFO_NUM_CHILDREN = "numChildren";
74 protected static final String INFO_SIB_POS = "siblingPosition";
75
76 // means the id is not a greenstone id and needs translating
77 protected static final String EXTID_PARAM = "ext";
78
79 protected Element config_info = null; // the xml from the config file
80
81 protected String default_document_type = null;
82 protected MacroResolver macro_resolver = null;
83
84 /** does this class provide the service?? */
85 protected boolean does_metadata = true;
86 protected boolean does_content = true;
87 protected boolean does_structure = true;
88
/** No-argument constructor; performs no set-up of its own (see configure()). */
public AbstractDocumentRetrieve()
{
    super();
}
93
94 /** configure this service */
95 public boolean configure(Element info, Element extra_info)
96 {
97 if (!super.configure(info, extra_info)){
98 return false;
99 }
100
101 logger.info("Configuring AbstractDocumentRetrieve...");
102 this.config_info = info;
103
104 // set up short_service_info_ - for now just has name and type
105 if (does_structure) {
106 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
107 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
108 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
109 this.short_service_info.appendChild(dsr_service);
110 }
111
112 if (does_metadata) {
113 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
114 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
115 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
116 this.short_service_info.appendChild(dmr_service);
117 }
118
119 if (does_content) {
120 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
121 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
122 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
123 this.short_service_info.appendChild(dcr_service);
124 }
125
126 // look for document display format
127 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
128 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
129 if (display_format != null) {
130 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
131 // should we keep a copy?
132 // check for docType option.
133 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
134 if (doc_type_opt != null) {
135 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
136 if (!value.equals("")) {
137 this.default_document_type = value;
138 }
139 }
140 }
141
142 if (macro_resolver != null) {
143 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
144 // set up the macro resolver
145 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
146 if (replacement_elem != null) {
147 macro_resolver.addMacros(replacement_elem);
148 }
149 // look for any refs to global replace lists
150 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
151 for (int i=0; i<replace_refs_elems.getLength(); i++) {
152 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
153 if (!id.equals("")) {
154 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
155 if (replace_list != null) {
156 macro_resolver.addMacros(replace_list);
157 }
158 }
159 }
160 }
161
162 return true;
163 }
164
165 protected Element getServiceDescription(String service_id, String lang, String subset) {
166
167 // these ones are probably never called, but put them here just in case
168 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
169 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
170 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
171 return service_elem;
172 }
173
174 protected Element processDocumentMetadataRetrieve(Element request) {
175
176 // Create a new (empty) result message
177 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
178 String lang = request.getAttribute(GSXML.LANG_ATT);
179 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
180 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
181
182 if (!does_metadata) {
183 // shouldn't get here
184 return result;
185 }
186 // Get the parameters of the request
187 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
188 if (param_list == null) {
189 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
190 return result;
191 }
192
193 boolean external_id = false;
194 // The metadata information required
195 ArrayList metadata_names_list = new ArrayList();
196 boolean all_metadata = false;
197 // Process the request parameters
198 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
199 while (param != null) {
200 // Identify the metadata information desired
201 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
202 String metadata = GSXML.getValue(param);
203 if (metadata.equals("all")) {
204 all_metadata = true;
205 break;
206 }
207 metadata_names_list.add(metadata);
208 } else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM)&& GSXML.getValue(param).equals("1")) {
209 external_id = true;
210 }
211 param = (Element) param.getNextSibling();
212 }
213
214 // check that there has been some metadata specified
215 if (!all_metadata && metadata_names_list.size()==0) {
216 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
217 return result;
218 }
219
220 // Get the documents
221 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
222 if (request_node_list == null) {
223 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
224 return result;
225 }
226
227 // copy the request doc node list to the response
228 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
229 result.appendChild(response_node_list);
230
231 // use the copied list so that we add the metadata into the copy
232 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
233 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
234 if (request_nodes.getLength()==0) {
235 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
236 return result;
237 }
238
239 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
240 for (int i = 0; i < request_nodes.getLength(); i++) {
241 Element request_node = (Element) request_nodes.item(i);
242 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
243
244 boolean is_external_link = false;
245 if (!node_id.startsWith("HASH")){
246 if (node_id.endsWith(".rt")){
247 node_id = getHrefOID(node_id.substring(0,node_id.length()-3));
248 if (node_id!=null){
249 node_id += ".rt";
250 }else{
251 is_external_link = true;
252 }
253 }else{
254 node_id = getHrefOID(node_id);
255 if (node_id==null){
256 is_external_link = true;
257 }
258 }
259 }
260 if (!is_external_link){
261 if (external_id) {
262 // can we have .pr etc extensions with external ids?
263 node_id = translateExternalId(node_id);
264 } else if (idNeedsTranslating(node_id)) {
265 node_id = translateId(node_id);
266 }
267 }
268
269 if (node_id == null) {
270 continue;
271 }
272 if (!is_external_link){
273 try {
274 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
275 request_node.appendChild(metadata_list);
276 } catch (GSException e) {
277 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
278 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM)) {
279 // there is no point trying any others
280 return result;
281 }
282 }
283 }else{
284 request_node.setAttribute("external_link",request_node.getAttribute(GSXML.NODE_ID_ATT));
285 }
286 }
287
288 return result;
289 }
290
291 protected Element processDocumentStructureRetrieve(Element request) {
292
293 // Create a new (empty) result message
294 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
295 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
296 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
297
298 if (!does_structure) {
299 // shouldn't get here
300 return result;
301 }
302
303 String lang = request.getAttribute(GSXML.LANG_ATT);
304
305 // Get the parameters of the request
306 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
307 if (param_list == null) {
308 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
309 return result;
310 }
311
312 // get the documents of the request
313 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
314 if (query_doc_list == null) {
315 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
316 return result;
317 }
318
319 // copy the doc_list to the response
320 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
321 result.appendChild(response_node_list);
322
323 // check that we have some doc nodes specified
324 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
325 if (node_list.getLength()==0) {
326 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
327 return result;
328 }
329
330 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
331 boolean external_id = false;
332 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
333 external_id = true;
334 }
335
336 // the type of info required
337 boolean want_structure = false;
338 boolean want_info = false;
339
340 ArrayList info_types=new ArrayList();
341 // The document structure information desired
342 boolean want_ancestors = false;
343 boolean want_parent = false;
344 boolean want_siblings = false;
345 boolean want_children = false;
346 boolean want_descendants = false;
347
348 boolean want_entire_structure = false;
349 // Process the request parameters
350 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
351 for (int i=0; i<params.getLength();i++) {
352
353 Element param = (Element)params.item(i);
354 String p_name = param.getAttribute(GSXML.NAME_ATT);
355 String p_value = GSXML.getValue(param);
356 // Identify the structure information desired
357 if (p_name.equals(STRUCT_PARAM)) {
358 want_structure = true;
359
360 // This is NOT locale sensitive
361 if (p_value.equals(STRUCT_ANCESTORS))
362 want_ancestors = true;
363 else if (p_value.equals(STRUCT_PARENT))
364 want_parent = true;
365 else if (p_value.equals(STRUCT_SIBS))
366 want_siblings = true;
367 else if (p_value.equals(STRUCT_CHILDREN))
368 want_children = true;
369 else if (p_value.equals(STRUCT_DESCENDS))
370 want_descendants = true;
371 else if (p_value.equals(STRUCT_ENTIRE))
372 want_entire_structure = true;
373 else
374 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
375 } else if (p_name.equals(INFO_PARAM)) {
376 want_info = true;
377 info_types.add(p_value);
378 }
379 }
380
381 // Make sure there is no repeated information
382 if (want_ancestors)
383 want_parent = false;
384 if (want_descendants)
385 want_children = false;
386
387 for (int i=0; i < node_list.getLength(); i++) {
388 Element doc = (Element) node_list.item(i);
389 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
390 String is_external=doc.getAttribute("externalURL");
391
392 boolean is_external_link = false;
393 if (is_external.equals("0")) {is_external_link = true;}
394 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
395 if (doc_id.endsWith(".rt")){
396 doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
397 if (doc_id!=null){
398 doc_id += ".rt";
399 }else{
400 is_external_link = true;
401 }
402 }else{
403 doc_id = getHrefOID(doc_id);
404 if (doc_id==null){ is_external_link = true;}
405 }
406 }
407 if (!is_external_link){
408 if (external_id) {
409 doc_id = translateExternalId(doc_id);
410 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
411 } else if (idNeedsTranslating(doc_id)) {
412 doc_id = translateId(doc_id);
413 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
414 }
415
416 if (doc_id == null) {
417 continue;
418 }
419
420 if (want_info) {
421
422 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
423 doc.appendChild(node_info_elem);
424
425 for (int j=0; j<info_types.size(); j++) {
426 String info_type = (String)info_types.get(j);
427 String info_value = getStructureInfo(doc_id, info_type);
428 if (info_value != null) {
429 Element info_elem = this.doc.createElement("info");
430 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
431 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
432 node_info_elem.appendChild(info_elem);
433 }
434 }
435 }
436
437 if (want_structure) {
438 // all structure info goes into a nodeStructure elem
439 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
440 doc.appendChild(structure_elem);
441
442 if (want_entire_structure) {
443 String root_id = getRootId(doc_id);
444 Element root_node = createDocNode(root_id); //, true, false);
445 addDescendants(root_node, root_id, true);
446 structure_elem.appendChild(root_node);
447 continue; // with the next document, we dont need to do any more here
448 }
449
450 // Add the requested structure information
451 Element base_node = createDocNode(doc_id); //, false, false);
452
453 //Ancestors: continually add parent nodes until the root is reached
454 Element top_node = base_node; // the top node so far
455 if (want_ancestors) {
456 String current_id = doc_id;
457 while (true) {
458 String parent_id = getParentId(current_id);
459 //Element parent = getParent(current_id);
460 if (parent_id == null)
461 break; // no parent
462 Element parent_node = createDocNode(parent_id);
463 parent_node.appendChild(top_node);
464 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
465 top_node = parent_node;
466 }
467 }
468 // Parent: get the parent of the selected node
469 else if (want_parent) {
470 String parent_id = getParentId(doc_id);
471 if (parent_id != null) {
472 Element parent_node = createDocNode(parent_id);
473 parent_node.appendChild(base_node);
474 top_node = parent_node;
475 }
476 }
477
478 // now the top node is the root of the structure
479 structure_elem.appendChild(top_node);
480
481 //Siblings: get the other descendants of the selected node's parent
482 if (want_siblings) {
483 String parent_id = getParentId(doc_id);
484 if (parent_id != null) {
485 // if parent == current id, then we are at the top
486 // and can't get siblings
487 Element parent_node = (Element)base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
488
489 // add siblings, - returns a pointer to the new current node
490 base_node = addSiblings(parent_node, parent_id, doc_id);
491 }
492
493 }
494
495 // Children: get the descendants, but only one level deep
496 if (want_children) {
497 addDescendants(base_node, doc_id, false);
498 }
499 // Descendants: recursively get every descendant
500 else if (want_descendants) {
501 addDescendants(base_node, doc_id, true);
502 }
503 } // if want structure
504
505 }else{
506 Element external_link_elem = this.doc.createElement("external");
507 external_link_elem.setAttribute("external_link",doc.getAttribute(GSXML.NODE_ID_ATT));
508 doc.appendChild(external_link_elem);
509 }// if is_external_link
510 } // for each doc
511 return result;
512 }
513
514 /** Retrieve the content of a document */
515 protected Element processDocumentContentRetrieve(Element request)
516 {
517 // Create a new (empty) result message
518 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
519 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
520 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
521
522 if (!does_content) {
523 // shouldn't get here
524 return result;
525 }
526
527 // Get the parameters of the request
528 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
529 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
530 boolean external_id = false;
531 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
532 external_id = true;
533 }
534 // Get the request content
535 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
536 if (query_doc_list == null) {
537 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
538 return result;
539 }
540
541 String lang = request.getAttribute(GSXML.LANG_ATT);
542 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
543 result.appendChild(doc_list);
544
545 // set up the retrieval??
546
547 // Get the documents
548 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
549 GSXML.NODE_ID_ATT);
550 String[] is_externals=GSXML.getAttributeValuesFromList(query_doc_list,"externalURL");
551
552 for (int i = 0; i < doc_ids.length; i++) {
553 String doc_id = doc_ids[i];
554 String is_external=is_externals[i];
555
556 boolean is_external_link=false;
557 if (is_external.equals("0")){is_external_link = true;}
558 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
559 //if (!doc_id.startsWith("HASH")){
560 if (doc_id.endsWith(".rt")){
561 String find_doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
562 if (find_doc_id!=null){
563 doc_id = doc_id + ".rt";
564 }else{
565 is_external_link=true;
566 }
567
568 }else {
569 String find_doc_id = getHrefOID(doc_id);
570 if (find_doc_id==null){
571 is_external_link=true;
572 }else{
573 doc_id = find_doc_id;
574 }
575 }
576 }
577
578 if (!is_external_link){
579 // Create the document node
580 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
581 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
582 doc_list.appendChild(doc);
583
584 if (external_id) {
585 doc_id = translateExternalId(doc_id);
586 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
587 } else if (idNeedsTranslating(doc_id)) {
588 doc_id = translateId(doc_id);
589 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
590 }
591 if (doc_id == null) {
592 continue;
593 }
594 try {
595 Element node_content = getNodeContent(doc_id, lang);
596 doc.appendChild(node_content);
597 } catch (GSException e) {
598 GSXML.addError(this.doc, result, e.getMessage());
599 return result;
600
601 }
602 }else{
603 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
604 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
605 //doc.setAttribute("external_link", doc_id);
606 Element external_link_elem = this.doc.createElement("external");
607 external_link_elem.setAttribute("external_link",doc_id);
608 doc.appendChild(external_link_elem);
609
610 doc_list.appendChild(doc);
611 }
612 }
613 return result;
614 }
615
616 /** create an element to go into the structure. A node element
617 * has the form
618 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
619 */
620 protected Element createDocNode(String node_id) {
621 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
622 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
623
624 String doc_type = null;
625 if (default_document_type != null) {
626 doc_type = default_document_type;
627 } else {
628 doc_type = getDocType(node_id);
629 }
630 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
631 String node_type = getNodeType(node_id, doc_type);
632 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
633 return node;
634 }
635
636 /** adds all the children of doc_id the the doc element,
637 * and if recursive=true, adds all their children as well*/
638 protected void addDescendants(Element doc, String doc_id,
639 boolean recursive)
640 {
641 ArrayList child_ids = getChildrenIds(doc_id);
642 if (child_ids==null) return;
643 for (int i=0; i< child_ids.size(); i++) {
644 String child_id = (String)child_ids.get(i);
645 Element child_elem = createDocNode(child_id);
646 doc.appendChild(child_elem);
647 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
648 addDescendants(child_elem, child_id, recursive);
649 }
650 }
651 }
652
653 /** adds all the siblings of current_id to the parent element.
654 returns the new current element*/
655 protected Element addSiblings(Element parent_node, String parent_id,
656 String current_id) {
657 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
658 if (current_node == null) {
659 // create a sensible error message
660 logger.error(" there should be a first child.");
661 return null;
662 }
663 // remove the current child,- will add it in later in its correct place
664 parent_node.removeChild(current_node);
665
666 // add in all the siblings,
667 addDescendants(parent_node, parent_id, false);
668
669 // find the node that is now the current node
670 // this assumes that the new node that was created is the same as
671 // the old one that was removed - we may want to replace the new one
672 // with the old one.
673 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
674 return new_current;
675 }
676
677 /** returns true if oid ends in
678 .fc (firstchild),
679 .lc (lastchild),
680 .pr (parent),
681 .ns (next sibling),
682 .ps (prev sibling),
683 .rt (root)
684 .ss (specified sibling),
685 false otherwise
686 */
687 protected boolean idNeedsTranslating(String id) {
688 return OID.needsTranslating(id);
689 }
690
691 /** returns the list of sibling ids, including the specified node_id */
692 protected ArrayList getSiblingIds(String node_id) {
693 String parent_id = getParentId(node_id);
694 if (parent_id == null) {
695 return null;
696 }
697 return getChildrenIds(parent_id);
698
699 }
700
701 /** returns the node type of the specified node.
702 should be one of
703 GSXML.NODE_TYPE_LEAF,
704 GSXML.NODE_TYPE_INTERNAL,
705 GSXML.NODE_TYPE_ROOT
706 */
707 protected String getNodeType(String node_id, String doc_type) {
708 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
709 return GSXML.NODE_TYPE_LEAF;
710 }
711
712 if (getParentId(node_id)==null) {
713 return GSXML.NODE_TYPE_ROOT;
714 }
715 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
716 return GSXML.NODE_TYPE_LEAF;
717 }
718 if (getChildrenIds(node_id)==null) {
719 return GSXML.NODE_TYPE_LEAF;
720 }
721 return GSXML.NODE_TYPE_INTERNAL;
722
723 }
724
725 /** if id ends in .fc, .pc etc, then translate it to the correct id
726 * default implementation: just remove the suffix */
727 protected String translateId(String id) {
728 return id.substring(0,id.length());
729 }
730
731 /** if an id is not a greenstone id (an external id) then translate
732 * it to a greenstone one
733 * default implementation: return the id */
734 protected String translateExternalId(String id) {
735 return id;
736 }
737
738 /** returns the document type of the doc that the specified node
739 belongs to. should be one of
740 GSXML.DOC_TYPE_SIMPLE,
741 GSXML.DOC_TYPE_PAGED,
742 GSXML.DOC_TYPE_HIERARCHY
743 default implementation: return DOC_TYPE_SIMPLE
744 */
745 protected String getDocType(String node_id) {
746 return GSXML.DOC_TYPE_SIMPLE;
747 }
748
749
750 /** returns the id of the root node of the document containing
751 * node node_id. may be the same as node_id
752 * default implemntation: return node_id
753 */
754 protected String getRootId(String node_id) {
755 return node_id;
756 }
757 /** returns a list of the child ids in order, null if no children
758 * default implementation: return null */
759 protected ArrayList getChildrenIds(String node_id) {
760 return null;
761 }
762 /** returns the node id of the parent node, null if no parent
763 * default implementation: return null */
764 protected String getParentId(String node_id) {
765 return null;
766 }
767
768 /** get the metadata for the doc node doc_id
769 * returns a metadataList element:
770 * <metadataList><metadata name="xxx">value</metadata></metadataList>
771 */
772 abstract protected Element getMetadataList(String doc_id,
773 boolean all_metadata,
774 ArrayList metadata_names) throws GSException;
775 /** returns the content of a node
776 * should return a nodeContent element:
777 * <nodeContent>text content or other elements</nodeContent>
778 * can return
779 */
780 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
781
782 /** returns the structural information asked for.
783 * info_type may be one of
784 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
785 */
786 abstract protected String getStructureInfo(String doc_id, String info_type);
787
788 protected String getHrefOID(String href_url){
789 return null;
790 }
791
792}
Note: See TracBrowser for help on using the repository browser.