source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 14529

Last change on this file since 14529 was 14529, checked in by qq6, 17 years ago

processing the external links

  • Property svn:keywords set to Author Date Id Revision
File size: 27.2 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24//import org.greenstone.gdbm.*;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GlobalProperties;
31import org.greenstone.gsdl3.util.GSConstants;
32
33// XML classes
34import org.w3c.dom.Document;
35import org.w3c.dom.Element;
36import org.w3c.dom.NodeList;
37
38// General Java classes
39import java.io.File;
40import java.util.StringTokenizer;
41import java.util.Set;
42import java.util.Iterator;
43import java.util.ArrayList;
44
45
46import org.apache.log4j.*;
47
48/** Abstract class for Document Retrieval Services
49 *
 * @author Katherine Don
51 */
52
53public abstract class AbstractDocumentRetrieve
54 extends ServiceRack {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
57
58 // the services on offer
59 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
60 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
61 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
62
63 protected static final String STRUCT_PARAM = "structure";
64 protected static final String INFO_PARAM = "info";
65
66 protected static final String STRUCT_ANCESTORS = "ancestors";
67 protected static final String STRUCT_PARENT = "parent";
68 protected static final String STRUCT_SIBS = "siblings";
69 protected static final String STRUCT_CHILDREN = "children";
70 protected static final String STRUCT_DESCENDS = "descendants";
71 protected static final String STRUCT_ENTIRE = "entire";
72
73 protected static final String INFO_NUM_SIBS = "numSiblings";
74 protected static final String INFO_NUM_CHILDREN = "numChildren";
75 protected static final String INFO_SIB_POS = "siblingPosition";
76
77 // means the id is not a greenstone id and needs translating
78 protected static final String EXTID_PARAM = "ext";
79
80 protected Element config_info = null; // the xml from the config file
81
82 protected String default_document_type = null;
83 protected MacroResolver macro_resolver = null;
84
85 /** does this class provide the service?? */
86 protected boolean does_metadata = true;
87 protected boolean does_content = true;
88 protected boolean does_structure = true;
89
90 /** constructor */
91 public AbstractDocumentRetrieve()
92 {
93 }
94
95 /** configure this service */
96 public boolean configure(Element info, Element extra_info)
97 {
98 if (!super.configure(info, extra_info)){
99 return false;
100 }
101
102 logger.info("Configuring AbstractDocumentRetrieve...");
103 this.config_info = info;
104
105 // set up short_service_info_ - for now just has name and type
106 if (does_structure) {
107 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
108 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
109 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
110 this.short_service_info.appendChild(dsr_service);
111 }
112
113 if (does_metadata) {
114 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
115 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
116 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
117 this.short_service_info.appendChild(dmr_service);
118 }
119
120 if (does_content) {
121 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
122 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
123 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
124 this.short_service_info.appendChild(dcr_service);
125 }
126
127 // look for document display format
128 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
129 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
130 if (display_format != null) {
131 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
132 // should we keep a copy?
133 // check for docType option.
134 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
135 if (doc_type_opt != null) {
136 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
137 if (!value.equals("")) {
138 this.default_document_type = value;
139 }
140 }
141 }
142
143 if (macro_resolver != null) {
144 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
145 // set up the macro resolver
146 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
147 if (replacement_elem != null) {
148 macro_resolver.addMacros(replacement_elem);
149 }
150 // look for any refs to global replace lists
151 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
152 for (int i=0; i<replace_refs_elems.getLength(); i++) {
153 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
154 if (!id.equals("")) {
155 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
156 if (replace_list != null) {
157 macro_resolver.addMacros(replace_list);
158 }
159 }
160 }
161 }
162
163 return true;
164 }
165
166 protected Element getServiceDescription(String service_id, String lang, String subset) {
167
168 // these ones are probably never called, but put them here just in case
169 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
170 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
171 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
172 return service_elem;
173 }
174
175 protected Element processDocumentMetadataRetrieve(Element request) {
176
177 // Create a new (empty) result message
178 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
179 String lang = request.getAttribute(GSXML.LANG_ATT);
180 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
181 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
182
183 if (!does_metadata) {
184 // shouldn't get here
185 return result;
186 }
187 // Get the parameters of the request
188 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
189 if (param_list == null) {
190 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
191 return result;
192 }
193
194 boolean external_id = false;
195 // The metadata information required
196 ArrayList metadata_names_list = new ArrayList();
197 boolean all_metadata = false;
198 // Process the request parameters
199 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
200 while (param != null) {
201 // Identify the metadata information desired
202 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
203 String metadata = GSXML.getValue(param);
204 if (metadata.equals("all")) {
205 all_metadata = true;
206 break;
207 }
208 metadata_names_list.add(metadata);
209 } else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM)&& GSXML.getValue(param).equals("1")) {
210 external_id = true;
211 }
212 param = (Element) param.getNextSibling();
213 }
214
215 // check that there has been some metadata specified
216 if (!all_metadata && metadata_names_list.size()==0) {
217 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
218 return result;
219 }
220
221 // Get the documents
222 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
223 if (request_node_list == null) {
224 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
225 return result;
226 }
227
228 // copy the request doc node list to the response
229 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
230 result.appendChild(response_node_list);
231
232 // use the copied list so that we add the metadata into the copy
233 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
234 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
235 if (request_nodes.getLength()==0) {
236 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
237 return result;
238 }
239
240 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
241 for (int i = 0; i < request_nodes.getLength(); i++) {
242 Element request_node = (Element) request_nodes.item(i);
243 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
244
245 boolean is_external_link = false;
246 if (!node_id.startsWith("HASH")){
247 if (node_id.endsWith(".rt")){
248 node_id = getHrefOID(node_id.substring(0,node_id.length()-3));
249 if (node_id!=null){
250 node_id += ".rt";
251 }else{
252 is_external_link = true;
253 }
254 }else{
255 node_id = getHrefOID(node_id);
256 if (node_id==null){
257 is_external_link = true;
258 }
259 }
260 }
261 if (!is_external_link){
262 if (external_id) {
263 // can we have .pr etc extensions with external ids?
264 node_id = translateExternalId(node_id);
265 } else if (idNeedsTranslating(node_id)) {
266 node_id = translateId(node_id);
267 }
268 }
269
270 if (node_id == null) {
271 continue;
272 }
273 if (!is_external_link){
274 try {
275 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
276 request_node.appendChild(metadata_list);
277 } catch (GSException e) {
278 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
279 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM)) {
280 // there is no point trying any others
281 return result;
282 }
283 }
284 }else{
285 request_node.setAttribute("external_link",request_node.getAttribute(GSXML.NODE_ID_ATT));
286 }
287 }
288
289 return result;
290 }
291
292 protected Element processDocumentStructureRetrieve(Element request) {
293
294 // Create a new (empty) result message
295 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
296 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
297 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
298
299 if (!does_structure) {
300 // shouldn't get here
301 return result;
302 }
303
304 String lang = request.getAttribute(GSXML.LANG_ATT);
305
306 // Get the parameters of the request
307 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
308 if (param_list == null) {
309 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
310 return result;
311 }
312
313 // get the documents of the request
314 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
315 if (query_doc_list == null) {
316 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
317 return result;
318 }
319
320 // copy the doc_list to the response
321 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
322 result.appendChild(response_node_list);
323
324 // check that we have some doc nodes specified
325 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
326 if (node_list.getLength()==0) {
327 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
328 return result;
329 }
330
331 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
332 boolean external_id = false;
333 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
334 external_id = true;
335 }
336
337 // the type of info required
338 boolean want_structure = false;
339 boolean want_info = false;
340
341 ArrayList info_types=new ArrayList();
342 // The document structure information desired
343 boolean want_ancestors = false;
344 boolean want_parent = false;
345 boolean want_siblings = false;
346 boolean want_children = false;
347 boolean want_descendants = false;
348
349 boolean want_entire_structure = false;
350 // Process the request parameters
351 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
352 for (int i=0; i<params.getLength();i++) {
353
354 Element param = (Element)params.item(i);
355 String p_name = param.getAttribute(GSXML.NAME_ATT);
356 String p_value = GSXML.getValue(param);
357 // Identify the structure information desired
358 if (p_name.equals(STRUCT_PARAM)) {
359 want_structure = true;
360
361 // This is NOT locale sensitive
362 if (p_value.equals(STRUCT_ANCESTORS))
363 want_ancestors = true;
364 else if (p_value.equals(STRUCT_PARENT))
365 want_parent = true;
366 else if (p_value.equals(STRUCT_SIBS))
367 want_siblings = true;
368 else if (p_value.equals(STRUCT_CHILDREN))
369 want_children = true;
370 else if (p_value.equals(STRUCT_DESCENDS))
371 want_descendants = true;
372 else if (p_value.equals(STRUCT_ENTIRE))
373 want_entire_structure = true;
374 else
375 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
376 } else if (p_name.equals(INFO_PARAM)) {
377 want_info = true;
378 info_types.add(p_value);
379 }
380 }
381
382 // Make sure there is no repeated information
383 if (want_ancestors)
384 want_parent = false;
385 if (want_descendants)
386 want_children = false;
387
388 for (int i=0; i < node_list.getLength(); i++) {
389 Element doc = (Element) node_list.item(i);
390 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
391 String is_external=doc.getAttribute("externalURL");
392
393 boolean is_external_link = false;
394 if (is_external.equals("0")) {is_external_link = true;}
395 if (!doc_id.startsWith("HASH") && !is_external_link){
396 if (doc_id.endsWith(".rt")){
397 doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
398 if (doc_id!=null){
399 doc_id += ".rt";
400 // }else{
401 // is_external_link = true;
402 // }
403 }else{
404 doc_id = getHrefOID(doc_id);
405 // if (doc_id==null){
406 // is_external_link = true;
407 }
408 }
409 }
410
411 if (!is_external_link){
412 if (external_id) {
413 doc_id = translateExternalId(doc_id);
414 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
415 } else if (idNeedsTranslating(doc_id)) {
416 doc_id = translateId(doc_id);
417 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
418 }
419
420 if (doc_id == null) {
421 continue;
422 }
423
424 if (want_info) {
425
426 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
427 doc.appendChild(node_info_elem);
428
429 for (int j=0; j<info_types.size(); j++) {
430 String info_type = (String)info_types.get(j);
431 String info_value = getStructureInfo(doc_id, info_type);
432 if (info_value != null) {
433 Element info_elem = this.doc.createElement("info");
434 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
435 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
436 node_info_elem.appendChild(info_elem);
437 }
438 }
439 }
440
441 if (want_structure) {
442 // all structure info goes into a nodeStructure elem
443 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
444 doc.appendChild(structure_elem);
445
446 if (want_entire_structure) {
447 String root_id = getRootId(doc_id);
448 Element root_node = createDocNode(root_id); //, true, false);
449 addDescendants(root_node, root_id, true);
450 structure_elem.appendChild(root_node);
451 continue; // with the next document, we dont need to do any more here
452 }
453
454 // Add the requested structure information
455 Element base_node = createDocNode(doc_id); //, false, false);
456
457 //Ancestors: continually add parent nodes until the root is reached
458 Element top_node = base_node; // the top node so far
459 if (want_ancestors) {
460 String current_id = doc_id;
461 while (true) {
462 String parent_id = getParentId(current_id);
463 //Element parent = getParent(current_id);
464 if (parent_id == null)
465 break; // no parent
466 Element parent_node = createDocNode(parent_id);
467 parent_node.appendChild(top_node);
468 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
469 top_node = parent_node;
470 }
471 }
472 // Parent: get the parent of the selected node
473 else if (want_parent) {
474 String parent_id = getParentId(doc_id);
475 if (parent_id != null) {
476 Element parent_node = createDocNode(parent_id);
477 parent_node.appendChild(base_node);
478 top_node = parent_node;
479 }
480 }
481
482 // now the top node is the root of the structure
483 structure_elem.appendChild(top_node);
484
485 //Siblings: get the other descendants of the selected node's parent
486 if (want_siblings) {
487 String parent_id = getParentId(doc_id);
488 if (parent_id != null) {
489 // if parent == current id, then we are at the top
490 // and can't get siblings
491 Element parent_node = (Element)base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
492
493 // add siblings, - returns a pointer to the new current node
494 base_node = addSiblings(parent_node, parent_id, doc_id);
495 }
496
497 }
498
499 // Children: get the descendants, but only one level deep
500 if (want_children) {
501 addDescendants(base_node, doc_id, false);
502 }
503 // Descendants: recursively get every descendant
504 else if (want_descendants) {
505 addDescendants(base_node, doc_id, true);
506 }
507 } // if want structure
508
509 }else{
510 Element external_link_elem = this.doc.createElement("external");
511 external_link_elem.setAttribute("external_link",doc.getAttribute(GSXML.NODE_ID_ATT));
512 doc.appendChild(external_link_elem);
513 }// if is_external_link
514 } // for each doc
515 return result;
516 }
517
518 /** Retrieve the content of a document */
519 protected Element processDocumentContentRetrieve(Element request)
520 {
521 // Create a new (empty) result message
522 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
523 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
524 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
525
526 if (!does_content) {
527 // shouldn't get here
528 return result;
529 }
530
531 // Get the parameters of the request
532 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
533 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
534 boolean external_id = false;
535 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
536 external_id = true;
537 }
538 // Get the request content
539 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
540 if (query_doc_list == null) {
541 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
542 return result;
543 }
544
545 String lang = request.getAttribute(GSXML.LANG_ATT);
546 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
547 result.appendChild(doc_list);
548
549 // set up the retrieval??
550
551 // Get the documents
552 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
553 GSXML.NODE_ID_ATT);
554 String[] is_externals=GSXML.getAttributeValuesFromList(query_doc_list,"externalURL");
555
556 for (int i = 0; i < doc_ids.length; i++) {
557 String doc_id = doc_ids[i];
558 String is_external=is_externals[i];
559 boolean is_external_link=false;
560 if (is_external.equals("0")){is_external_link = true;}
561 if (!doc_id.startsWith("HASH") && !is_external_link){
562 if (doc_id.endsWith(".rt")){
563 String find_doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
564 if (find_doc_id!=null){
565 doc_id = doc_id + ".rt";
566 //}else{
567 //is_external_link=true;
568 }
569 }else {
570 String find_doc_id = getHrefOID(doc_id);
571 if (find_doc_id!=null){
572 doc_id = find_doc_id;
573 //}else{
574 //is_external_link=true;
575 }
576 }
577 }
578
579 if (!is_external_link){
580 // Create the document node
581 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
582 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
583 doc_list.appendChild(doc);
584
585 if (external_id) {
586 doc_id = translateExternalId(doc_id);
587 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
588 } else if (idNeedsTranslating(doc_id)) {
589 doc_id = translateId(doc_id);
590 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
591 }
592 if (doc_id == null) {
593 continue;
594 }
595 try {
596 Element node_content = getNodeContent(doc_id, lang);
597 doc.appendChild(node_content);
598 } catch (GSException e) {
599 GSXML.addError(this.doc, result, e.getMessage());
600 return result;
601
602 }
603 }else{
604 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
605 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
606 //doc.setAttribute("external_link", doc_id);
607 Element external_link_elem = this.doc.createElement("external");
608 external_link_elem.setAttribute("external_link",doc_id);
609 doc.appendChild(external_link_elem);
610
611 doc_list.appendChild(doc);
612 }
613 }
614 return result;
615 }
616
617 /** create an element to go into the structure. A node element
618 * has the form
619 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
620 */
621 protected Element createDocNode(String node_id) {
622 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
623 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
624
625 String doc_type = null;
626 if (default_document_type != null) {
627 doc_type = default_document_type;
628 } else {
629 doc_type = getDocType(node_id);
630 }
631 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
632 String node_type = getNodeType(node_id, doc_type);
633 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
634 return node;
635 }
636
637 /** adds all the children of doc_id the the doc element,
638 * and if recursive=true, adds all their children as well*/
639 protected void addDescendants(Element doc, String doc_id,
640 boolean recursive)
641 {
642 ArrayList child_ids = getChildrenIds(doc_id);
643 if (child_ids==null) return;
644 for (int i=0; i< child_ids.size(); i++) {
645 String child_id = (String)child_ids.get(i);
646 Element child_elem = createDocNode(child_id);
647 doc.appendChild(child_elem);
648 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
649 addDescendants(child_elem, child_id, recursive);
650 }
651 }
652 }
653
654 /** adds all the siblings of current_id to the parent element.
655 returns the new current element*/
656 protected Element addSiblings(Element parent_node, String parent_id,
657 String current_id) {
658 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
659 if (current_node == null) {
660 // create a sensible error message
661 logger.error(" there should be a first child.");
662 return null;
663 }
664 // remove the current child,- will add it in later in its correct place
665 parent_node.removeChild(current_node);
666
667 // add in all the siblings,
668 addDescendants(parent_node, parent_id, false);
669
670 // find the node that is now the current node
671 // this assumes that the new node that was created is the same as
672 // the old one that was removed - we may want to replace the new one
673 // with the old one.
674 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
675 return new_current;
676 }
677
678 /** returns true if oid ends in
679 .fc (firstchild),
680 .lc (lastchild),
681 .pr (parent),
682 .ns (next sibling),
683 .ps (prev sibling),
684 .rt (root)
685 .ss (specified sibling),
686 false otherwise
687 */
688 protected boolean idNeedsTranslating(String id) {
689 return OID.needsTranslating(id);
690 }
691
692 /** returns the list of sibling ids, including the specified node_id */
693 protected ArrayList getSiblingIds(String node_id) {
694 String parent_id = getParentId(node_id);
695 if (parent_id == null) {
696 return null;
697 }
698 return getChildrenIds(parent_id);
699
700 }
701
702 /** returns the node type of the specified node.
703 should be one of
704 GSXML.NODE_TYPE_LEAF,
705 GSXML.NODE_TYPE_INTERNAL,
706 GSXML.NODE_TYPE_ROOT
707 */
708 protected String getNodeType(String node_id, String doc_type) {
709 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
710 return GSXML.NODE_TYPE_LEAF;
711 }
712
713 if (getParentId(node_id)==null) {
714 return GSXML.NODE_TYPE_ROOT;
715 }
716 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
717 return GSXML.NODE_TYPE_LEAF;
718 }
719 if (getChildrenIds(node_id)==null) {
720 return GSXML.NODE_TYPE_LEAF;
721 }
722 return GSXML.NODE_TYPE_INTERNAL;
723
724 }
725
726 /** if id ends in .fc, .pc etc, then translate it to the correct id
727 * default implementation: just remove the suffix */
728 protected String translateId(String id) {
729 return id.substring(0,id.length());
730 }
731
732 /** if an id is not a greenstone id (an external id) then translate
733 * it to a greenstone one
734 * default implementation: return the id */
735 protected String translateExternalId(String id) {
736 return id;
737 }
738
739 /** returns the document type of the doc that the specified node
740 belongs to. should be one of
741 GSXML.DOC_TYPE_SIMPLE,
742 GSXML.DOC_TYPE_PAGED,
743 GSXML.DOC_TYPE_HIERARCHY
744 default implementation: return DOC_TYPE_SIMPLE
745 */
746 protected String getDocType(String node_id) {
747 return GSXML.DOC_TYPE_SIMPLE;
748 }
749
750
751 /** returns the id of the root node of the document containing
752 * node node_id. may be the same as node_id
753 * default implemntation: return node_id
754 */
755 protected String getRootId(String node_id) {
756 return node_id;
757 }
758 /** returns a list of the child ids in order, null if no children
759 * default implementation: return null */
760 protected ArrayList getChildrenIds(String node_id) {
761 return null;
762 }
763 /** returns the node id of the parent node, null if no parent
764 * default implementation: return null */
765 protected String getParentId(String node_id) {
766 return null;
767 }
768
769 /** get the metadata for the doc node doc_id
770 * returns a metadataList element:
771 * <metadataList><metadata name="xxx">value</metadata></metadataList>
772 */
773 abstract protected Element getMetadataList(String doc_id,
774 boolean all_metadata,
775 ArrayList metadata_names) throws GSException;
776 /** returns the content of a node
777 * should return a nodeContent element:
778 * <nodeContent>text content or other elements</nodeContent>
779 * can return
780 */
781 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
782
783 /** returns the structural information asked for.
784 * info_type may be one of
785 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
786 */
787 abstract protected String getStructureInfo(String doc_id, String info_type);
788
789 protected String getHrefOID(String href_url){
790 return null;
791 }
792
793}
Note: See TracBrowser for help on using the repository browser.