source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 14552

Last change on this file since 14552 was 14552, checked in by qq6, 17 years ago

fixed a bug of checking external link

  • Property svn:keywords set to Author Date Id Revision
File size: 27.3 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24//import org.greenstone.gdbm.*;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GlobalProperties;
31import org.greenstone.gsdl3.util.GSConstants;
32
33// XML classes
34import org.w3c.dom.Document;
35import org.w3c.dom.Element;
36import org.w3c.dom.NodeList;
37
38// General Java classes
39import java.io.File;
40import java.util.StringTokenizer;
41import java.util.Set;
42import java.util.Iterator;
43import java.util.ArrayList;
44
45
46import org.apache.log4j.*;
47
48/** Abstract class for Document Retrieval Services
49 *
50 * @author <a href="mailto:[email protected]">Katherine Don</a>
51 */
52
53public abstract class AbstractDocumentRetrieve
54 extends ServiceRack {
55
56 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
57
58 // the services on offer
59 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
60 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
61 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
62
63 protected static final String STRUCT_PARAM = "structure";
64 protected static final String INFO_PARAM = "info";
65
66 protected static final String STRUCT_ANCESTORS = "ancestors";
67 protected static final String STRUCT_PARENT = "parent";
68 protected static final String STRUCT_SIBS = "siblings";
69 protected static final String STRUCT_CHILDREN = "children";
70 protected static final String STRUCT_DESCENDS = "descendants";
71 protected static final String STRUCT_ENTIRE = "entire";
72
73 protected static final String INFO_NUM_SIBS = "numSiblings";
74 protected static final String INFO_NUM_CHILDREN = "numChildren";
75 protected static final String INFO_SIB_POS = "siblingPosition";
76
77 // means the id is not a greenstone id and needs translating
78 protected static final String EXTID_PARAM = "ext";
79
80 protected Element config_info = null; // the xml from the config file
81
82 protected String default_document_type = null;
83 protected MacroResolver macro_resolver = null;
84
85 /** does this class provide the service?? */
86 protected boolean does_metadata = true;
87 protected boolean does_content = true;
88 protected boolean does_structure = true;
89
90 /** constructor */
91 public AbstractDocumentRetrieve()
92 {
93 }
94
95 /** configure this service */
96 public boolean configure(Element info, Element extra_info)
97 {
98 if (!super.configure(info, extra_info)){
99 return false;
100 }
101
102 logger.info("Configuring AbstractDocumentRetrieve...");
103 this.config_info = info;
104
105 // set up short_service_info_ - for now just has name and type
106 if (does_structure) {
107 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
108 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
109 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
110 this.short_service_info.appendChild(dsr_service);
111 }
112
113 if (does_metadata) {
114 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
115 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
116 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
117 this.short_service_info.appendChild(dmr_service);
118 }
119
120 if (does_content) {
121 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
122 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
123 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
124 this.short_service_info.appendChild(dcr_service);
125 }
126
127 // look for document display format
128 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
129 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
130 if (display_format != null) {
131 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
132 // should we keep a copy?
133 // check for docType option.
134 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
135 if (doc_type_opt != null) {
136 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
137 if (!value.equals("")) {
138 this.default_document_type = value;
139 }
140 }
141 }
142
143 if (macro_resolver != null) {
144 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
145 // set up the macro resolver
146 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
147 if (replacement_elem != null) {
148 macro_resolver.addMacros(replacement_elem);
149 }
150 // look for any refs to global replace lists
151 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
152 for (int i=0; i<replace_refs_elems.getLength(); i++) {
153 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
154 if (!id.equals("")) {
155 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
156 if (replace_list != null) {
157 macro_resolver.addMacros(replace_list);
158 }
159 }
160 }
161 }
162
163 return true;
164 }
165
166 protected Element getServiceDescription(String service_id, String lang, String subset) {
167
168 // these ones are probably never called, but put them here just in case
169 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
170 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
171 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
172 return service_elem;
173 }
174
175 protected Element processDocumentMetadataRetrieve(Element request) {
176
177 // Create a new (empty) result message
178 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
179 String lang = request.getAttribute(GSXML.LANG_ATT);
180 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
181 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
182
183 if (!does_metadata) {
184 // shouldn't get here
185 return result;
186 }
187 // Get the parameters of the request
188 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
189 if (param_list == null) {
190 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
191 return result;
192 }
193
194 boolean external_id = false;
195 // The metadata information required
196 ArrayList metadata_names_list = new ArrayList();
197 boolean all_metadata = false;
198 // Process the request parameters
199 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
200 while (param != null) {
201 // Identify the metadata information desired
202 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
203 String metadata = GSXML.getValue(param);
204 if (metadata.equals("all")) {
205 all_metadata = true;
206 break;
207 }
208 metadata_names_list.add(metadata);
209 } else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM)&& GSXML.getValue(param).equals("1")) {
210 external_id = true;
211 }
212 param = (Element) param.getNextSibling();
213 }
214
215 // check that there has been some metadata specified
216 if (!all_metadata && metadata_names_list.size()==0) {
217 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
218 return result;
219 }
220
221 // Get the documents
222 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
223 if (request_node_list == null) {
224 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
225 return result;
226 }
227
228 // copy the request doc node list to the response
229 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
230 result.appendChild(response_node_list);
231
232 // use the copied list so that we add the metadata into the copy
233 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
234 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
235 if (request_nodes.getLength()==0) {
236 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
237 return result;
238 }
239
240 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
241 for (int i = 0; i < request_nodes.getLength(); i++) {
242 Element request_node = (Element) request_nodes.item(i);
243 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
244
245 boolean is_external_link = false;
246 if (!node_id.startsWith("HASH")){
247 if (node_id.endsWith(".rt")){
248 node_id = getHrefOID(node_id.substring(0,node_id.length()-3));
249 if (node_id!=null){
250 node_id += ".rt";
251 }else{
252 is_external_link = true;
253 }
254 }else{
255 node_id = getHrefOID(node_id);
256 if (node_id==null){
257 is_external_link = true;
258 }
259 }
260 }
261 if (!is_external_link){
262 if (external_id) {
263 // can we have .pr etc extensions with external ids?
264 node_id = translateExternalId(node_id);
265 } else if (idNeedsTranslating(node_id)) {
266 node_id = translateId(node_id);
267 }
268 }
269
270 if (node_id == null) {
271 continue;
272 }
273 if (!is_external_link){
274 try {
275 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
276 request_node.appendChild(metadata_list);
277 } catch (GSException e) {
278 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
279 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM)) {
280 // there is no point trying any others
281 return result;
282 }
283 }
284 }else{
285 request_node.setAttribute("external_link",request_node.getAttribute(GSXML.NODE_ID_ATT));
286 }
287 }
288
289 return result;
290 }
291
292 protected Element processDocumentStructureRetrieve(Element request) {
293
294 // Create a new (empty) result message
295 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
296 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
297 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
298
299 if (!does_structure) {
300 // shouldn't get here
301 return result;
302 }
303
304 String lang = request.getAttribute(GSXML.LANG_ATT);
305
306 // Get the parameters of the request
307 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
308 if (param_list == null) {
309 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
310 return result;
311 }
312
313 // get the documents of the request
314 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
315 if (query_doc_list == null) {
316 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
317 return result;
318 }
319
320 // copy the doc_list to the response
321 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
322 result.appendChild(response_node_list);
323
324 // check that we have some doc nodes specified
325 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
326 if (node_list.getLength()==0) {
327 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
328 return result;
329 }
330
331 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
332 boolean external_id = false;
333 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
334 external_id = true;
335 }
336
337 // the type of info required
338 boolean want_structure = false;
339 boolean want_info = false;
340
341 ArrayList info_types=new ArrayList();
342 // The document structure information desired
343 boolean want_ancestors = false;
344 boolean want_parent = false;
345 boolean want_siblings = false;
346 boolean want_children = false;
347 boolean want_descendants = false;
348
349 boolean want_entire_structure = false;
350 // Process the request parameters
351 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
352 for (int i=0; i<params.getLength();i++) {
353
354 Element param = (Element)params.item(i);
355 String p_name = param.getAttribute(GSXML.NAME_ATT);
356 String p_value = GSXML.getValue(param);
357 // Identify the structure information desired
358 if (p_name.equals(STRUCT_PARAM)) {
359 want_structure = true;
360
361 // This is NOT locale sensitive
362 if (p_value.equals(STRUCT_ANCESTORS))
363 want_ancestors = true;
364 else if (p_value.equals(STRUCT_PARENT))
365 want_parent = true;
366 else if (p_value.equals(STRUCT_SIBS))
367 want_siblings = true;
368 else if (p_value.equals(STRUCT_CHILDREN))
369 want_children = true;
370 else if (p_value.equals(STRUCT_DESCENDS))
371 want_descendants = true;
372 else if (p_value.equals(STRUCT_ENTIRE))
373 want_entire_structure = true;
374 else
375 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
376 } else if (p_name.equals(INFO_PARAM)) {
377 want_info = true;
378 info_types.add(p_value);
379 }
380 }
381
382 // Make sure there is no repeated information
383 if (want_ancestors)
384 want_parent = false;
385 if (want_descendants)
386 want_children = false;
387
388 for (int i=0; i < node_list.getLength(); i++) {
389 Element doc = (Element) node_list.item(i);
390 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
391 String is_external=doc.getAttribute("externalURL");
392
393 boolean is_external_link = false;
394 if (is_external.equals("0")) {is_external_link = true;}
395 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
396 if (doc_id.endsWith(".rt")){
397 doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
398 if (doc_id!=null){
399 doc_id += ".rt";
400 }else{
401 is_external_link = true;
402 }
403 }else{
404 doc_id = getHrefOID(doc_id);
405 if (doc_id==null){ is_external_link = true;}
406 }
407 }
408 if (!is_external_link){
409 if (external_id) {
410 doc_id = translateExternalId(doc_id);
411 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
412 } else if (idNeedsTranslating(doc_id)) {
413 doc_id = translateId(doc_id);
414 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
415 }
416
417 if (doc_id == null) {
418 continue;
419 }
420
421 if (want_info) {
422
423 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
424 doc.appendChild(node_info_elem);
425
426 for (int j=0; j<info_types.size(); j++) {
427 String info_type = (String)info_types.get(j);
428 String info_value = getStructureInfo(doc_id, info_type);
429 if (info_value != null) {
430 Element info_elem = this.doc.createElement("info");
431 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
432 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
433 node_info_elem.appendChild(info_elem);
434 }
435 }
436 }
437
438 if (want_structure) {
439 // all structure info goes into a nodeStructure elem
440 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
441 doc.appendChild(structure_elem);
442
443 if (want_entire_structure) {
444 String root_id = getRootId(doc_id);
445 Element root_node = createDocNode(root_id); //, true, false);
446 addDescendants(root_node, root_id, true);
447 structure_elem.appendChild(root_node);
448 continue; // with the next document, we dont need to do any more here
449 }
450
451 // Add the requested structure information
452 Element base_node = createDocNode(doc_id); //, false, false);
453
454 //Ancestors: continually add parent nodes until the root is reached
455 Element top_node = base_node; // the top node so far
456 if (want_ancestors) {
457 String current_id = doc_id;
458 while (true) {
459 String parent_id = getParentId(current_id);
460 //Element parent = getParent(current_id);
461 if (parent_id == null)
462 break; // no parent
463 Element parent_node = createDocNode(parent_id);
464 parent_node.appendChild(top_node);
465 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
466 top_node = parent_node;
467 }
468 }
469 // Parent: get the parent of the selected node
470 else if (want_parent) {
471 String parent_id = getParentId(doc_id);
472 if (parent_id != null) {
473 Element parent_node = createDocNode(parent_id);
474 parent_node.appendChild(base_node);
475 top_node = parent_node;
476 }
477 }
478
479 // now the top node is the root of the structure
480 structure_elem.appendChild(top_node);
481
482 //Siblings: get the other descendants of the selected node's parent
483 if (want_siblings) {
484 String parent_id = getParentId(doc_id);
485 if (parent_id != null) {
486 // if parent == current id, then we are at the top
487 // and can't get siblings
488 Element parent_node = (Element)base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
489
490 // add siblings, - returns a pointer to the new current node
491 base_node = addSiblings(parent_node, parent_id, doc_id);
492 }
493
494 }
495
496 // Children: get the descendants, but only one level deep
497 if (want_children) {
498 addDescendants(base_node, doc_id, false);
499 }
500 // Descendants: recursively get every descendant
501 else if (want_descendants) {
502 addDescendants(base_node, doc_id, true);
503 }
504 } // if want structure
505
506 }else{
507 Element external_link_elem = this.doc.createElement("external");
508 external_link_elem.setAttribute("external_link",doc.getAttribute(GSXML.NODE_ID_ATT));
509 doc.appendChild(external_link_elem);
510 }// if is_external_link
511 } // for each doc
512 return result;
513 }
514
515 /** Retrieve the content of a document */
516 protected Element processDocumentContentRetrieve(Element request)
517 {
518 // Create a new (empty) result message
519 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
520 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
521 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
522
523 if (!does_content) {
524 // shouldn't get here
525 return result;
526 }
527
528 // Get the parameters of the request
529 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
530 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
531 boolean external_id = false;
532 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
533 external_id = true;
534 }
535 // Get the request content
536 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
537 if (query_doc_list == null) {
538 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
539 return result;
540 }
541
542 String lang = request.getAttribute(GSXML.LANG_ATT);
543 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
544 result.appendChild(doc_list);
545
546 // set up the retrieval??
547
548 // Get the documents
549 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
550 GSXML.NODE_ID_ATT);
551 String[] is_externals=GSXML.getAttributeValuesFromList(query_doc_list,"externalURL");
552
553 for (int i = 0; i < doc_ids.length; i++) {
554 String doc_id = doc_ids[i];
555 String is_external=is_externals[i];
556
557 boolean is_external_link=false;
558 if (is_external.equals("0")){is_external_link = true;}
559 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
560 //if (!doc_id.startsWith("HASH")){
561 if (doc_id.endsWith(".rt")){
562 String find_doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
563 if (find_doc_id!=null){
564 doc_id = doc_id + ".rt";
565 }else{
566 is_external_link=true;
567 }
568
569 }else {
570 String find_doc_id = getHrefOID(doc_id);
571 if (find_doc_id==null){
572 is_external_link=true;
573 }else{
574 doc_id = find_doc_id;
575 }
576 }
577 }
578
579 if (!is_external_link){
580 // Create the document node
581 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
582 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
583 doc_list.appendChild(doc);
584
585 if (external_id) {
586 doc_id = translateExternalId(doc_id);
587 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
588 } else if (idNeedsTranslating(doc_id)) {
589 doc_id = translateId(doc_id);
590 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
591 }
592 if (doc_id == null) {
593 continue;
594 }
595 try {
596 Element node_content = getNodeContent(doc_id, lang);
597 doc.appendChild(node_content);
598 } catch (GSException e) {
599 GSXML.addError(this.doc, result, e.getMessage());
600 return result;
601
602 }
603 }else{
604 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
605 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
606 //doc.setAttribute("external_link", doc_id);
607 Element external_link_elem = this.doc.createElement("external");
608 external_link_elem.setAttribute("external_link",doc_id);
609 doc.appendChild(external_link_elem);
610
611 doc_list.appendChild(doc);
612 }
613 }
614 return result;
615 }
616
617 /** create an element to go into the structure. A node element
618 * has the form
619 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
620 */
621 protected Element createDocNode(String node_id) {
622 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
623 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
624
625 String doc_type = null;
626 if (default_document_type != null) {
627 doc_type = default_document_type;
628 } else {
629 doc_type = getDocType(node_id);
630 }
631 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
632 String node_type = getNodeType(node_id, doc_type);
633 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
634 return node;
635 }
636
637 /** adds all the children of doc_id the the doc element,
638 * and if recursive=true, adds all their children as well*/
639 protected void addDescendants(Element doc, String doc_id,
640 boolean recursive)
641 {
642 ArrayList child_ids = getChildrenIds(doc_id);
643 if (child_ids==null) return;
644 for (int i=0; i< child_ids.size(); i++) {
645 String child_id = (String)child_ids.get(i);
646 Element child_elem = createDocNode(child_id);
647 doc.appendChild(child_elem);
648 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
649 addDescendants(child_elem, child_id, recursive);
650 }
651 }
652 }
653
654 /** adds all the siblings of current_id to the parent element.
655 returns the new current element*/
656 protected Element addSiblings(Element parent_node, String parent_id,
657 String current_id) {
658 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
659 if (current_node == null) {
660 // create a sensible error message
661 logger.error(" there should be a first child.");
662 return null;
663 }
664 // remove the current child,- will add it in later in its correct place
665 parent_node.removeChild(current_node);
666
667 // add in all the siblings,
668 addDescendants(parent_node, parent_id, false);
669
670 // find the node that is now the current node
671 // this assumes that the new node that was created is the same as
672 // the old one that was removed - we may want to replace the new one
673 // with the old one.
674 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
675 return new_current;
676 }
677
678 /** returns true if oid ends in
679 .fc (firstchild),
680 .lc (lastchild),
681 .pr (parent),
682 .ns (next sibling),
683 .ps (prev sibling),
684 .rt (root)
685 .ss (specified sibling),
686 false otherwise
687 */
688 protected boolean idNeedsTranslating(String id) {
689 return OID.needsTranslating(id);
690 }
691
692 /** returns the list of sibling ids, including the specified node_id */
693 protected ArrayList getSiblingIds(String node_id) {
694 String parent_id = getParentId(node_id);
695 if (parent_id == null) {
696 return null;
697 }
698 return getChildrenIds(parent_id);
699
700 }
701
702 /** returns the node type of the specified node.
703 should be one of
704 GSXML.NODE_TYPE_LEAF,
705 GSXML.NODE_TYPE_INTERNAL,
706 GSXML.NODE_TYPE_ROOT
707 */
708 protected String getNodeType(String node_id, String doc_type) {
709 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
710 return GSXML.NODE_TYPE_LEAF;
711 }
712
713 if (getParentId(node_id)==null) {
714 return GSXML.NODE_TYPE_ROOT;
715 }
716 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
717 return GSXML.NODE_TYPE_LEAF;
718 }
719 if (getChildrenIds(node_id)==null) {
720 return GSXML.NODE_TYPE_LEAF;
721 }
722 return GSXML.NODE_TYPE_INTERNAL;
723
724 }
725
726 /** if id ends in .fc, .pc etc, then translate it to the correct id
727 * default implementation: just remove the suffix */
728 protected String translateId(String id) {
729 return id.substring(0,id.length());
730 }
731
732 /** if an id is not a greenstone id (an external id) then translate
733 * it to a greenstone one
734 * default implementation: return the id */
735 protected String translateExternalId(String id) {
736 return id;
737 }
738
739 /** returns the document type of the doc that the specified node
740 belongs to. should be one of
741 GSXML.DOC_TYPE_SIMPLE,
742 GSXML.DOC_TYPE_PAGED,
743 GSXML.DOC_TYPE_HIERARCHY
744 default implementation: return DOC_TYPE_SIMPLE
745 */
746 protected String getDocType(String node_id) {
747 return GSXML.DOC_TYPE_SIMPLE;
748 }
749
750
751 /** returns the id of the root node of the document containing
752 * node node_id. may be the same as node_id
753 * default implemntation: return node_id
754 */
755 protected String getRootId(String node_id) {
756 return node_id;
757 }
758 /** returns a list of the child ids in order, null if no children
759 * default implementation: return null */
760 protected ArrayList getChildrenIds(String node_id) {
761 return null;
762 }
763 /** returns the node id of the parent node, null if no parent
764 * default implementation: return null */
765 protected String getParentId(String node_id) {
766 return null;
767 }
768
769 /** get the metadata for the doc node doc_id
770 * returns a metadataList element:
771 * <metadataList><metadata name="xxx">value</metadata></metadataList>
772 */
773 abstract protected Element getMetadataList(String doc_id,
774 boolean all_metadata,
775 ArrayList metadata_names) throws GSException;
776 /** returns the content of a node
777 * should return a nodeContent element:
778 * <nodeContent>text content or other elements</nodeContent>
779 * can return
780 */
781 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
782
783 /** returns the structural information asked for.
784 * info_type may be one of
785 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
786 */
787 abstract protected String getStructureInfo(String doc_id, String info_type);
788
789 protected String getHrefOID(String href_url){
790 return null;
791 }
792
793}
Note: See TracBrowser for help on using the repository browser.