source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 24334

Last change on this file since 24334 was 24334, checked in by ak19, 13 years ago

Correction to the commits made for ticket 770 which had introduced a bug whereby titles and other metadata were no longer being retrieved for normal search and browse operations (even though the changes for the commits got the special DSpace browse by titles feature to work).

  • Property svn:keywords set to Author Date Id Revision
File size: 28.1 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24import org.greenstone.util.GlobalProperties;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GSConstants;
31
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44
45import org.apache.log4j.*;
46
47/** Abstract class for Document Retrieval Services
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 */
51
52public abstract class AbstractDocumentRetrieve
53 extends ServiceRack {
54
/** log4j logger for this class */
static Logger logger = Logger.getLogger(AbstractDocumentRetrieve.class.getName());

// names of the three retrieval services this rack can advertise
protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";

// request parameter names understood by the structure retrieve service
protected static final String STRUCT_PARAM = "structure";
protected static final String INFO_PARAM = "info";

// accepted values of the STRUCT_PARAM parameter
protected static final String STRUCT_ANCESTORS = "ancestors";
protected static final String STRUCT_PARENT = "parent";
protected static final String STRUCT_SIBS = "siblings";
protected static final String STRUCT_CHILDREN = "children";
protected static final String STRUCT_DESCENDS = "descendants";
protected static final String STRUCT_ENTIRE = "entire";

// kinds of structural information that getStructureInfo may be asked for
protected static final String INFO_NUM_SIBS = "numSiblings";
protected static final String INFO_NUM_CHILDREN = "numChildren";
protected static final String INFO_SIB_POS = "siblingPosition";

// request parameter that, when set to "1", says the supplied ids are not
// greenstone ids and must go through translateExternalId
protected static final String EXTID_PARAM = "ext";

/** the service's xml from the config file, stored by configure() */
protected Element config_info = null;

/** document type taken from the display format's documentType option;
 * when non-null, createDocNode uses it instead of calling getDocType */
protected String default_document_type = null;
/** optional macro resolver; configure() only sets it up when it is non-null
 * (presumably assigned by subclasses - it is never assigned in this class) */
protected MacroResolver macro_resolver = null;

/** whether this class offers each of the services; a service that is
 * switched off is not advertised and its process method returns an empty response */
protected boolean does_metadata = true;
protected boolean does_content = true;
protected boolean does_structure = true;

/** default constructor - nothing to initialise */
public AbstractDocumentRetrieve()
{
}
93
94 /** configure this service */
95 public boolean configure(Element info, Element extra_info)
96 {
97 if (!super.configure(info, extra_info)){
98 return false;
99 }
100
101 logger.info("Configuring AbstractDocumentRetrieve...");
102 this.config_info = info;
103
104 // set up short_service_info_ - for now just has name and type
105 if (does_structure) {
106 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
107 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
108 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
109 this.short_service_info.appendChild(dsr_service);
110 }
111
112 if (does_metadata) {
113 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
114 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
115 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
116 this.short_service_info.appendChild(dmr_service);
117 }
118
119 if (does_content) {
120 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
121 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
122 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
123 this.short_service_info.appendChild(dcr_service);
124 }
125
126 // look for document display format
127 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
128 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
129 if (display_format != null) {
130 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
131 // should we keep a copy?
132 // check for docType option.
133 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
134 if (doc_type_opt != null) {
135 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
136 if (!value.equals("")) {
137 this.default_document_type = value;
138 }
139 }
140 }
141
142 if (macro_resolver != null) {
143 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
144 // set up the macro resolver
145 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
146 if (replacement_elem != null) {
147 macro_resolver.addMacros(replacement_elem);
148 }
149 // look for any refs to global replace lists
150 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
151 for (int i=0; i<replace_refs_elems.getLength(); i++) {
152 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
153 if (!id.equals("")) {
154 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
155 if (replace_list != null) {
156 macro_resolver.addMacros(replace_list);
157 }
158 }
159 }
160 }
161
162 return true;
163 }
164
165 protected Element getServiceDescription(String service_id, String lang, String subset) {
166
167 // these ones are probably never called, but put them here just in case
168 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
169 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
170 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
171 return service_elem;
172 }
173
174 protected Element processDocumentMetadataRetrieve(Element request) {
175
176 // Create a new (empty) result message
177 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
178 String lang = request.getAttribute(GSXML.LANG_ATT);
179 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
180 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
181
182 if (!does_metadata) {
183 // shouldn't get here
184 return result;
185 }
186 // Get the parameters of the request
187 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
188 if (param_list == null) {
189 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
190 return result;
191 }
192
193 boolean external_id = false;
194 // The metadata information required
195 ArrayList metadata_names_list = new ArrayList();
196 boolean all_metadata = false;
197 // Process the request parameters
198 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
199 while (param != null) {
200 // Identify the metadata information desired
201 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
202 String metadata = GSXML.getValue(param);
203 if (metadata.equals("all")) {
204 all_metadata = true;
205 break;
206 }
207 metadata_names_list.add(metadata);
208 } else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM)&& GSXML.getValue(param).equals("1")) {
209 external_id = true;
210 }
211 param = (Element) param.getNextSibling();
212 }
213
214 // check that there has been some metadata specified
215 if (!all_metadata && metadata_names_list.size()==0) {
216 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
217 return result;
218 }
219
220 // Get the documents
221 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
222 if (request_node_list == null) {
223 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
224 return result;
225 }
226
227 // copy the request doc node list to the response
228 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
229 result.appendChild(response_node_list);
230
231 // use the copied list so that we add the metadata into the copy
232 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
233 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
234 if (request_nodes.getLength()==0) {
235 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
236 return result;
237 }
238
239 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
240 for (int i = 0; i < request_nodes.getLength(); i++) {
241 Element request_node = (Element) request_nodes.item(i);
242 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
243
244 // make a custom copy of metadata_names_list for each docID, since mdoffset value varies for each doc
245 ArrayList customised_metadata_names_list = new ArrayList(metadata_names_list.size());
246 int mdoffset = 0;
247 if(request_node.hasAttribute(GSXML.NODE_MDOFFSET_ATT)) {
248 String offset = request_node.getAttribute(GSXML.NODE_MDOFFSET_ATT);
249 mdoffset = Integer.parseInt(offset);
250 }
251 for(int x = 0; x < metadata_names_list.size(); x++) {
252 String metaname = (String)metadata_names_list.get(x);
253 if(metaname.indexOf("offset" + GSConstants.META_RELATION_SEP) != -1) {
254 // append offset number to the metaname
255 metaname = metaname.replace("offset"+GSConstants.META_RELATION_SEP, "offset"+mdoffset+GSConstants.META_RELATION_SEP);
256 }
257 customised_metadata_names_list.add(x, metaname);
258 }
259
260
261 boolean is_external_link = false;
262 if (!node_id.startsWith("HASH") && !node_id.startsWith("D")){
263 if (node_id.endsWith(".rt")){
264 node_id = getHrefOID(node_id.substring(0,node_id.length()-3));
265 if (node_id!=null){
266 node_id += ".rt";
267 }else{
268 is_external_link = true;
269 }
270 }else{
271 node_id = getHrefOID(node_id);
272 if (node_id==null){
273 is_external_link = true;
274 }
275 }
276 }
277 if (!is_external_link){
278 if (external_id) {
279 // can we have .pr etc extensions with external ids?
280 node_id = translateExternalId(node_id);
281 } else if (idNeedsTranslating(node_id)) {
282 node_id = translateId(node_id);
283 }
284 }
285
286 if (node_id == null) {
287 continue;
288 }
289 if (!is_external_link){
290 try {
291 Element metadata_list = getMetadataList(node_id, all_metadata, customised_metadata_names_list);
292 request_node.appendChild(metadata_list);
293 } catch (GSException e) {
294 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
295 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM)) {
296 // there is no point trying any others
297 return result;
298 }
299 }
300 }else{
301 request_node.setAttribute("external_link",request_node.getAttribute(GSXML.NODE_ID_ATT));
302 }
303 }
304 return result;
305 }
306
307 protected Element processDocumentStructureRetrieve(Element request) {
308
309 // Create a new (empty) result message
310 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
311 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
312 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
313
314 if (!does_structure) {
315 // shouldn't get here
316 return result;
317 }
318
319 String lang = request.getAttribute(GSXML.LANG_ATT);
320
321 // Get the parameters of the request
322 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
323 if (param_list == null) {
324 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
325 return result;
326 }
327
328 // get the documents of the request
329 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
330 if (query_doc_list == null) {
331 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
332 return result;
333 }
334
335 // copy the doc_list to the response
336 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
337 result.appendChild(response_node_list);
338
339 // check that we have some doc nodes specified
340 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
341 if (node_list.getLength()==0) {
342 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
343 return result;
344 }
345
346 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
347 boolean external_id = false;
348 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
349 external_id = true;
350 }
351
352 // the type of info required
353 boolean want_structure = false;
354 boolean want_info = false;
355
356 ArrayList info_types=new ArrayList();
357 // The document structure information desired
358 boolean want_ancestors = false;
359 boolean want_parent = false;
360 boolean want_siblings = false;
361 boolean want_children = false;
362 boolean want_descendants = false;
363
364 boolean want_entire_structure = false;
365 // Process the request parameters
366 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
367 for (int i=0; i<params.getLength();i++) {
368
369 Element param = (Element)params.item(i);
370 String p_name = param.getAttribute(GSXML.NAME_ATT);
371 String p_value = GSXML.getValue(param);
372 // Identify the structure information desired
373 if (p_name.equals(STRUCT_PARAM)) {
374 want_structure = true;
375
376 // This is NOT locale sensitive
377 if (p_value.equals(STRUCT_ANCESTORS))
378 want_ancestors = true;
379 else if (p_value.equals(STRUCT_PARENT))
380 want_parent = true;
381 else if (p_value.equals(STRUCT_SIBS))
382 want_siblings = true;
383 else if (p_value.equals(STRUCT_CHILDREN))
384 want_children = true;
385 else if (p_value.equals(STRUCT_DESCENDS))
386 want_descendants = true;
387 else if (p_value.equals(STRUCT_ENTIRE))
388 want_entire_structure = true;
389 else
390 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
391 } else if (p_name.equals(INFO_PARAM)) {
392 want_info = true;
393 info_types.add(p_value);
394 }
395 }
396
397 // Make sure there is no repeated information
398 if (want_ancestors)
399 want_parent = false;
400 if (want_descendants)
401 want_children = false;
402
403 for (int i=0; i < node_list.getLength(); i++) {
404 Element doc = (Element) node_list.item(i);
405 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
406 String is_external=doc.getAttribute("externalURL");
407
408 boolean is_external_link = false;
409 if (is_external.equals("0")) {is_external_link = true;}
410 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
411 if (doc_id.endsWith(".rt")){
412 doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
413 if (doc_id!=null){
414 doc_id += ".rt";
415 }else{
416 is_external_link = true;
417 }
418 }else{
419 doc_id = getHrefOID(doc_id);
420 if (doc_id==null){ is_external_link = true;}
421 }
422 }
423 if (!is_external_link){
424 if (external_id) {
425 doc_id = translateExternalId(doc_id);
426 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
427 } else if (idNeedsTranslating(doc_id)) {
428 doc_id = translateId(doc_id);
429 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
430 }
431
432 if (doc_id == null) {
433 continue;
434 }
435
436 if (want_info) {
437
438 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
439 doc.appendChild(node_info_elem);
440
441 for (int j=0; j<info_types.size(); j++) {
442 String info_type = (String)info_types.get(j);
443 String info_value = getStructureInfo(doc_id, info_type);
444 if (info_value != null) {
445 Element info_elem = this.doc.createElement("info");
446 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
447 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
448 node_info_elem.appendChild(info_elem);
449 }
450 }
451 }
452
453 if (want_structure) {
454 // all structure info goes into a nodeStructure elem
455 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
456 doc.appendChild(structure_elem);
457
458 if (want_entire_structure) {
459 String root_id = getRootId(doc_id);
460 Element root_node = createDocNode(root_id); //, true, false);
461 addDescendants(root_node, root_id, true);
462 structure_elem.appendChild(root_node);
463 continue; // with the next document, we dont need to do any more here
464 }
465
466 // Add the requested structure information
467 Element base_node = createDocNode(doc_id); //, false, false);
468
469 //Ancestors: continually add parent nodes until the root is reached
470 Element top_node = base_node; // the top node so far
471 if (want_ancestors) {
472 String current_id = doc_id;
473 while (true) {
474 String parent_id = getParentId(current_id);
475 //Element parent = getParent(current_id);
476 if (parent_id == null)
477 break; // no parent
478 Element parent_node = createDocNode(parent_id);
479 parent_node.appendChild(top_node);
480 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
481 top_node = parent_node;
482 }
483 }
484 // Parent: get the parent of the selected node
485 else if (want_parent) {
486 String parent_id = getParentId(doc_id);
487 if (parent_id != null) {
488 Element parent_node = createDocNode(parent_id);
489 parent_node.appendChild(base_node);
490 top_node = parent_node;
491 }
492 }
493
494 // now the top node is the root of the structure
495 structure_elem.appendChild(top_node);
496
497 //Siblings: get the other descendants of the selected node's parent
498 if (want_siblings) {
499 String parent_id = getParentId(doc_id);
500 if (parent_id != null) {
501 // if parent == current id, then we are at the top
502 // and can't get siblings
503 Element parent_node = (Element)base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
504
505 // add siblings, - returns a pointer to the new current node
506 base_node = addSiblings(parent_node, parent_id, doc_id);
507 }
508
509 }
510
511 // Children: get the descendants, but only one level deep
512 if (want_children) {
513 addDescendants(base_node, doc_id, false);
514 }
515 // Descendants: recursively get every descendant
516 else if (want_descendants) {
517 addDescendants(base_node, doc_id, true);
518 }
519 } // if want structure
520
521 }else{
522 Element external_link_elem = this.doc.createElement("external");
523 external_link_elem.setAttribute("external_link",doc.getAttribute(GSXML.NODE_ID_ATT));
524 doc.appendChild(external_link_elem);
525 }// if is_external_link
526 } // for each doc
527 return result;
528 }
529
530 /** Retrieve the content of a document */
531 protected Element processDocumentContentRetrieve(Element request)
532 {
533 // Create a new (empty) result message
534 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
535 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
536 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
537
538 if (!does_content) {
539 // shouldn't get here
540 return result;
541 }
542
543 // Get the parameters of the request
544 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
545 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
546 boolean external_id = false;
547 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
548 external_id = true;
549 }
550 // Get the request content
551 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
552 if (query_doc_list == null) {
553 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
554 return result;
555 }
556
557 String lang = request.getAttribute(GSXML.LANG_ATT);
558 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
559 result.appendChild(doc_list);
560
561 // set up the retrieval??
562
563 // Get the documents
564 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
565 GSXML.NODE_ID_ATT);
566 String[] is_externals=GSXML.getAttributeValuesFromList(query_doc_list,"externalURL");
567
568 for (int i = 0; i < doc_ids.length; i++) {
569 String doc_id = doc_ids[i];
570 String is_external=is_externals[i];
571
572 boolean is_external_link=false;
573 if (is_external.equals("0")){is_external_link = true;}
574 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
575 //if (!doc_id.startsWith("HASH")){
576 if (doc_id.endsWith(".rt")){
577 String find_doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
578 if (find_doc_id!=null){
579 doc_id = doc_id + ".rt";
580 }else{
581 is_external_link=true;
582 }
583
584 }else {
585 String find_doc_id = getHrefOID(doc_id);
586 if (find_doc_id==null){
587 is_external_link=true;
588 }else{
589 doc_id = find_doc_id;
590 }
591 }
592 }
593
594 if (!is_external_link){
595 // Create the document node
596 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
597 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
598 doc_list.appendChild(doc);
599
600 if (external_id) {
601 doc_id = translateExternalId(doc_id);
602 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
603 } else if (idNeedsTranslating(doc_id)) {
604 doc_id = translateId(doc_id);
605 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
606 }
607 if (doc_id == null) {
608 continue;
609 }
610 try {
611 Element node_content = getNodeContent(doc_id, lang);
612 doc.appendChild(node_content);
613 } catch (GSException e) {
614 GSXML.addError(this.doc, result, e.getMessage());
615 return result;
616
617 }
618 }else{
619 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
620 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
621 //doc.setAttribute("external_link", doc_id);
622 Element external_link_elem = this.doc.createElement("external");
623 external_link_elem.setAttribute("external_link",doc_id);
624 doc.appendChild(external_link_elem);
625
626 doc_list.appendChild(doc);
627 }
628 }
629 return result;
630 }
631
632 /** create an element to go into the structure. A node element
633 * has the form
634 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
635 */
636 protected Element createDocNode(String node_id) {
637 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
638 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
639
640 String doc_type = null;
641 if (default_document_type != null) {
642 doc_type = default_document_type;
643 } else {
644 doc_type = getDocType(node_id);
645 }
646 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
647 String node_type = getNodeType(node_id, doc_type);
648 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
649 return node;
650 }
651
652 /** adds all the children of doc_id the the doc element,
653 * and if recursive=true, adds all their children as well*/
654 protected void addDescendants(Element doc, String doc_id,
655 boolean recursive)
656 {
657 ArrayList child_ids = getChildrenIds(doc_id);
658 if (child_ids==null) return;
659 for (int i=0; i< child_ids.size(); i++) {
660 String child_id = (String)child_ids.get(i);
661 Element child_elem = createDocNode(child_id);
662 doc.appendChild(child_elem);
663 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
664 addDescendants(child_elem, child_id, recursive);
665 }
666 }
667 }
668
669 /** adds all the siblings of current_id to the parent element.
670 returns the new current element*/
671 protected Element addSiblings(Element parent_node, String parent_id,
672 String current_id) {
673 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
674 if (current_node == null) {
675 // create a sensible error message
676 logger.error(" there should be a first child.");
677 return null;
678 }
679 // remove the current child,- will add it in later in its correct place
680 parent_node.removeChild(current_node);
681
682 // add in all the siblings,
683 addDescendants(parent_node, parent_id, false);
684
685 // find the node that is now the current node
686 // this assumes that the new node that was created is the same as
687 // the old one that was removed - we may want to replace the new one
688 // with the old one.
689 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
690 return new_current;
691 }
692
693 /** returns true if oid ends in
694 .fc (firstchild),
695 .lc (lastchild),
696 .pr (parent),
697 .ns (next sibling),
698 .ps (prev sibling),
699 .rt (root)
700 .ss (specified sibling),
701 false otherwise
702 */
703 protected boolean idNeedsTranslating(String id) {
704 return OID.needsTranslating(id);
705 }
706
707 /** returns the list of sibling ids, including the specified node_id */
708 protected ArrayList getSiblingIds(String node_id) {
709 String parent_id = getParentId(node_id);
710 if (parent_id == null) {
711 return null;
712 }
713 return getChildrenIds(parent_id);
714
715 }
716
717 /** returns the node type of the specified node.
718 should be one of
719 GSXML.NODE_TYPE_LEAF,
720 GSXML.NODE_TYPE_INTERNAL,
721 GSXML.NODE_TYPE_ROOT
722 */
723 protected String getNodeType(String node_id, String doc_type) {
724 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
725 return GSXML.NODE_TYPE_LEAF;
726 }
727
728 if (getParentId(node_id)==null) {
729 return GSXML.NODE_TYPE_ROOT;
730 }
731 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
732 return GSXML.NODE_TYPE_LEAF;
733 }
734 if (getChildrenIds(node_id)==null) {
735 return GSXML.NODE_TYPE_LEAF;
736 }
737 return GSXML.NODE_TYPE_INTERNAL;
738
739 }
740
741 /** if id ends in .fc, .pc etc, then translate it to the correct id
742 * default implementation: just remove the suffix */
743 protected String translateId(String id) {
744 return id.substring(0,id.length());
745 }
746
747 /** if an id is not a greenstone id (an external id) then translate
748 * it to a greenstone one
749 * default implementation: return the id */
750 protected String translateExternalId(String id) {
751 return id;
752 }
753
754 /** returns the document type of the doc that the specified node
755 belongs to. should be one of
756 GSXML.DOC_TYPE_SIMPLE,
757 GSXML.DOC_TYPE_PAGED,
758 GSXML.DOC_TYPE_HIERARCHY
759 default implementation: return DOC_TYPE_SIMPLE
760 */
761 protected String getDocType(String node_id) {
762 return GSXML.DOC_TYPE_SIMPLE;
763 }
764
765
766 /** returns the id of the root node of the document containing
767 * node node_id. may be the same as node_id
768 * default implemntation: return node_id
769 */
770 protected String getRootId(String node_id) {
771 return node_id;
772 }
773 /** returns a list of the child ids in order, null if no children
774 * default implementation: return null */
775 protected ArrayList getChildrenIds(String node_id) {
776 return null;
777 }
778 /** returns the node id of the parent node, null if no parent
779 * default implementation: return null */
780 protected String getParentId(String node_id) {
781 return null;
782 }
783
784 /** get the metadata for the doc node doc_id
785 * returns a metadataList element:
786 * <metadataList><metadata name="xxx">value</metadata></metadataList>
787 */
788 abstract protected Element getMetadataList(String doc_id,
789 boolean all_metadata,
790 ArrayList metadata_names) throws GSException;
791 /** returns the content of a node
792 * should return a nodeContent element:
793 * <nodeContent>text content or other elements</nodeContent>
794 * can return
795 */
796 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
797
798 /** returns the structural information asked for.
799 * info_type may be one of
800 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
801 */
802 abstract protected String getStructureInfo(String doc_id, String info_type);
803
804 protected String getHrefOID(String href_url){
805 return null;
806 }
807
808}
Note: See TracBrowser for help on using the repository browser.