source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/AbstractDocumentRetrieve.java@ 24393

Last change on this file since 24393 was 24393, checked in by sjm84, 13 years ago

Adding in the server-side code for the Document Maker as well as several other enhancements

  • Property svn:keywords set to Author Date Id Revision
File size: 27.3 KB
Line 
1/*
2 * AbstractDocumentRetrieve.java
3 * a base class for retrieval services
4
5 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21package org.greenstone.gsdl3.service;
22
23// Greenstone classes
24import org.greenstone.util.GlobalProperties;
25import org.greenstone.gsdl3.core.GSException;
26import org.greenstone.gsdl3.util.GSXML;
27import org.greenstone.gsdl3.util.GSPath;
28import org.greenstone.gsdl3.util.MacroResolver;
29import org.greenstone.gsdl3.util.OID;
30import org.greenstone.gsdl3.util.GSConstants;
31
32// XML classes
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.NodeList;
36
37// General Java classes
38import java.io.File;
39import java.util.StringTokenizer;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43
44
45import org.apache.log4j.*;
46
47/** Abstract class for Document Retrieval Services
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 */
51
52public abstract class AbstractDocumentRetrieve
53 extends ServiceRack {
54
55 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
56
57 // the services on offer
58 protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
59 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
60 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
61
62 protected static final String STRUCT_PARAM = "structure";
63 protected static final String INFO_PARAM = "info";
64
65 protected static final String STRUCT_ANCESTORS = "ancestors";
66 protected static final String STRUCT_PARENT = "parent";
67 protected static final String STRUCT_SIBS = "siblings";
68 protected static final String STRUCT_CHILDREN = "children";
69 protected static final String STRUCT_DESCENDS = "descendants";
70 protected static final String STRUCT_ENTIRE = "entire";
71
72 protected static final String INFO_NUM_SIBS = "numSiblings";
73 protected static final String INFO_NUM_CHILDREN = "numChildren";
74 protected static final String INFO_SIB_POS = "siblingPosition";
75
76 // means the id is not a greenstone id and needs translating
77 protected static final String EXTID_PARAM = "ext";
78
79 protected Element config_info = null; // the xml from the config file
80
81 protected String default_document_type = null;
82 protected MacroResolver macro_resolver = null;
83
84 /** does this class provide the service?? */
85 protected boolean does_metadata = true;
86 protected boolean does_content = true;
87 protected boolean does_structure = true;
88
89 /** constructor */
90 public AbstractDocumentRetrieve()
91 {
92 }
93
94 /** configure this service */
95 public boolean configure(Element info, Element extra_info)
96 {
97 if (!super.configure(info, extra_info)){
98 return false;
99 }
100
101 logger.info("Configuring AbstractDocumentRetrieve...");
102 this.config_info = info;
103
104 // set up short_service_info_ - for now just has name and type
105 if (does_structure) {
106 Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
107 dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
108 dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
109 this.short_service_info.appendChild(dsr_service);
110 }
111
112 if (does_metadata) {
113 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
114 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
115 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
116 this.short_service_info.appendChild(dmr_service);
117 }
118
119 if (does_content) {
120 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
121 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
122 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
123 this.short_service_info.appendChild(dcr_service);
124 }
125
126 // look for document display format
127 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
128 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
129 if (display_format != null) {
130 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
131 // should we keep a copy?
132 // check for docType option.
133 Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
134 if (doc_type_opt != null) {
135 String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
136 if (!value.equals("")) {
137 this.default_document_type = value;
138 }
139 }
140 }
141
142 if (macro_resolver != null) {
143 macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
144 // set up the macro resolver
145 Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
146 if (replacement_elem != null) {
147 macro_resolver.addMacros(replacement_elem);
148 }
149 // look for any refs to global replace lists
150 NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
151 for (int i=0; i<replace_refs_elems.getLength(); i++) {
152 String id = ((Element)replace_refs_elems.item(i)).getAttribute("id");
153 if (!id.equals("")) {
154 Element replace_list = GSXML.getNamedElement(this.router.config_info, "replaceList", "id", id);
155 if (replace_list != null) {
156 macro_resolver.addMacros(replace_list);
157 }
158 }
159 }
160 }
161
162 return true;
163 }
164
165 protected Element getServiceDescription(String service_id, String lang, String subset) {
166
167 // these ones are probably never called, but put them here just in case
168 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
169 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
170 service_elem.setAttribute(GSXML.NAME_ATT, service_id);
171 return service_elem;
172 }
173
174 protected Element processDocumentMetadataRetrieve(Element request) {
175
176 // Create a new (empty) result message
177 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
178 String lang = request.getAttribute(GSXML.LANG_ATT);
179 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
180 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
181
182 if (!does_metadata) {
183 // shouldn't get here
184 return result;
185 }
186 // Get the parameters of the request
187 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
188 if (param_list == null) {
189 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
190 return result;
191 }
192
193 boolean external_id = false;
194 // The metadata information required
195 ArrayList metadata_names_list = new ArrayList();
196 boolean all_metadata = false;
197 // Process the request parameters
198 Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild();
199 while (param != null) {
200 // Identify the metadata information desired
201 if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) {
202 String metadata = GSXML.getValue(param);
203 if (metadata.equals("all")) {
204 all_metadata = true;
205 break;
206 }
207 metadata_names_list.add(metadata);
208 } else if (param.getAttribute(GSXML.NAME_ATT).equals(EXTID_PARAM)&& GSXML.getValue(param).equals("1")) {
209 external_id = true;
210 }
211 param = (Element) param.getNextSibling();
212 }
213
214 // check that there has been some metadata specified
215 if (!all_metadata && metadata_names_list.size()==0) {
216 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no metadata names found in the "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
217 return result;
218 }
219
220 // Get the documents
221 Element request_node_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
222 if (request_node_list == null) {
223 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
224 return result;
225 }
226
227 // copy the request doc node list to the response
228 Element response_node_list = (Element) this.doc.importNode(request_node_list, true);
229 result.appendChild(response_node_list);
230
231 // use the copied list so that we add the metadata into the copy
232 // are we just adding metadata for the top level nodes? or can we accept a hierarchy here???
233 NodeList request_nodes = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
234 if (request_nodes.getLength()==0) {
235 GSXML.addError(this.doc, result, "DocumentMetadataRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
236 return result;
237 }
238
239 // Whew, now we have checked (almost) all the syntax of the request, now we can process it.
240 for (int i = 0; i < request_nodes.getLength(); i++) {
241 Element request_node = (Element) request_nodes.item(i);
242 String node_id = request_node.getAttribute(GSXML.NODE_ID_ATT);
243
244 boolean is_external_link = false;
245 if (!node_id.startsWith("HASH") && !node_id.startsWith("D")){
246 if (node_id.endsWith(".rt")){
247 node_id = getHrefOID(node_id.substring(0,node_id.length()-3));
248 if (node_id!=null){
249 node_id += ".rt";
250 }else{
251 is_external_link = true;
252 }
253 }else{
254 node_id = getHrefOID(node_id);
255 if (node_id==null){
256 is_external_link = true;
257 }
258 }
259 }
260 if (!is_external_link){
261 if (external_id) {
262 // can we have .pr etc extensions with external ids?
263 node_id = translateExternalId(node_id);
264 } else if (idNeedsTranslating(node_id)) {
265 node_id = translateId(node_id);
266 }
267 }
268
269 if (node_id == null) {
270 continue;
271 }
272 if (!is_external_link){
273 try {
274 Element metadata_list = getMetadataList(node_id, all_metadata, metadata_names_list);
275 request_node.appendChild(metadata_list);
276 } catch (GSException e) {
277 GSXML.addError(this.doc, result, e.getMessage(), e.getType());
278 if (e.getType().equals(GSXML.ERROR_TYPE_SYSTEM)) {
279 // there is no point trying any others
280 return result;
281 }
282 }
283 }else{
284 request_node.setAttribute("external_link",request_node.getAttribute(GSXML.NODE_ID_ATT));
285 }
286 }
287 return result;
288 }
289
290 protected Element processDocumentStructureRetrieve(Element request) {
291
292 // Create a new (empty) result message
293 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
294 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
295 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
296
297 if (!does_structure) {
298 // shouldn't get here
299 return result;
300 }
301
302 String lang = request.getAttribute(GSXML.LANG_ATT);
303
304 // Get the parameters of the request
305 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
306 if (param_list == null) {
307 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing "+ GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
308 return result;
309 }
310
311 // get the documents of the request
312 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
313 if (query_doc_list == null) {
314 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: missing " +GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
315 return result;
316 }
317
318 // copy the doc_list to the response
319 Element response_node_list = (Element) this.doc.importNode(query_doc_list, true);
320 result.appendChild(response_node_list);
321
322 // check that we have some doc nodes specified
323 NodeList node_list = GSXML.getChildrenByTagName(response_node_list, GSXML.DOC_NODE_ELEM);
324 if (node_list.getLength()==0) {
325 GSXML.addError(this.doc, result, "DocumentStructureRetrieve: no "+GSXML.DOC_NODE_ELEM +" found in the "+ GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.ERROR_TYPE_SYNTAX);
326 return result;
327 }
328
329 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
330 boolean external_id = false;
331 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
332 external_id = true;
333 }
334
335 // the type of info required
336 boolean want_structure = false;
337 boolean want_info = false;
338
339 ArrayList info_types=new ArrayList();
340 // The document structure information desired
341 boolean want_ancestors = false;
342 boolean want_parent = false;
343 boolean want_siblings = false;
344 boolean want_children = false;
345 boolean want_descendants = false;
346
347 boolean want_entire_structure = false;
348 // Process the request parameters
349 NodeList params = param_list.getElementsByTagName(GSXML.PARAM_ELEM);
350 for (int i=0; i<params.getLength();i++) {
351
352 Element param = (Element)params.item(i);
353 String p_name = param.getAttribute(GSXML.NAME_ATT);
354 String p_value = GSXML.getValue(param);
355 // Identify the structure information desired
356 if (p_name.equals(STRUCT_PARAM)) {
357 want_structure = true;
358
359 // This is NOT locale sensitive
360 if (p_value.equals(STRUCT_ANCESTORS))
361 want_ancestors = true;
362 else if (p_value.equals(STRUCT_PARENT))
363 want_parent = true;
364 else if (p_value.equals(STRUCT_SIBS))
365 want_siblings = true;
366 else if (p_value.equals(STRUCT_CHILDREN))
367 want_children = true;
368 else if (p_value.equals(STRUCT_DESCENDS))
369 want_descendants = true;
370 else if (p_value.equals(STRUCT_ENTIRE))
371 want_entire_structure = true;
372 else
373 logger.error("AbstractDocumentRetrieve Warning: Unknown value \"" + p_value + "\".");
374 } else if (p_name.equals(INFO_PARAM)) {
375 want_info = true;
376 info_types.add(p_value);
377 }
378 }
379
380 // Make sure there is no repeated information
381 if (want_ancestors)
382 want_parent = false;
383 if (want_descendants)
384 want_children = false;
385
386 for (int i=0; i < node_list.getLength(); i++) {
387 Element doc = (Element) node_list.item(i);
388 String doc_id = doc.getAttribute(GSXML.NODE_ID_ATT);
389 String is_external=doc.getAttribute("externalURL");
390
391 boolean is_external_link = false;
392 if (is_external.equals("0")) {is_external_link = true;}
393 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
394 if (doc_id.endsWith(".rt")){
395 doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
396 if (doc_id!=null){
397 doc_id += ".rt";
398 }else{
399 is_external_link = true;
400 }
401 }else{
402 doc_id = getHrefOID(doc_id);
403 if (doc_id==null){ is_external_link = true;}
404 }
405 }
406 if (!is_external_link){
407 if (external_id) {
408 doc_id = translateExternalId(doc_id);
409 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
410 } else if (idNeedsTranslating(doc_id)) {
411 doc_id = translateId(doc_id);
412 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
413 }
414
415 if (doc_id == null) {
416 continue;
417 }
418
419 if (want_info) {
420
421 Element node_info_elem = this.doc.createElement("nodeStructureInfo");
422 doc.appendChild(node_info_elem);
423
424 for (int j=0; j<info_types.size(); j++) {
425 String info_type = (String)info_types.get(j);
426 String info_value = getStructureInfo(doc_id, info_type);
427 if (info_value != null) {
428 Element info_elem = this.doc.createElement("info");
429 info_elem.setAttribute(GSXML.NAME_ATT, info_type);
430 info_elem.setAttribute(GSXML.VALUE_ATT, info_value);
431 node_info_elem.appendChild(info_elem);
432 }
433 }
434 }
435
436 if (want_structure) {
437 // all structure info goes into a nodeStructure elem
438 Element structure_elem = this.doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
439 doc.appendChild(structure_elem);
440
441 if (want_entire_structure) {
442 String root_id = getRootId(doc_id);
443 Element root_node = createDocNode(root_id); //, true, false);
444 addDescendants(root_node, root_id, true);
445 structure_elem.appendChild(root_node);
446 continue; // with the next document, we dont need to do any more here
447 }
448
449 // Add the requested structure information
450 Element base_node = createDocNode(doc_id); //, false, false);
451
452 //Ancestors: continually add parent nodes until the root is reached
453 Element top_node = base_node; // the top node so far
454 if (want_ancestors) {
455 String current_id = doc_id;
456 while (true) {
457 String parent_id = getParentId(current_id);
458 //Element parent = getParent(current_id);
459 if (parent_id == null)
460 break; // no parent
461 Element parent_node = createDocNode(parent_id);
462 parent_node.appendChild(top_node);
463 current_id = parent_id;//.getAttribute(GSXML.NODE_ID_ATT);
464 top_node = parent_node;
465 }
466 }
467 // Parent: get the parent of the selected node
468 else if (want_parent) {
469 String parent_id = getParentId(doc_id);
470 if (parent_id != null) {
471 Element parent_node = createDocNode(parent_id);
472 parent_node.appendChild(base_node);
473 top_node = parent_node;
474 }
475 }
476
477 // now the top node is the root of the structure
478 structure_elem.appendChild(top_node);
479
480 //Siblings: get the other descendants of the selected node's parent
481 if (want_siblings) {
482 String parent_id = getParentId(doc_id);
483 if (parent_id != null) {
484 // if parent == current id, then we are at the top
485 // and can't get siblings
486 Element parent_node = (Element)base_node.getParentNode(); // this may be the structure element if there has been no request for parents or ancestors
487
488 // add siblings, - returns a pointer to the new current node
489 base_node = addSiblings(parent_node, parent_id, doc_id);
490 }
491
492 }
493
494 // Children: get the descendants, but only one level deep
495 if (want_children) {
496 addDescendants(base_node, doc_id, false);
497 }
498 // Descendants: recursively get every descendant
499 else if (want_descendants) {
500 addDescendants(base_node, doc_id, true);
501 }
502 } // if want structure
503
504 }else{
505 Element external_link_elem = this.doc.createElement("external");
506 external_link_elem.setAttribute("external_link",doc.getAttribute(GSXML.NODE_ID_ATT));
507 doc.appendChild(external_link_elem);
508 }// if is_external_link
509 } // for each doc
510 return result;
511 }
512
513 /** Retrieve the content of a document */
514 protected Element processDocumentContentRetrieve(Element request)
515 {
516 // Create a new (empty) result message
517 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
518 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
519 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
520
521 if (!does_content) {
522 // shouldn't get here
523 return result;
524 }
525
526 // Get the parameters of the request
527 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
528 Element extid_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, EXTID_PARAM);
529 boolean external_id = false;
530 if (extid_param != null && GSXML.getValue(extid_param).equals("1")) {
531 external_id = true;
532 }
533 // Get the request content
534 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
535 if (query_doc_list == null) {
536 logger.error("Error: DocumentContentRetrieve request specified no doc nodes.\n");
537 return result;
538 }
539
540 String lang = request.getAttribute(GSXML.LANG_ATT);
541 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
542 result.appendChild(doc_list);
543
544 // set up the retrieval??
545
546 // Get the documents
547 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
548 GSXML.NODE_ID_ATT);
549 String[] is_externals=GSXML.getAttributeValuesFromList(query_doc_list,"externalURL");
550
551 for (int i = 0; i < doc_ids.length; i++) {
552 String doc_id = doc_ids[i];
553 String is_external=is_externals[i];
554
555 boolean is_external_link=false;
556 if (is_external.equals("0")){is_external_link = true;}
557 if (is_external.equals("1") && !doc_id.startsWith("HASH") && !is_external_link){
558 //if (!doc_id.startsWith("HASH")){
559 if (doc_id.endsWith(".rt")){
560 String find_doc_id = getHrefOID(doc_id.substring(0,doc_id.length()-3));
561 if (find_doc_id!=null){
562 doc_id = doc_id + ".rt";
563 }else{
564 is_external_link=true;
565 }
566
567 }else {
568 String find_doc_id = getHrefOID(doc_id);
569 if (find_doc_id==null){
570 is_external_link=true;
571 }else{
572 doc_id = find_doc_id;
573 }
574 }
575 }
576
577 if (!is_external_link){
578 // Create the document node
579 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
580 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
581 doc_list.appendChild(doc);
582
583 if (external_id) {
584 doc_id = translateExternalId(doc_id);
585 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
586 } else if (idNeedsTranslating(doc_id)) {
587 doc_id = translateId(doc_id);
588 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
589 }
590 if (doc_id == null) {
591 continue;
592 }
593 try {
594 Element node_content = getNodeContent(doc_id, lang);
595 doc.appendChild(node_content);
596 } catch (GSException e) {
597 GSXML.addError(this.doc, result, e.getMessage());
598 return result;
599
600 }
601 }else{
602 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
603 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
604 //doc.setAttribute("external_link", doc_id);
605 Element external_link_elem = this.doc.createElement("external");
606 external_link_elem.setAttribute("external_link",doc_id);
607 doc.appendChild(external_link_elem);
608
609 doc_list.appendChild(doc);
610 }
611 }
612 return result;
613 }
614
615 /** create an element to go into the structure. A node element
616 * has the form
617 * <docNode nodeId='xxx' nodeType='leaf' docType='hierarchy'/>
618 */
619 protected Element createDocNode(String node_id) {
620 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
621 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
622
623 String doc_type = null;
624 if (default_document_type != null) {
625 doc_type = default_document_type;
626 } else {
627 doc_type = getDocType(node_id);
628 }
629 node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
630 String node_type = getNodeType(node_id, doc_type);
631 node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
632 return node;
633 }
634
635 /** adds all the children of doc_id the the doc element,
636 * and if recursive=true, adds all their children as well*/
637 protected void addDescendants(Element doc, String doc_id,
638 boolean recursive)
639 {
640 ArrayList child_ids = getChildrenIds(doc_id);
641 if (child_ids==null) return;
642 for (int i=0; i< child_ids.size(); i++) {
643 String child_id = (String)child_ids.get(i);
644 Element child_elem = createDocNode(child_id);
645 doc.appendChild(child_elem);
646 if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
647 addDescendants(child_elem, child_id, recursive);
648 }
649 }
650 }
651
652 /** adds all the siblings of current_id to the parent element.
653 returns the new current element*/
654 protected Element addSiblings(Element parent_node, String parent_id,
655 String current_id) {
656 Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild();
657 if (current_node == null) {
658 // create a sensible error message
659 logger.error(" there should be a first child.");
660 return null;
661 }
662 // remove the current child,- will add it in later in its correct place
663 parent_node.removeChild(current_node);
664
665 // add in all the siblings,
666 addDescendants(parent_node, parent_id, false);
667
668 // find the node that is now the current node
669 // this assumes that the new node that was created is the same as
670 // the old one that was removed - we may want to replace the new one
671 // with the old one.
672 Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id);
673 return new_current;
674 }
675
676 /** returns true if oid ends in
677 .fc (firstchild),
678 .lc (lastchild),
679 .pr (parent),
680 .ns (next sibling),
681 .ps (prev sibling),
682 .rt (root)
683 .ss (specified sibling),
684 false otherwise
685 */
686 protected boolean idNeedsTranslating(String id) {
687 return OID.needsTranslating(id);
688 }
689
690 /** returns the list of sibling ids, including the specified node_id */
691 protected ArrayList getSiblingIds(String node_id) {
692 String parent_id = getParentId(node_id);
693 if (parent_id == null) {
694 return null;
695 }
696 return getChildrenIds(parent_id);
697
698 }
699
700 /** returns the node type of the specified node.
701 should be one of
702 GSXML.NODE_TYPE_LEAF,
703 GSXML.NODE_TYPE_INTERNAL,
704 GSXML.NODE_TYPE_ROOT
705 */
706 protected String getNodeType(String node_id, String doc_type) {
707 if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
708 return GSXML.NODE_TYPE_LEAF;
709 }
710
711 if (getParentId(node_id)==null) {
712 return GSXML.NODE_TYPE_ROOT;
713 }
714 if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) {
715 return GSXML.NODE_TYPE_LEAF;
716 }
717 if (getChildrenIds(node_id)==null) {
718 return GSXML.NODE_TYPE_LEAF;
719 }
720 return GSXML.NODE_TYPE_INTERNAL;
721
722 }
723
724 /** if id ends in .fc, .pc etc, then translate it to the correct id
725 * default implementation: just remove the suffix */
726 protected String translateId(String id) {
727 return id.substring(0,id.length());
728 }
729
730 /** if an id is not a greenstone id (an external id) then translate
731 * it to a greenstone one
732 * default implementation: return the id */
733 protected String translateExternalId(String id) {
734 return id;
735 }
736
737 /** returns the document type of the doc that the specified node
738 belongs to. should be one of
739 GSXML.DOC_TYPE_SIMPLE,
740 GSXML.DOC_TYPE_PAGED,
741 GSXML.DOC_TYPE_HIERARCHY
742 default implementation: return DOC_TYPE_SIMPLE
743 */
744 protected String getDocType(String node_id) {
745 return GSXML.DOC_TYPE_SIMPLE;
746 }
747
748
749 /** returns the id of the root node of the document containing
750 * node node_id. may be the same as node_id
751 * default implemntation: return node_id
752 */
753 protected String getRootId(String node_id) {
754 return node_id;
755 }
756 /** returns a list of the child ids in order, null if no children
757 * default implementation: return null */
758 protected ArrayList getChildrenIds(String node_id) {
759 return null;
760 }
761 /** returns the node id of the parent node, null if no parent
762 * default implementation: return null */
763 protected String getParentId(String node_id) {
764 return null;
765 }
766
767 /** get the metadata for the doc node doc_id
768 * returns a metadataList element:
769 * <metadataList><metadata name="xxx">value</metadata></metadataList>
770 */
771 abstract protected Element getMetadataList(String doc_id,
772 boolean all_metadata,
773 ArrayList metadata_names) throws GSException;
774 /** returns the content of a node
775 * should return a nodeContent element:
776 * <nodeContent>text content or other elements</nodeContent>
777 * can return
778 */
779 abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
780
781 /** returns the structural information asked for.
782 * info_type may be one of
783 * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
784 */
785 abstract protected String getStructureInfo(String doc_id, String info_type);
786
787 protected String getHrefOID(String href_url){
788 return null;
789 }
790
791}
Note: See TracBrowser for help on using the repository browser.