package org.greenstone.gsdl3.service; // Greenstone classes import org.greenstone.gsdl3.util.*; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.Attr; import org.w3c.dom.Text; import org.w3c.dom.NodeList; import org.w3c.dom.NamedNodeMap; // General Java classes import java.io.File; import java.util.Vector; import java.util.HashMap; import org.apache.log4j.*; public class XMLRetrieve extends ServiceRack { static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.XMLRetrieve.class.getName()); protected static final String CONTENT_SERVICE = "DocumentContentRetrieve"; protected static final String METADATA_SERVICE = "DocumentMetadataRetrieve"; protected static final String STRUCTURE_SERVICE = "DocumentStructureRetrieve"; protected String toc_xsl_name = ""; protected String document_encoding = ""; protected String document_root_tag = ""; protected Element collection_doc_list = null; protected boolean provide_content = true; protected boolean provide_structure = true; protected boolean provide_metadata = true; public boolean configure(Element info, Element extra_info) { if (!super.configure(info, extra_info)){ return false; } logger.info("configuring XMLRetrieve..."); // look for the parameters Element param_list = (Element)GSXML.getChildByTagName(info, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); HashMap params; String services_to_provide = ""; if (param_list != null) { params = GSXML.extractParams(param_list, false); this.toc_xsl_name = (String)params.get("tocXSLT"); this.document_encoding = (String)params.get("documentEncoding"); this.document_root_tag = (String)params.get("documentRootTag"); services_to_provide = (String)params.get("provideServices"); } if (this.toc_xsl_name == null || this.toc_xsl_name.equals("")) { this.toc_xsl_name = "default_toc"; } this.toc_xsl_name = this.toc_xsl_name+".xsl"; if (this.document_encoding == null || this.document_encoding.equals("")) { this.document_encoding = "UTF-8"; } if (services_to_provide != null && !services_to_provide.equals("")) { if (services_to_provide.indexOf("content")==-1) { provide_content = false; } if (services_to_provide.indexOf("metadata")==-1) { provide_metadata = false; } if (services_to_provide.indexOf("structure")==-1) { provide_structure = false; } } // set up short_service_info_ - for now just has name and type Element retrieve_service; if (provide_content) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, CONTENT_SERVICE); this.short_service_info.appendChild(retrieve_service); } if (provide_metadata) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, METADATA_SERVICE); this.short_service_info.appendChild(retrieve_service); } if (provide_structure) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, STRUCTURE_SERVICE); this.short_service_info.appendChild(retrieve_service); } // find the doc list from the extra_info and keep it - should this be in collect.cfg or build.cfg?? collection_doc_list = (Element)GSXML.getChildByTagName(extra_info, GSXML.DOCUMENT_ELEM+GSXML.LIST_MODIFIER); GSEntityResolver resolver = new GSEntityResolver(); resolver.setClassLoader(this.class_loader); this.converter.setEntityResolver(resolver); return true; } // this may get called but is not useful in the case of retrieve services protected Element getServiceDescription(String service_id, String lang, String subset) { Element retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, service_id); return retrieve_service; } protected Element processDocumentContentRetrieve(Element request) { Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); result.setAttribute(GSXML.FROM_ATT, CONTENT_SERVICE); result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); Element doc_list = (Element)GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); if (doc_list == null) { return result; } Element result_doc_list = (Element)this.doc.importNode(doc_list, true); result.appendChild(result_doc_list); NodeList docs = result_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM); for (int i=0; i 4) { logger.error("badly formatted node id ("+node_id +"), cant retrieve the section"); return null; } String id=""; String tagname = ""; String scope = ""; if (bits.length==2) { tagname = bits[1]; } else { scope = bits[1]; tagname = bits[2]; if (bits.length == 4) { id = bits[3]; } } scope = translateScope(scope); Element top=null; if (!scope.equals("")) { top = (Element)GSXML.getNodeByPath(doc_elem, scope); if (top == null) { // something gone wrong return null; } } else { top = doc_elem; } NodeList elements = top.getElementsByTagName(tagname); if (elements.getLength() == 0) { return null; } // no id, just return the first one if (id.equals("")) { return (Element)elements.item(0); } // have an id, need to check and find the right one. for (int i=0; i