package org.greenstone.gsdl3.service; // Greenstone classes import org.greenstone.gsdl3.util.*; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.Attr; import org.w3c.dom.Text; import org.w3c.dom.NodeList; import org.w3c.dom.NamedNodeMap; // General Java classes import java.io.File; import java.io.Serializable; import java.util.Vector; import java.util.HashMap; import org.apache.log4j.*; public class XMLRetrieve extends ServiceRack { static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.XMLRetrieve.class.getName()); protected static final String CONTENT_SERVICE = "DocumentContentRetrieve"; protected static final String METADATA_SERVICE = "DocumentMetadataRetrieve"; protected static final String STRUCTURE_SERVICE = "DocumentStructureRetrieve"; protected String toc_xsl_name = ""; protected String document_encoding = ""; protected String document_root_tag = ""; protected Element collection_doc_list = null; protected boolean provide_content = true; protected boolean provide_structure = true; protected boolean provide_metadata = true; public boolean configure(Element info, Element extra_info) { if (!super.configure(info, extra_info)){ return false; } logger.info("configuring XMLRetrieve..."); // look for the parameters Element param_list = (Element)GSXML.getChildByTagName(info, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); HashMap params; String services_to_provide = ""; if (param_list != null) { params = GSXML.extractParams(param_list, false); this.toc_xsl_name = (String)params.get("tocXSLT"); this.document_encoding = (String)params.get("documentEncoding"); this.document_root_tag = (String)params.get("documentRootTag"); services_to_provide = (String)params.get("provideServices"); } if (this.toc_xsl_name == null || this.toc_xsl_name.equals("")) { this.toc_xsl_name = "default_toc"; } this.toc_xsl_name = this.toc_xsl_name+".xsl"; if (this.document_encoding == null || this.document_encoding.equals("")) { this.document_encoding = "UTF-8"; } if (services_to_provide != null && !services_to_provide.equals("")) { if (services_to_provide.indexOf("content")==-1) { provide_content = false; } if (services_to_provide.indexOf("metadata")==-1) { provide_metadata = false; } if (services_to_provide.indexOf("structure")==-1) { provide_structure = false; } } // set up short_service_info_ - for now just has name and type Element retrieve_service; if (provide_content) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, CONTENT_SERVICE); this.short_service_info.appendChild(retrieve_service); } if (provide_metadata) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, METADATA_SERVICE); this.short_service_info.appendChild(retrieve_service); } if (provide_structure) { retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, STRUCTURE_SERVICE); this.short_service_info.appendChild(retrieve_service); } // find the doc list from the extra_info and keep it - should this be in collect.cfg or build.cfg?? collection_doc_list = (Element)GSXML.getChildByTagName(extra_info, GSXML.DOCUMENT_ELEM+GSXML.LIST_MODIFIER); GSEntityResolver resolver = new GSEntityResolver(); resolver.setClassLoader(this.class_loader); this.converter.setEntityResolver(resolver); return true; } // this may get called but is not useful in the case of retrieve services protected Element getServiceDescription(String service_id, String lang, String subset) { Element retrieve_service = this.doc.createElement(GSXML.SERVICE_ELEM); retrieve_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE); retrieve_service.setAttribute(GSXML.NAME_ATT, service_id); return retrieve_service; } protected Element processDocumentContentRetrieve(Element request) { Element result = this.doc.createElement(GSXML.RESPONSE_ELEM); result.setAttribute(GSXML.FROM_ATT, CONTENT_SERVICE); result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS); Element doc_list = (Element)GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); if (doc_list == null) { return result; } Element result_doc_list = (Element)this.doc.importNode(doc_list, true); result.appendChild(result_doc_list); NodeList docs = result_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM); for (int i=0; i meta_name_list = new Vector(); boolean all_metadata = false; // Process the request parameters Element param = GSXML.getFirstElementChild(param_list);//(Element) param_list.getFirstChild(); while (param != null) { // Identify the metadata information desired if (param.getAttribute(GSXML.NAME_ATT).equals("metadata")) { String metadata = GSXML.getValue(param); if (metadata.equals("all")) { all_metadata = true; break; } meta_name_list.add(metadata); } param = (Element) param.getNextSibling(); } NodeList docs = result_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM); for (int i=0; i 4) { logger.error("badly formatted node id ("+node_id +"), cant retrieve the section"); return null; } String id=""; String tagname = ""; String scope = ""; if (bits.length==2) { tagname = bits[1]; } else { scope = bits[1]; tagname = bits[2]; if (bits.length == 4) { id = bits[3]; } } scope = translateScope(scope); Element top=null; if (!scope.equals("")) { top = (Element)GSXML.getNodeByPath(doc_elem, scope); if (top == null) { // something gone wrong return null; } } else { top = doc_elem; } NodeList elements = top.getElementsByTagName(tagname); if (elements.getLength() == 0) { return null; } // no id, just return the first one if (id.equals("")) { return (Element)elements.item(0); } // have an id, need to check and find the right one. for (int i=0; i meta_name_list) { // our default strategy here is to only return Title and root:Title // ignore all others // the title of a section is just a little bit of the text inside it. // the root_Title is the title from the doc info in the config file Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+ GSXML.LIST_MODIFIER); String doc_name = getWorkName(node_id); boolean node_is_root = false; if (doc_name.equals(node_id)) { node_is_root = true; } Element this_doc = GSXML.getNamedElement(this.collection_doc_list, GSXML.DOCUMENT_ELEM, GSXML.NAME_ATT, doc_name); Element doc_meta_list = (Element) GSXML.getChildByTagName(this_doc, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); boolean get_section_title = false; if (all) { if (node_is_root) { return (Element)this.doc.importNode(doc_meta_list, true); } else { get_section_title = true; } } else { // have to process metadata one by one for (int i=0; i