/*
 *    AbstractDocumentRetrieve.java
 *    a base class for retrieval services
 *    Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.greenstone.gsdl3.service;

// Greenstone classes
import org.greenstone.util.GlobalProperties;
import org.greenstone.gsdl3.core.GSException;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.GSPath;
import org.greenstone.gsdl3.util.MacroResolver;
import org.greenstone.gsdl3.util.OID;
import org.greenstone.gsdl3.util.GSConstants;

// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

// General Java classes
import java.io.File;
import java.util.StringTokenizer;
import java.util.Set;
import java.util.Iterator;
import java.util.ArrayList;

import org.apache.log4j.*;

/** Abstract class for Document Retrieval Services
 *
 * Subclasses must implement getMetadataList, getNodeContent and
 * getStructureInfo; the id/structure hooks (getDocType, getParentId,
 * getChildrenIds, ...) have trivial defaults that can be overridden.
 *
 * @author Katherine Don
 */
public abstract class AbstractDocumentRetrieve
    extends ServiceRack {

    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());

    // the services on offer
    // (these are the names configure() puts on the service elements it
    // appends to short_service_info)
    protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
    protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
    protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";

    // parameter names
    // NOTE(review): the code that reads these parameters is not visible in
    // this copy of the file - confirm how they are used
    protected static final String STRUCT_PARAM = "structure";
    protected static final String INFO_PARAM = "info";

    // presumably the values accepted for STRUCT_PARAM - TODO confirm
    protected static final String STRUCT_ANCESTORS = "ancestors";
    protected static final String STRUCT_PARENT = "parent";
    protected static final String STRUCT_SIBS = "siblings";
    protected static final String STRUCT_CHILDREN = "children";
    protected static final String STRUCT_DESCENDS = "descendants";
    protected static final String STRUCT_ENTIRE = "entire";

    // the info_type values named in the getStructureInfo documentation
    protected static final String INFO_NUM_SIBS = "numSiblings";
    protected static final String INFO_NUM_CHILDREN = "numChildren";
    protected static final String INFO_SIB_POS = "siblingPosition";

    // means the id is not a greenstone id and needs translating
    protected static final String EXTID_PARAM = "ext";

    protected Element config_info = null; // the xml from the config file

    // document type applied to every node by createDocNode when non-null;
    // configure() sets it from the "documentType" gsf:option of the display
    // format, otherwise getDocType() is asked per node
    protected String default_document_type = null;
    // stays null unless something else assigns it; configure() only uses
    // it when it is non-null
    protected MacroResolver macro_resolver = null;

    /** does this class provide the service??
*/
    // each flag decides whether configure() advertises the matching
    // service in short_service_info
    protected boolean does_metadata = true;
    protected boolean does_content = true;
    protected boolean does_structure = true;

    /** constructor */
    public AbstractDocumentRetrieve()
    {
    }

    /** configure this service
     *
     * Runs the superclass configuration first and returns false if that
     * fails. Then stores info in config_info, appends one service element
     * to short_service_info for each enabled does_* flag, stores an
     * imported copy of the display/format element found under extra_info
     * (if any) in format_info_map under DOCUMENT_CONTENT_RETRIEVE_SERVICE,
     * reads its "documentType" gsf:option into default_document_type, and,
     * when macro_resolver is non-null, gives it the site details and any
     * replaceList macros.
     *
     * NOTE(review): the end of this method (the replaceListRef loop and
     * the final return) is missing from this copy of the file - see the
     * note at the truncated for statement below.
     */
    public boolean configure(Element info, Element extra_info)
    {
        if (!super.configure(info, extra_info)){
            return false;
        }

        logger.info("Configuring AbstractDocumentRetrieve...");
        this.config_info = info;

        // set up short_service_info_ - for now just has name and type
        if (does_structure) {
            Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
            dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
            dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
            this.short_service_info.appendChild(dsr_service);
        }

        if (does_metadata) {
            Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
            dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
            dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
            this.short_service_info.appendChild(dmr_service);
        }

        if (does_content) {
            Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
            dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
            dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
            this.short_service_info.appendChild(dcr_service);
        }

        // look for document display format
        String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
        Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
        if (display_format != null) {
            // the format element is imported (deep copy) into this
            // service's own document before being stored
            this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
            // should we keep a copy?
            // check for docType option.
            Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
            if (doc_type_opt != null) {
                String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
                // an empty value leaves default_document_type untouched
                if (!value.equals("")) {
                    this.default_document_type = value;
                }
            }
        }

        if (macro_resolver != null) {
            macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
            // set up the macro resolver
            Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
            if (replacement_elem != null) {
                macro_resolver.addMacros(replacement_elem);
            }
            // look for any refs to global replace lists
            NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
            // NOTE(review): this copy of the file is damaged here. The text
            // jumps from the start of the for statement straight to the
            // closing marker of a later comment, so the loop body, the rest
            // of configure() and anything declared between it and
            // createDocNode() are missing. Restore them from the original
            // source - do not reconstruct by guesswork.
            for (int i=0; i */
    /** Creates the element that represents node_id in a document
     * structure: a GSXML.DOC_NODE_ELEM element owned by this.doc, carrying
     * the node id, the document type (default_document_type when it is
     * set, otherwise getDocType(node_id)) and the node type computed by
     * getNodeType.
     */
    protected Element createDocNode(String node_id) {
        Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
        node.setAttribute(GSXML.NODE_ID_ATT, node_id);

        String doc_type = null;
        if (default_document_type != null) {
            doc_type = default_document_type;
        } else {
            doc_type = getDocType(node_id);
        }
        node.setAttribute(GSXML.DOC_TYPE_ATT, doc_type);
        String node_type = getNodeType(node_id, doc_type);
        node.setAttribute(GSXML.NODE_TYPE_ATT, node_type);
        return node;
    }

    /** adds all the children of doc_id to the doc element,
     * and if recursive=true, adds all their children as well.
     * Does nothing when getChildrenIds(doc_id) returns null. */
    protected void addDescendants(Element doc, String doc_id,
                                  boolean recursive) {
        ArrayList child_ids = getChildrenIds(doc_id);
        if (child_ids==null) return;
        for (int i=0; i< child_ids.size(); i++) {
            String child_id = (String)child_ids.get(i);
            Element child_elem = createDocNode(child_id);
            doc.appendChild(child_elem);
            // nodes marked as leaves are not descended into
            if (recursive && !child_elem.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF)) {
                addDescendants(child_elem, child_id, recursive);
            }
        }
    }

    /** adds all the siblings of current_id to the parent element.
returns the new current element*/ protected Element addSiblings(Element parent_node, String parent_id, String current_id) { Element current_node = GSXML.getFirstElementChild(parent_node);//(Element)parent_node.getFirstChild(); if (current_node == null) { // create a sensible error message logger.error(" there should be a first child."); return null; } // remove the current child,- will add it in later in its correct place parent_node.removeChild(current_node); // add in all the siblings, addDescendants(parent_node, parent_id, false); // find the node that is now the current node // this assumes that the new node that was created is the same as // the old one that was removed - we may want to replace the new one // with the old one. Element new_current = GSXML.getNamedElement(parent_node, current_node.getNodeName(), GSXML.NODE_ID_ATT, current_id); return new_current; } /** returns true if oid ends in .fc (firstchild), .lc (lastchild), .pr (parent), .ns (next sibling), .ps (prev sibling), .rt (root) .ss (specified sibling), false otherwise */ protected boolean idNeedsTranslating(String id) { return OID.needsTranslating(id); } /** returns the list of sibling ids, including the specified node_id */ protected ArrayList getSiblingIds(String node_id) { String parent_id = getParentId(node_id); if (parent_id == null) { return null; } return getChildrenIds(parent_id); } /** returns the node type of the specified node. 
should be one of GSXML.NODE_TYPE_LEAF, GSXML.NODE_TYPE_INTERNAL, GSXML.NODE_TYPE_ROOT */ protected String getNodeType(String node_id, String doc_type) { if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) { return GSXML.NODE_TYPE_LEAF; } if (getParentId(node_id)==null) { return GSXML.NODE_TYPE_ROOT; } if (doc_type.equals(GSXML.DOC_TYPE_PAGED)) { return GSXML.NODE_TYPE_LEAF; } if (getChildrenIds(node_id)==null) { return GSXML.NODE_TYPE_LEAF; } return GSXML.NODE_TYPE_INTERNAL; } /** if id ends in .fc, .pc etc, then translate it to the correct id * default implementation: just remove the suffix */ protected String translateId(String id) { return id.substring(0,id.length()); } /** if an id is not a greenstone id (an external id) then translate * it to a greenstone one * default implementation: return the id */ protected String translateExternalId(String id) { return id; } /** returns the document type of the doc that the specified node belongs to. should be one of GSXML.DOC_TYPE_SIMPLE, GSXML.DOC_TYPE_PAGED, GSXML.DOC_TYPE_HIERARCHY default implementation: return DOC_TYPE_SIMPLE */ protected String getDocType(String node_id) { return GSXML.DOC_TYPE_SIMPLE; } /** returns the id of the root node of the document containing * node node_id. 
may be the same as node_id * default implemntation: return node_id */
    protected String getRootId(String node_id) {
        return node_id;
    }

    /** returns a list of the child ids in order, null if no children
     * default implementation: return null, so every node looks childless
     * unless a subclass overrides this
     */
    protected ArrayList getChildrenIds(String node_id) {
        return null;
    }

    /** returns the node id of the parent node, null if no parent
     * default implementation: return null, so every node looks parentless
     * unless a subclass overrides this
     */
    protected String getParentId(String node_id) {
        return null;
    }

    /** get the metadata for the doc node doc_id
     * returns a metadataList element.
     * NOTE(review): the example markup that followed in the original
     * comment has been lost (only the word "value" survives); presumably
     * the list holds one child element per metadata value - confirm
     * against an implementing class.
     * NOTE(review): presumably all_metadata=true asks for every metadata
     * item and otherwise only the names in metadata_names are wanted -
     * confirm against an implementing class.
     */
    abstract protected Element getMetadataList(String doc_id,
                                               boolean all_metadata,
                                               ArrayList metadata_names) throws GSException;

    /** returns the content of a node
     * should return a nodeContent element holding text content or other
     * elements.
     * NOTE(review): the original comment ends with "can return" and the
     * rest is lost, so the alternative return value is unknown - confirm
     * against an implementing class.
     */
    abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;

    /** returns the structural information asked for.
     * info_type may be one of
     * INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
     */
    abstract protected String getStructureInfo(String doc_id, String info_type);

    /** default implementation: return null.
     * NOTE(review): judging by the name this maps an href url to an OID;
     * the intended contract is not documented here - confirm against the
     * overriding classes.
     */
    protected String getHrefOID(String href_url){
        return null;
    }

}