/*
* AbstractDocumentRetrieve.java
* a base class for retrieval services
* Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.greenstone.gsdl3.service;
// Greenstone classes
import org.greenstone.util.GlobalProperties;
import org.greenstone.gsdl3.core.GSException;
import org.greenstone.gsdl3.util.GSXML;
import org.greenstone.gsdl3.util.GSPath;
import org.greenstone.gsdl3.util.MacroResolver;
import org.greenstone.gsdl3.util.OID;
import org.greenstone.gsdl3.util.GSConstants;
// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
// General Java classes
import java.io.File;
import java.util.StringTokenizer;
import java.util.Set;
import java.util.Iterator;
import java.util.ArrayList;
import org.apache.log4j.*;
/** Abstract class for Document Retrieval Services
*
* @author Katherine Don
*/
public abstract class AbstractDocumentRetrieve
extends ServiceRack {
// logger for this class
static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.AbstractDocumentRetrieve.class.getName());
// the services on offer: configure() advertises each of these names in
// short_service_info when the matching does_* flag below is true
protected static final String DOCUMENT_STRUCTURE_RETRIEVE_SERVICE = "DocumentStructureRetrieve";
protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
// parameter names
// NOTE(review): presumably looked up in incoming requests by the process
// methods, which are not visible in this part of the file - confirm
protected static final String STRUCT_PARAM = "structure";
protected static final String INFO_PARAM = "info";
// NOTE(review): presumably the values accepted for STRUCT_PARAM - confirm
// against the request handling code
protected static final String STRUCT_ANCESTORS = "ancestors";
protected static final String STRUCT_PARENT = "parent";
protected static final String STRUCT_SIBS = "siblings";
protected static final String STRUCT_CHILDREN = "children";
protected static final String STRUCT_DESCENDS = "descendants";
protected static final String STRUCT_ENTIRE = "entire";
// the info types that getStructureInfo() may be asked for
protected static final String INFO_NUM_SIBS = "numSiblings";
protected static final String INFO_NUM_CHILDREN = "numChildren";
protected static final String INFO_SIB_POS = "siblingPosition";
// means the id is not a greenstone id and needs translating
protected static final String EXTID_PARAM = "ext";
protected Element config_info = null; // the xml from the config file; assigned in configure()
// document type taken by configure() from the "documentType" gsf:option of
// the display format, when that option has a non-empty value; when this is
// non-null, createDocNode() uses it instead of calling getDocType()
protected String default_document_type = null;
// null unless something else assigns it; configure() only sets up macro
// replacement when this is non-null
protected MacroResolver macro_resolver = null;
/** Whether this class provides each of the three services: configure()
* adds a service's entry to short_service_info only when its flag is true. */
protected boolean does_metadata = true;
protected boolean does_content = true;
protected boolean does_structure = true;
/**
 * No-argument constructor. It performs no initialisation of its own
 * beyond invoking the superclass constructor.
 */
public AbstractDocumentRetrieve() {
    super();
}
/** configure this service */
public boolean configure(Element info, Element extra_info)
{
if (!super.configure(info, extra_info)){
return false;
}
logger.info("Configuring AbstractDocumentRetrieve...");
this.config_info = info;
// set up short_service_info_ - for now just has name and type
if (does_structure) {
Element dsr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
dsr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
dsr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_STRUCTURE_RETRIEVE_SERVICE);
this.short_service_info.appendChild(dsr_service);
}
if (does_metadata) {
Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
this.short_service_info.appendChild(dmr_service);
}
if (does_content) {
Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
this.short_service_info.appendChild(dcr_service);
}
// look for document display format
String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
if (display_format != null) {
this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
// should we keep a copy?
// check for docType option.
Element doc_type_opt = GSXML.getNamedElement(display_format, "gsf:option", GSXML.NAME_ATT, "documentType");
if (doc_type_opt != null) {
String value = doc_type_opt.getAttribute(GSXML.VALUE_ATT);
if (!value.equals("")) {
this.default_document_type = value;
}
}
}
if (macro_resolver != null) {
macro_resolver.setSiteDetails(this.site_http_address, this.cluster_name, this.getLibraryName());
// set up the macro resolver
Element replacement_elem = (Element)GSXML.getChildByTagName(extra_info, "replaceList");
if (replacement_elem != null) {
macro_resolver.addMacros(replacement_elem);
}
// look for any refs to global replace lists
NodeList replace_refs_elems = extra_info.getElementsByTagName("replaceListRef");
for (int i=0; i
*/
protected Element createDocNode(String node_id) {
    // Builds a new doc node element for node_id and stamps it with three
    // attributes, in this order: node id, document type, node type.
    Element docNode = this.doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, node_id);

    // A configured default document type takes precedence over asking
    // getDocType() about this particular node.
    final String docType = (default_document_type == null)
        ? getDocType(node_id)
        : default_document_type;
    docNode.setAttribute(GSXML.DOC_TYPE_ATT, docType);

    // The node type is derived from the document type chosen above.
    docNode.setAttribute(GSXML.NODE_TYPE_ATT, getNodeType(node_id, docType));
    return docNode;
}
/**
 * Appends a doc node for every child of doc_id to the given element and,
 * when recursive is true, does the same for each child that is not a leaf.
 * Does nothing if getChildrenIds() returns null for doc_id.
 *
 * @param doc       the element the child doc nodes are appended to
 * @param doc_id    the id whose children are looked up
 * @param recursive true to descend into non-leaf children as well
 */
protected void addDescendants(Element doc, String doc_id,
                              boolean recursive)
{
    ArrayList children = getChildrenIds(doc_id);
    if (children == null) {
        return;
    }
    int idx = 0;
    while (idx < children.size()) {
        String childId = (String) children.get(idx);
        Element childNode = createDocNode(childId);
        doc.appendChild(childNode);
        if (recursive) {
            // leaves have nothing below them, so only descend into the rest
            boolean isLeaf = childNode.getAttribute(GSXML.NODE_TYPE_ATT).equals(GSXML.NODE_TYPE_LEAF);
            if (!isLeaf) {
                addDescendants(childNode, childId, recursive);
            }
        }
        idx++;
    }
}
/**
 * Adds all the siblings of current_id to the parent element and returns
 * the element that now represents current_id.
 *
 * The first element child of parent_node is removed, every child of
 * parent_id is then added (non-recursively), and the child whose node id
 * equals current_id is looked up again among the new children. The
 * returned element is therefore the newly created one, not the one that
 * was removed.
 *
 * @param parent_node the element the siblings are added to
 * @param parent_id   the id of the parent node
 * @param current_id  the id of the node whose siblings are wanted
 * @return the new element for current_id, or null (after logging an
 *         error) if parent_node has no element child to begin with
 */
protected Element addSiblings(Element parent_node, String parent_id,
                              String current_id) {
    Element placeholder = GSXML.getFirstElementChild(parent_node);
    if (placeholder != null) {
        // take the existing child out; it is re-created in its proper
        // position when all of the parent's children are added
        parent_node.removeChild(placeholder);
        addDescendants(parent_node, parent_id, false);

        // locate the re-created node by element name and node id
        return GSXML.getNamedElement(parent_node, placeholder.getNodeName(), GSXML.NODE_ID_ATT, current_id);
    }
    logger.error(" there should be a first child.");
    return null;
}
/**
 * Tells whether an id carries one of the relative suffixes:
 * .fc (first child), .lc (last child), .pr (parent), .ns (next sibling),
 * .ps (previous sibling), .rt (root) or .ss (specified sibling).
 * The decision is delegated to OID.needsTranslating().
 *
 * @param id the id to examine
 * @return true if the id ends in one of the suffixes, false otherwise
 */
protected boolean idNeedsTranslating(String id) {
    final boolean hasRelativeSuffix = OID.needsTranslating(id);
    return hasRelativeSuffix;
}
/**
 * Lists the ids of all siblings of node_id, node_id itself included,
 * by asking for the children of its parent.
 *
 * @param node_id the node whose siblings are wanted
 * @return the children of the node's parent, or null if the node has no
 *         parent
 */
protected ArrayList getSiblingIds(String node_id) {
    final String parentId = getParentId(node_id);
    return (parentId == null) ? null : getChildrenIds(parentId);
}
/**
 * Works out the node type of the specified node. The result is one of
 * GSXML.NODE_TYPE_LEAF, GSXML.NODE_TYPE_INTERNAL or GSXML.NODE_TYPE_ROOT.
 *
 * The checks are applied in this order: any node of a simple document is
 * a leaf; otherwise a node without a parent is the root; otherwise a node
 * of a paged document, or a node without children, is a leaf; anything
 * else is internal.
 *
 * @param node_id  the node to classify
 * @param doc_type the type of the document the node belongs to
 * @return the node type
 */
protected String getNodeType(String node_id, String doc_type) {
    String nodeType;
    if (doc_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
        nodeType = GSXML.NODE_TYPE_LEAF;
    } else if (getParentId(node_id) == null) {
        nodeType = GSXML.NODE_TYPE_ROOT;
    } else if (doc_type.equals(GSXML.DOC_TYPE_PAGED) || getChildrenIds(node_id) == null) {
        // children are only looked up when the document is not paged
        nodeType = GSXML.NODE_TYPE_LEAF;
    } else {
        nodeType = GSXML.NODE_TYPE_INTERNAL;
    }
    return nodeType;
}
/**
 * Translates an id that ends in a relative suffix (.fc, .lc, .pr, .ns,
 * .ps, .rt or .ss) to a plain id.
 * Default implementation: just remove the suffix. Subclasses that can
 * resolve the relation properly should override this.
 *
 * The previous body, id.substring(0, id.length()), returned the id
 * unchanged, suffix included, which contradicted the documented default.
 *
 * @param id the id to translate; may be null
 * @return the id without its three-character suffix when
 *         idNeedsTranslating() recognises one; otherwise the id as given
 *         (null stays null)
 */
protected String translateId(String id) {
    // every recognised suffix is a dot followed by two letters
    final int suffixLength = 3;
    if (id == null || id.length() <= suffixLength || !idNeedsTranslating(id)) {
        return id;
    }
    return id.substring(0, id.length() - suffixLength);
}
/**
 * Translates an external id (one that is not a greenstone id) to a
 * greenstone id.
 * Default implementation: the id is handed back untouched.
 *
 * @param id the external id
 * @return the same id
 */
protected String translateExternalId(String id)
{
    // no translation is done at this level
    return id;
}
/**
 * Gives the type of the document that the specified node belongs to.
 * The result should be one of GSXML.DOC_TYPE_SIMPLE,
 * GSXML.DOC_TYPE_PAGED or GSXML.DOC_TYPE_HIERARCHY.
 * Default implementation: always GSXML.DOC_TYPE_SIMPLE, whatever the node.
 *
 * @param node_id a node of the document
 * @return the document type
 */
protected String getDocType(String node_id)
{
    final String simpleType = GSXML.DOC_TYPE_SIMPLE;
    return simpleType;
}
/**
 * Gives the id of the root node of the document that contains node_id;
 * this may be node_id itself.
 * Default implementation: node_id is returned as is.
 *
 * @param node_id a node of the document
 * @return the id of the document's root node
 */
protected String getRootId(String node_id)
{
    // at this level every node is taken to be its own root
    return node_id;
}
/**
 * Gives the ids of the children of node_id, in order.
 * Default implementation: there are never any children, so the result is
 * always null.
 *
 * @param node_id the node whose children are wanted
 * @return the child ids in order, or null if the node has no children
 */
protected ArrayList getChildrenIds(String node_id)
{
    // null, not an empty list, is how "no children" is signalled
    return null;
}
/**
 * Gives the id of the parent of node_id.
 * Default implementation: there is never a parent, so the result is
 * always null.
 *
 * @param node_id the node whose parent is wanted
 * @return the parent's node id, or null if the node has no parent
 */
protected String getParentId(String node_id)
{
    // null is how "no parent" is signalled
    return null;
}
/** Gets the metadata for the doc node doc_id.
* Returns a metadataList element.
* NOTE(review): the example that illustrated the element's layout seems to
* have lost its markup (only the word "value" was left); presumably the
* list holds one entry per metadata value - confirm against an
* implementing class.
*
* @param doc_id the id of the doc node
* @param all_metadata presumably true when all metadata is wanted rather
* than only the named items - TODO confirm
* @param metadata_names the names of the metadata asked for (presumably
* Strings - TODO confirm)
* @return the metadataList element
* @throws GSException declared for implementations to signal failure
*/
abstract protected Element getMetadataList(String doc_id,
boolean all_metadata,
ArrayList metadata_names) throws GSException;
/** Returns the content of a node.
* Should return a nodeContent element, holding text content or other
* elements.
* NOTE(review): the original comment ended with "can return" and nothing
* after it; what it allowed to be returned is unknown - confirm against
* an implementing class.
*
* @param doc_id the id of the node
* @param lang presumably the language the content is wanted in - TODO confirm
* @return the nodeContent element
* @throws GSException declared for implementations to signal failure
*/
abstract protected Element getNodeContent(String doc_id, String lang) throws GSException;
/** Returns the structural information asked for.
*
* @param doc_id the id of the node the information is about
* @param info_type one of INFO_NUM_SIBS, INFO_NUM_CHILDREN, INFO_SIB_POS
* @return the requested information, as a String
*/
abstract protected String getStructureInfo(String doc_id, String info_type);
/**
 * Default implementation: always returns null, whatever href_url is.
 * NOTE(review): judging by the name, overriding classes presumably map a
 * link url to a document id - confirm against them.
 *
 * @param href_url the url (not examined at this level)
 * @return null
 */
protected String getHrefOID(String href_url)
{
    return null;
}
}