/**
*#########################################################################
* FedoraGS3DL.java - works with the demo-client for Greenstone 3, of the
* Greenstone digital library suite from the New Zealand Digital Library
* Project at the University of Waikato, New Zealand.
*
* Copyright (C) 2008 New Zealand Digital Library Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*########################################################################
*/
package org.greenstone.fedora.services;
import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.rmi.RemoteException;
import javax.xml.transform.TransformerException;
//import javax.xml.transform.TransformerConfigurationException;
/**
 * Contract for retrieving the datastreams that are specific to Greenstone
 * documents stored in a Fedora repository.
 *
 * <p>The interface also carries the string constants (PID prefix, attribute
 * names, tag names and datastream identifiers) that are declared alongside
 * the method declarations. Fields of an interface are implicitly
 * {@code public static final} and its methods implicitly {@code public},
 * so those modifiers are not spelled out below.
 *
 * @author ak19
 */
public interface FedoraGS3DL {

    /* ------------------------------------------------------------------ */
    /* CONSTANTS (FOLLOWED BY METHOD DECLARATIONS)                        */
    /* ------------------------------------------------------------------ */

    /**
     * Used in place of the message router: indicates that request messages
     * sent here come from FedoraGS3.
     */
    String FEDORA_GS3 = "FedoraGS3";

    // Some constant string literals we'll be dealing with

    /** Wildcard character, as used in patterns such as {@code greenstone:*-collection}. */
    String WILDCARD = "*";

    /** Separator placed in front of the {@link #COLLECTION} suffix of a collection pid. */
    String HYPHEN = "-";

    /** Separator between the {@link #GREENSTONE} prefix and the rest of a pid. */
    String COLON = ":";

    /** Period character, as found in section identifiers such as {@code 1.2.2}. */
    String PERIOD = ".";

    /**
     * "greenstone" is the FEDORA PID prefix we'll be working with
     * (see Fedora batch processing).
     */
    String GREENSTONE = "greenstone";

    /** The PID prefix followed by a colon: {@code "greenstone:"}. */
    String GREENSTONE_ = GREENSTONE + COLON;

    /** The word "collection", as it appears at the end of a collection pid. */
    String COLLECTION = "collection";

    /** The collection suffix preceded by a hyphen: {@code "-collection"}. */
    String _COLLECTION = HYPHEN + COLLECTION;

    // Note: a UTF16 = "UTF-16" constant used to be declared here; it was
    // commented out as "not used".

    /** Character encoding name ("UTF8" is an alias Java accepts for UTF-8). */
    String UTF8 = "UTF8";

    // attribute names

    /** The "Title" attribute value. */
    String TITLE = "Title";

    /** The "id" attribute name. */
    String ID = "id";

    /** The "name" attribute name. */
    String NAME = "name";

    /** The "collectionname" attribute name. */
    String COLLECTIONNAME = "collectionname";

    /** The "qualifier" attribute name. */
    String QUALIFIER = "qualifier";

    /** Two-letter language code for English. */
    String ENGLISH = "en";

    // tag names

    /** The "metadata" tag name. */
    String METADATA = "metadata";

    /** The "Section" tag name. */
    String SECTION_ELEMENT = "Section";

    /**
     * EX marks the XML metadata file that contains Greenstone extracted
     * metadata.
     */
    String EX = "EX";

    /** Fedora's Dublin Core metadata. */
    String DC = "DC";

    /**
     * DLS metadata of Greenstone documents. This metadata set is optionally
     * provided for top-level documents; not every Greenstone top-level
     * document in the Fedora repository has associated DLS metadata.
     */
    String DLS = "DLS";

    /**
     * Table of contents of a Greenstone-Fedora document, outlining the
     * structure of the document.
     */
    String TOC = "TOC";

    /**
     * The SECTION prefix in the name of a section's datastream,
     * e.g. {@code SECTION1.2.2}.
     */
    String SECTION = "SECTION";

    /* ------------------------------------------------------------------ */
    /* METHOD DEFINITIONS                                                 */
    /* ------------------------------------------------------------------ */

    /**
     * Returns the default language used to query for titles (and anything
     * else where there are multiple language options).
     *
     * @return the default language; upon initialisation this is English.
     */
    String getLanguage();

    /**
     * Sets the default language used to query for titles (and anything else
     * where there are multiple language options). If the default language is
     * not available for a query, English ("en") is used; if that is not
     * available either, the first other available language is used.
     *
     * @param lang the two-letter language code to set the default language to.
     */
    void setLanguage(String lang);

    /**
     * Returns the default maximum number of search results returned for a
     * search.
     *
     * @return the default maximum; upon initialisation this is Java's
     *         {@code Integer.MAX_VALUE}.
     */
    int getMaxResults();

    /**
     * Sets the default maximum number of search results returned for a search.
     *
     * @param maxresults the new default maximum number of search results to
     *        be returned.
     */
    void setMaxResults(int maxresults);

    /** @return fedora's baseURL. */
    String getBaseURL();

    /**
     * @return the portAddressURL of the Fedora APIA web service. This should
     *         be the endpoint location in the APIA's WSDL file; otherwise
     *         set it to something else in the .properties file.
     */
    String getPortAddressURL();

    /** @return the baseURL for gsdlAssocFiles. */
    String getAssocFileBaseURL();

    /* GET COLLECTIONS, DOCUMENTS, SECTIONS, AND TITLES */

    /**
     * Gets all greenstone collections by searching for
     * {@code greenstone:*-collection}.
     *
     * @return an array of Strings containing the pids of all collections
     *         matching the format {@code greenstone:*-collection}.
     */
    String[] getCollections() throws RemoteException;

    /**
     * Extracts the collection names from collection pids.
     *
     * @param collPIDs an array of Strings denoting the pids of greenstone
     *        collections stored in the fedora repository. These should be of
     *        the format {@code "greenstone:<collectionName>-collection"}.
     * @return the {@code <name>}s (in {@code greenstone:<name>-collection})
     *         of the collections indicated by collPIDs.
     */
    String[] getCollectionNames(String[] collPIDs);

    /**
     * Builds collection pids from collection names.
     *
     * @param collNames a list of names of greenstone collections stored in
     *        the fedora repository.
     * @return {@code "greenstone:<name>-collection"} for every {@code <name>}
     *         in collNames.
     */
    String[] getCollectionPIDs(String[] collNames);

    /**
     * Builds the collection pid for a single collection name.
     *
     * @param collName the name of a greenstone collection stored in the
     *        fedora repository.
     * @return {@code greenstone:<name>-collection} for the {@code <name>}
     *         denoted by collName.
     */
    String getCollectionPID(String collName);

    /**
     * Gets the title of the collection denoted by the given pid by
     * retrieving the title metadata from the collection's EX datastream.
     *
     * @param collPID the pid of a greenstone collection in the fedora
     *        repository.
     * @return the title (in the default language, else English, else the
     *         first title found) of the collection denoted by collPID.
     */
    String getCollectionTitle(String collPID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the titles of several collections.
     *
     * @param collPIDs a list of pids identifying greenstone collections
     *        stored in the fedora repository.
     * @return the collection titles of all the collections indicated by
     *         collPIDs.
     */
    String[] getCollectionTitles(String[] collPIDs)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the title metadata of the given doc objects of a collection.
     *
     * @param docPIDs a list of pids identifying documents stored in the
     *        fedora repository.
     * @return the titles, in the same order as the given docPIDs.
     */
    String[] getDocTitles(String[] docPIDs)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the title metadata of a particular doc object in a collection.
     * The docPID already contains the collection name.
     *
     * @param docPID the pid of the document in the fedora repository, of the
     *        form {@code greenstone:<colName>-<doc-identifier>}.
     * @return the title of the fedora document item denoted by docPID.
     */
    String getDocTitle(String docPID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the title metadata of the given document sections.
     *
     * @param docPIDs a list of pids identifying documents stored in the
     *        fedora repository.
     * @param sectionIDs a list of sectionIDs identifying the individual
     *        sections of those documents whose titles are requested.
     * @return the titles, in the same order as the given docPIDs and their
     *         associated sectionIDs.
     */
    String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the title metadata of the given document section.
     * (The docPID already contains the collection name.)
     *
     * @param docPID a pid identifying a document in the fedora repository.
     * @param sectionID the sectionID of the section of the document whose
     *        title is requested.
     * @return the title of that section.
     */
    String getSectionTitle(String docPID, String sectionID)
            throws UnsupportedEncodingException, RemoteException, SAXException, IOException;

    /**
     * Lists the (document) objects of a greenstone collection. All pids that
     * do not map to a collection are assumed to be documents.
     *
     * @param colPID the pid of the greenstone collection stored in the
     *        fedora repository.
     * @return a list of the fedora pids of all (document) objects in the
     *         given collection.
     */
    String[] getCollectionDocs(String colPID) throws RemoteException;

    /**
     * Given the pid of a document fedora data object, returns all itemIDs
     * that are part of that data object and are Sections.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @return an array of itemIDs of the Sections of the document, in
     *         ascending order. These are of the form {@code "SECTION1.*"}.
     */
    String[] getSectionNames(String docPID) throws RemoteException;

    /**
     * Given the pid of a document fedora data object, returns all itemIDs
     * that are part of that data object and are Sections, but reduced to
     * just the Section numbers.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @return an array of the Section numbers of the document, in ascending
     *         order. Return values are of the form {@code "1.*"}.
     */
    String[] getSectionNumbers(String docPID) throws RemoteException;

    /**
     * Gets the titles of several sections of one document.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionIDs a list of identifiers of sections in the document
     *        denoted by docPID whose titles need to be returned. Each
     *        sectionID may be either a section name (e.g. SECTION1.5.1) or a
     *        section number (e.g. 1.5.1).
     * @return the titles of the document sections denoted by the parameters.
     */
    String[] getTitles(String docPID, String[] sectionIDs)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets the title of one section of a document.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID whose title needs to be returned. It may be
     *        either a section name (e.g. SECTION1.5.1) or a section number
     *        (e.g. 1.5.1).
     * @return the title of the document section denoted by the parameters.
     */
    String getTitle(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Extracts the document name from a document pid.
     *
     * @param docPID the pid of a greenstone document in the fedora
     *        repository, of the form {@code greenstone:<colname>-<docName>}.
     * @return the {@code <docName>} part of docPID.
     */
    String getDocName(String docPID);

    /**
     * Extracts the collection name from a collection pid.
     *
     * @param collPID the pid of a greenstone collection in the fedora
     *        repository, of the form {@code greenstone:<name>-collection}.
     * @return the {@code <name>} part of collPID.
     */
    String getCollectionName(String collPID);

    /* GETTING A DOCUMENT OR SECTION'S DATA STREAMS */

    /* All "greenstone:*" objects in fedora (be they collections or documents)
     * have TOC, EX and DC datastreams. The following methods return the
     * content (XML) of these datastreams as is. */

    /**
     * All objects (incl. "greenstone:*" objects) in fedora - be they
     * collections, top-level documents or document sections - have a DC
     * datastream. This method returns the content (XML) of the DC datastream
     * as it is stored in fedora's repository.
     *
     * @param pid the fedora persistent identifier of an item in the fedora
     *        repository.
     * @return a String version of the XML in the DC datastream of the fedora
     *         object denoted by pid.
     */
    String getDC(String pid) throws RemoteException, UnsupportedEncodingException;

    /**
     * All "greenstone:*" objects in fedora (be they collections, top-level
     * documents or document sections) have an EX datastream. This method
     * returns the content (XML) of the EX datastream as is.
     *
     * @param pid the fedora persistent identifier of an item in the fedora
     *        repository.
     * @return a String version of the XML in the EX datastream of the fedora
     *         object denoted by pid.
     */
    String getEX(String pid) throws RemoteException, UnsupportedEncodingException;

    /**
     * Some "greenstone:*" top-level documents in the fedora repository (but
     * not greenstone collections or document sections) have a DLS metadata
     * datastream. This method returns the content (XML) of the DLS
     * datastream as is.
     *
     * @param pid the fedora persistent identifier of an item in the fedora
     *        repository.
     * @return a String version of the XML in the DLS datastream of the
     *         fedora object denoted by pid.
     */
    String getDLS(String pid) throws RemoteException, UnsupportedEncodingException;

    /**
     * All "greenstone:*" objects in fedora (be they collections or
     * documents) have a TOC datastream. This method returns the content
     * (XML) of the TOC datastream as is. (Calls the default fedora-system 3
     * dissemination {@code <pid>/TOC}.)
     *
     * @param pid the fedora persistent identifier of an item in the fedora
     *        repository.
     * @return a String version of the XML in the TOC datastream of the
     *         fedora object denoted by pid.
     */
    String getTOC(String pid) throws RemoteException, UnsupportedEncodingException;

    /**
     * Gets a section's XML. Works out whether sectionID is a section name
     * or a section number.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return the section's XML (as a String) as it is stored in fedora.
     */
    String getSection(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException;

    /**
     * Gets a section's DC metadata.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return the required section's DC metadata XML datastream.
     */
    String getSectionDCMetadata(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException;

    /**
     * Gets a section's EX metadata.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return the required section's EX metadata XML datastream.
     */
    String getSectionEXMetadata(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException;

    /* METHODS FOR GETTING THE STRUCTURE OF DOCUMENTS */

    /**
     * Gets, as XML, the portion of the TOC that contains the section denoted
     * by sectionID and its direct child subsections. The children are
     * returned in the order they are encountered, which happens to be the
     * required order of ascending sectionID.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return the XML content of just that portion of the TOC.
     */
    Element getChildrenOfSectionXML(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets, as a string, the portion of the TOC that contains the section
     * denoted by sectionID and its direct child subsections. The children
     * are returned in the order they are encountered, which happens to be
     * the required order of ascending sectionID.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return a string representing the XML content of just that portion of
     *         the TOC.
     */
    String getChildrenOfSection(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException,
                   TransformerException;

    /**
     * Gets, as XML, the part of the TOC XML file (which outlines doc
     * structure) relating to the given section. This includes the section
     * denoted by sectionID as well as all descendant subsections thereof.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return the part of the TOC relating to the given section.
     */
    Element getSubsectionXML(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException;

    /**
     * Gets, as a String, the part of the TOC XML file (which outlines doc
     * structure) relating to the given section. This includes the section
     * denoted by sectionID as well as all descendant subsections thereof.
     *
     * @param docPID a fedora pid identifying a greenstone document object.
     * @param sectionID identifies the particular section in the document
     *        denoted by docPID; may be a section name or number.
     * @return a String representation of the part of the TOC relating to
     *         the given section.
     */
    String getSubsection(String docPID, String sectionID)
            throws RemoteException, UnsupportedEncodingException, SAXException, IOException,
                   TransformerException;

    /* BROWSING */

    /**
     * Allows browsing the document titles of a greenstone collection stored
     * in the fedora repository by letter.
     *
     * @param collName presumably the name of the greenstone collection to
     *        browse (undocumented in the original; confirm against the
     *        implementation).
     * @param letter the starting letter to browse by (any letter from A to Z).
     * @return the browse results: the pids of the documents whose titles
     *         start with the given letter.
     */
    String[] browseTitlesByLetter(String collName, String letter)
            throws RemoteException, FedoraVersionNotSupportedException;

    /**
     * Allows querying the document titles of a greenstone collection stored
     * in the fedora repository for a term that may occur anywhere in their
     * titles.
     *
     * @param collName presumably the name of the greenstone collection to
     *        search (undocumented in the original; confirm against the
     *        implementation).
     * @param titleContents the word or phrase to search the collection's
     *        document titles for.
     * @param startsWith if true, searches for titles that start with
     *        titleContents; otherwise searches for titles that contain
     *        titleContents.
     * @return the pids of the documents whose titles match the term.
     */
    String[] searchDocumentTitles(String collName, String titleContents, boolean startsWith)
            throws RemoteException, FedoraVersionNotSupportedException;

    /* Design note: the following single method could be used instead, where
     * ComparisonOperator can be contains or startswith:
     *     public String[] browse(String term, ComparisonOperator?);
     */
}