/** *######################################################################### * FedoraGS3DL.java - works with the demo-client for Greenstone 3, of the * Greenstone digital library suite from the New Zealand Digital Library * Project at the * University of Waikato, New Zealand. *

* Copyright (C) 2008 New Zealand Digital Library Project *

* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *

* This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *######################################################################## */ package org.greenstone.fedora.services; import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException; import org.w3c.dom.Element; import org.xml.sax.SAXException; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.rmi.RemoteException; import javax.xml.transform.TransformerException; //import javax.xml.transform.TransformerConfigurationException; /** * Defines the methods that must be provided to retrieve the datastreams * specific to Greenstone documents stored in a Fedora repository. * @author ak19 */ public interface FedoraGS3DL { /* CONSTANTS (FOLLOWED BY METHOD DECLARATIONS) */ /** Instead of message router, we indicate that request messages * sent here come from FedoraGS3 */ public static final String FEDORA_GS3 = "FedoraGS3"; // Some constant string literals we'll be dealing with /** "greenstone" is the FEDORA PID prefix we'll be working with * @see Fedora batch processing */ public static final String GREENSTONE = "greenstone"; public static final String GREENSTONE_ = GREENSTONE+":"; public static final String COLLECTION = "collection"; public static final String _COLLECTION = "-"+COLLECTION; public static final String WILDCARD = "*"; public static final String HYPHEN = "-"; public static final String COLON = ":"; public static final String PERIOD = "."; // public static final String UTF16 = "UTF-16"; // not used public static final String UTF8 = "UTF8"; // attribute names public static final String TITLE = "Title"; public static final String ID = "id"; public static final String NAME = "name"; public static final String COLLECTIONNAME = "collectionname"; public static final String QUALIFIER = "qualifier"; public static final String ENGLISH = "en"; // tag names public static final String METADATA = "metadata"; public static final String SECTION_ELEMENT = "Section"; /** EX marks the XML metadata file that contains Greenstone extracted * metadata */ public static final String EX = "EX"; /** Fedora's Dublin Core metadata */ public static final String DC = "DC"; /** DLS metadata of Greenstone documents - this metadata set is optionally * provided for top level documents. Not all Greenstone top-level documents * in the Fedora repository may have associated DLS metadata, however.*/ public static final String DLS = "DLS"; /** Table of contents for a Greenstone-Fedora document which outlines the * structure of the document. */ public static final String TOC = "TOC"; /** The SECTION prefix in the name of a section's datastream; * eg. SECTION1.2.2 */ public static final String SECTION = "SECTION"; /* METHOD DEFINITIONS */ /** @return the default language used to query for titles (and anything else * where there are multiple language options). Upon initialisation, this * defaults to English. */ public String getLanguage(); /** Sets the the default language used to query for titles (and anything else * where there are multiple language options). If the default language for any * query is not available, then English ("en") is used. If that's not available * then the first other available language is used. * @param lang - the two-letter language code to set the default language to. */ public void setLanguage(String lang); /** The default maximum number of search results returned for a search. Upon * initialisation, this defaults to Java's Integer.MAX_VALUE. */ public int getMaxResults(); /** Set the default maximum number of search results returned for a search. * @param maxresults - the new default maximum number of search results to * be returned. */ public void setMaxResults(int maxresults); /** @return fedora's baseURL */ public String getBaseURL(); /** @return the portAddressURL of the Fedora APIA web service * (should be the endpoint location in the APIA's WSDL file). * Else set this in the .properties file to something else. */ public String getPortAddressURL(); /** @return the baseURL for gsdlAssocFiles */ public String getAssocFileBaseURL(); /* GET COLLECTIONS, DOCUMENTS, SECTIONS, AND TITLES */ /** Gets all greenstone collections. Searches for greenstone:*-collection. * @return an array of Strings containing the pids of all collections * matching the format greenstone:*-collection. */ public String[] getCollections() throws RemoteException; /** @return the <name>s (in greenstone:<name>-collection) for the collections * indicated by collPIDs. * @param collPIDs - an array of Strings denoting the pids for greenstone * collections stored in the fedora repositoryl. These should be of the * format "greenstone:<collectionName>-collection". */ public String[] getCollectionNames(String[] collPIDs); /** @return "greenstone:<name>-collection" for all <name>s in the * parameter collNames. * @param collNames - a list of names of greenstone collections * stored in the fedora repository. */ public String[] getCollectionPIDs(String[] collNames); /** @return greenstone:<name>-collection for the <name> denoted by * parameter collName. * @param collName - the name of a greenstone collection stored * stored in the fedora repository. */ public String getCollectionPID(String collName); /** * Gets the title of the collection denoted by the given collection's pid by * retrieving the title metadata for it from the collection's EX datastream. * @return the title (in the default language, else English, else the * first title found) for the particular collection denoted by its PID. * @param collPID is the pid of a greenstone collection in the fedora * repository. */ public String getCollectionTitle(String collPID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the collection titles for all the collections indicated by * collPIDs. * @param collPIDs - a list of pids identifying greenstone collections * stored in the fedora repository. */ public String[] getCollectionTitles(String[] collPIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the title metadata for the given doc objects of a collection. * These titles are returned in the same order as the given docIDs. * @param docPIDs - a list of pids identifying documents stored in the * fedora repository. */ public String[] getDocTitles(String[] docPIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** Gets the title metadata for a particular doc object in a collection * denoted by docPID. The docPID already contains the collection name. * @return the title for the fedora document item denoted by docPID * @param docPID is the pid of the document in the fedora repository * (docPID is of the form greenstone:<colName>-<doc-identifier> */ public String getDocTitle(String docPID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the title metadata for the given document sections. * These titles are returned in the same order as the given docPIDs * and associated sectionIDs. * @param docPIDs - a list of pids identifying documents stored in the * fedora repository. * @param sectionIDs - a list of sectionIDs identifying individual sections * of documents stored in the fedora repository whose titles are requested. */ public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the title metadata for the given document section. * (The docPID already contain the collection name anyway.) * @param docPID - a pid identifying a document in the fedora repository. * @param sectionID - the sectionID of the section of the * document whose title is requested. */ public String getSectionTitle(String docPID, String sectionID) throws UnsupportedEncodingException, RemoteException, SAXException, IOException; /** @return a list of the fedora pids of all (document) objects in the * given greenstone collection stored in fedora's repository. All * pids that do not map to a collection are assumed to be documents. * @param colPID is the pid of the greenstone collection stored in * the fedora repository. */ public String[] getCollectionDocs(String colPID) throws RemoteException; /** Given the pid of a document fedora data object, this method will return * all itemIDs that are part of that data object and are Sections. * @return an array of itemIDs of the Sections of the document, * indicated by docPID, in ascending order. These are of the form: "SECTION1.*" * @param docPID is a fedora pid identifying a greenstone document object. */ public String[] getSectionNames(String docPID) throws RemoteException; /** Given the pid of a document fedora data object, this method will return all * itemIDs that are part of that data object and are Sections, but just the * Section numbers are returned. * @return an array of itemIDs of the Section numbers of the document * indicated by docPID, in ascending order. Return values are of form: "1.*". * @param docPID is a fedora pid identifying a greenstone document object. */ public String[] getSectionNumbers(String docPID) throws RemoteException; /** @return the titles for the document sections denoted by the parameters. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionIDs is a list of identifiers identifying sections in the * document denoted by docPID, whose titles need to be returned. Each * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1) * or a section number (eg. 1.5.1). */ public String[] getTitles(String docPID, String[] sectionIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the title for the document section denoted by the parameters. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, whose title needs to be returned. The * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1) * or a section number (eg. 1.5.1). */ public String getTitle(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return the <docName> in the parameter docPID (which is of the form: * greenstone:<colname>-<docName>) * @param docPID is the pid of a greenstone document in the fedora * repository. */ public String getDocName(String docPID); /** @return the <name> in the parameter collPID * (greenstone:<name>-collection) * @param collPID is the pid of a greenstone collection in the fedora * repository. */ public String getCollectionName(String collPID); /* GETTING A DOCUMENT OR SECTION'S DATA STREAMS */ /* All "greenstone:*" objects in fedora (be they collections or documents) * have TOC, EX and DC datastreams. The following methods return the content * (XML) of these datastreams as is. */ /** All objects (incl "greenstone:*" objects) in fedora - be they collections, * top-level documents or document sections) have an EX datastream. This method * returns the content (XML) of the DC datastream as it is stored in fedora's * repository. * @return a String version of the XML in the DC datastream for the fedora object * denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. */ public String getDC(String pid) throws RemoteException, UnsupportedEncodingException; /** All "greenstone:*" objects in fedora (be they collections, top-level * documents or document sections) have an EX datastream. This method * returns the content (XML) of the EX datastream as is. * @return a String version of the XML in the DC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. */ public String getEX(String pid) throws RemoteException, UnsupportedEncodingException; /** * Some "greenstone:*" top-level documents in the fedora repository (but not * greenstone collections or document sections) have a DLS metadata datastream. * This method returns the content (XML) of the DLS datastream as is. * @return a String version of the XML in the DLS datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. * */ public String getDLS(String pid) throws RemoteException, UnsupportedEncodingException; /** All "greenstone:*" objects in fedora (be they collections or documents) * have a TOC datastream. This method returns the content (XML) of the TOC * datastream as is. (Calls default fedora-system 3 dissemination <pid>/TOC.) * @return a String version of the XML in the DC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. */ public String getTOC(String pid) throws RemoteException, UnsupportedEncodingException; /** @return the section's XML (as a String) as it is stored in fedora. * Works out if sectionID is a sectionName or sectionNumber. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSection(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException; /** @return the required section's DC metadata XML datastream. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSectionDCMetadata(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException; /** @return the required section's EX metadata XML datastream. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSectionEXMetadata(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException; /* METHODS FOR GETTING THE STRUCTURE OF DOCUMENTS */ /** @return the XML content of the TOC of just that portion of the TOC which * contains the section denoted by sectionID and its direct child subsections. * The children are returned in the order they are encountered, which * happens to be in the required order of ascending sectionID. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public Element getChildrenOfSectionXML(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return a string representing the XML content of the TOC of just * that portion of the TOC which contains the section denoted by sectionID * and its direct child subsections. * The children are returned in the order they are encountered, which * happens to be in the required order of ascending sectionID. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getChildrenOfSection(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException, TransformerException; /** @return the part of the TOC XML file (which outlines doc structure) * relating to the given section. This includes the section denoted by * sectionID as well as all descendent subsections thereof. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public Element getSubsectionXML(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException; /** @return a String representation of the part of the TOC XML file * (which outlines doc structure) relating to the given section. This * includes the section denoted by sectionID as well as all descendent * subsections thereof. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSubsection(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException, TransformerException; /* BROWSING */ /** Allows browsing document titles of a greenstone collection stored in * the fedora repository by letter. * @return the browse results for documents that start with any letter from * A to Z. Returns the document pids whose titles start with the given letter. * @param letter is the starting letter to browse by. */ public String[] browseTitlesByLetter(String collName, String letter) throws RemoteException, FedoraVersionNotSupportedException; /** Allows querying document titles of a greenstone collection stored in * the fedora repository for a term that may occur anywhere in their titles. * @return the document pids whose titles contain the parameter term. * @param titleContents is the word or phrase to search the collection's * document titles for. * @param startsWith - if true, searches for titles that start with * titleContents. Else it searches for titles that contain titleContents. */ public String[] searchDocumentTitles(String collName, String titleContents, boolean startsWith) throws RemoteException, FedoraVersionNotSupportedException; /* The following single method can be used instead. * ComparisonOperator can be contains or startswith. */ /* public String[] browse(String term, ComparisonOperator?); */ }