/**
*#########################################################################
* FedoraConnection.java - works with the demo-client for Greenstone 3,
* of the Greenstone digital library suite from the New Zealand Digital
* Library Project at the University of Waikato, New Zealand.
*
* Copyright (C) 2008 New Zealand Digital Library Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*########################################################################
*/
package org.greenstone.fedora.services;
import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
import org.greenstone.gsdl3.util.GSXML;
import fedora.client.utility.AutoFinder;
import fedora.server.access.FedoraAPIAServiceLocator;
// The object for accessing FedoraAPI-A web services:
import fedora.server.access.FedoraAPIA;
// The definitions for all complex fedora types:
import fedora.server.types.gen.MIMETypedStream;
import fedora.server.types.gen.RepositoryInfo;
import fedora.server.types.gen.FieldSearchResult;
import fedora.server.types.gen.FieldSearchQuery;
import fedora.server.types.gen.DatastreamDef;
import fedora.server.types.gen.ObjectFields;
import fedora.server.types.gen.Condition;
import fedora.server.types.gen.ComparisonOperator;
//import fedora.server.types.gen.*;
import javax.net.ssl.SSLHandshakeException;
import java.net.ConnectException;
import org.xml.sax.SAXException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import java.net.MalformedURLException;
import java.rmi.RemoteException;
import java.io.StringReader;
import java.io.FileInputStream;
import java.io.File;
import java.util.TreeSet;
import java.util.Properties;
import java.util.Vector;
import java.awt.GridLayout;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JPasswordField;
import javax.swing.JTextField;
import org.apache.log4j.Logger;
import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.*;
import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
/** Class that establishes a connection with Fedora's web services (via
* Java stub classes for the same) and then provides methods to retrieve
* Greenstone-specific data, such as the TOC, EX, DC, and Section
* datastreams of the Greenstone documents stored in Fedora's repository.
* These datastreams are returned as Strings without any changes being
* made to them.
* @author ak19
*/
public class FedoraConnection implements FedoraGS3DL {
/** The log4j logging instance for this class. */
private static final Logger LOG = Logger.getLogger(
FedoraConnection.class.getName());
/** The version of fedora that is supported by class FedoraConnection.
 * Returned by getSupportedVersion(). */
protected static final String SUPPORTED_VERSION = "3.3"; //"2.2.1";
// 3.3 works with genericSearch version 2.2
// 3.6.1 works with genericSearch version 2.5
/* Some fixed strings of known literals */
/** Path fragment "/get/". NOTE(review): presumably inserted between the
 * baseURL and a pid when building dissemination URLs - confirm at the
 * usage sites. */
protected static final String GET = "/get/";
// Historical note: the DemoSOAPClient this code was derived from declares
// its API-A handle as a static member, probably because none of the APIA
// (web service) methods remembers state. In this class the handle is an
// instance field and is (re)assigned by createAPIA().
/** The object used to access the Fedora API-A web service methods.
 * It is null until createAPIA() succeeds. */
protected FedoraAPIA APIA;
/** Version of the running fedora server, as reported by the repository
 * when the connection is established in createAPIA(). */
protected String fedoraVersion;
/** The location of the fedora server, usually of the form: protocol://host:port/fedora
 * e.g. (and default) http://localhost:8080/fedora
 * Assembled in init() from the protocol, host and port. */
protected String baseURL;
/** The user-specified portAddressSuffix of the Fedora Access web services
 * (endpoint URL in the WSDL), usually of the form
 * http://localhost:8080/fedora/services/access
 * Users can tell FedoraGS3 to try accessing that first by setting
 * the "port.address.suffix" property in the properties file.
 * FedoraGS3 itself will not write the portAddressSuffix currently used in
 * the file for next time, but leave whatever value was entered in the
 * properties file. The portAddress--not just suffix--currently in use (once
 * the FedoraAPIA handle has been instantiated) can be obtained through
 * getPortAddressURL() method.
 * After a successful createAPIA() call this field holds the suffix that is
 * actually in use. */
protected String portAddressSuffix;
/** The part of the portAddress that comes after the baseURL. By default and
 * usually this is: "/services/access". init() falls back to this suffix
 * when no user-specified suffix is given or the user-specified one fails. */
protected static final String defaultPortAddressSuffix = "/services/access";
/** The preferred language of the display content. Set to ENGLISH in init(). */
protected String lang;
/** The maximum number of results requested from a search (used, for
 * instance, when retrieving collections). Set to Integer.MAX_VALUE in init(). */
protected int maxresults;
/** DocumentBuilder used to create and parse XML documents. Created in init(). */
protected DocumentBuilder builder;
/** Reports which Fedora version this class was written against.
 * @return the value of the SUPPORTED_VERSION constant. */
public static String getSupportedVersion() {
    return SUPPORTED_VERSION;
}
/** Reports the version of the Fedora server this connection talks to.
 * @return the version string of the running Fedora server, which may or
 * may not match the supported version. */
public String getFedoraVersion() {
    return this.fedoraVersion;
}
/** Returns the language currently used by default when querying for
 * titles (and anything else that offers multiple language options).
 * @return the default language code; init() sets this to English. */
public String getLanguage() {
    return this.lang;
}
/** Sets the default language used to query for titles (and anything else
 * where there are multiple language options).
 * The setter only stores the value. According to the original notes, a
 * query whose default language is unavailable falls back to English ("en")
 * and then to the first other available language; that fallback is
 * implemented by the querying code, not here.
 * @param lang - the two-letter language code to set the default language to.
 */
public void setLanguage(String lang) {
    this.lang = lang;
}
/** Returns the default maximum number of search results returned for a
 * search.
 * @return the current limit; init() sets this to Java's Integer.MAX_VALUE. */
public int getMaxResults() {
    return this.maxresults;
}
/** Changes the default maximum number of search results returned for a
 * search.
 * @param maxresults - the new default maximum number of search results to
 * be returned. */
public void setMaxResults(int maxresults) {
    this.maxresults = maxresults;
}
/** Code for this constructor is from DemoSOAPClient.java.
* Instantiates the APIA handle using the protocol, host, port, fedora
* server repository username and password.
* @param host - the fedora server host (may be prefixed with http:// or
* https:// if parameter protocol is empty). If there's no protocol, and
* no protocol prefixed to the host, then the protocol defaults to http.
* @param protocol - either http or https (or empty "")
* @param port - the port on which fedora is running.
* @param fedoraServerUsername - the administrator username required to
* access the fedora server's repository. ("fedoraAdmin" unless changed).
* @param fedoraServerPassword - the fedora server repository's
* administrator password. If none was set on fedora installation, this
* can be empty (""). */
public FedoraConnection(String protocol, String host, int port,
String fedoraServerUsername, String fedoraServerPassword)
throws ParserConfigurationException, MalformedURLException,
SSLHandshakeException, RemoteException, AuthenticationFailedException,
NotAFedoraServerException, ConnectException, Exception
{
try {
this.portAddressSuffix = "";
init(protocol, host, Integer.toString(port),
fedoraServerUsername, fedoraServerPassword);
} /*catch(RemoteException re) { //subclass of IOException
throw re;
} catch(SSLHandshakeException ssle) { //subclass of IOException
// this is also of type IOException
throw ssle;
}*/ catch(IOException ioe) { // connected to the wrong server
String exceptMsg = ioe.getMessage().toLowerCase();
if(exceptMsg.indexOf("request failed") != -1
|| exceptMsg.indexOf("404") != -1)
throw new NotAFedoraServerException();
else // the IOException is not due the cause we thought it was, so
throw ioe; // rethrow whatever other IOException was caught (which
// could have been RemoteException or SSLHandshakeException
// or some other cause)
}
}
/** Default constructor which takes input from the user to get host, port,
 * fedora username and password.
 * It starts from an empty set of properties and hands over to
 * setInitialisationProperties(), which displays the authentication popup
 * and tries to connect with the values entered. How each kind of failure
 * is dealt with (wrong password, refused connection, SSL handshake
 * problems, a non-Fedora server answering) is decided there; anything it
 * does not handle is passed on to the caller of this constructor.
 * NOTE: if a fedora server at the given protocol (https or http) isn't
 * accessible, it can take a long time before the failure is reported.
 * (Note that IOException is not in the throws clause - its other causes
 * being unknown, it is covered by the more generic Exception.)
 * @throws CancelledException if the user cancels the popup dialog.
 */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // Nothing is persisted for this constructor, so a throw-away
    // Properties object is all the dialog loop needs.
    setInitialisationProperties(new Properties());
}
/** Single argument constructor that takes the properties file defining the
 * values of the initialisation parameters required to instantiate a
 * FedoraConnection. These are fedora server username, password, host and
 * port (and optionally "port.address.suffix"). Values missing from the
 * file are shown with the dialog's own defaults.
 * Once a connection has been established, the values the user entered are
 * written back to the same file so they become the defaults next time.
 * The password and any error message are blanked before saving, and a
 * "port.address.suffix" entry is only added (empty) if none existed.
 * Problems reading or writing the file are logged and otherwise ignored.
 * @param propertyFile is the properties file specifying the
 * values for Fedora server username, password, host and port.
 * @throws CancelledException if the user cancels the popup dialog; in that
 * case nothing is written back to the file. */
public FedoraConnection(File propertyFile)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    Properties properties = new Properties();

    // Load the properties from the given file
    FileInputStream in = null;
    try {
        if(propertyFile.exists()) {
            in = new FileInputStream(propertyFile);
            properties.load(in);
        }
    } catch(Exception e) {
        // If the file could not be read, then we just continue
        // with whatever properties were loaded so far
        LOG.warn("Exception loading from propertyFile "
            + propertyFile + ": " + e);
    } finally {
        // Properties.load() does not close the stream, so do it here
        if(in != null) {
            try {
                in.close();
            } catch(IOException ignored) {
                // the data has been read already; a failed close
                // of an input stream loses nothing
            }
        }
    }

    // Go through the process of showing the initialisation dialog
    setInitialisationProperties(properties);

    // Now let's save whatever values the user may have entered into the
    // input dialog as the default values for next time the dialog shows
    java.io.FileOutputStream out = null;
    try {
        out = new java.io.FileOutputStream(
            propertyFile); // same file as properties loading file
        // First make sure errormessage gets stored as "" and doesn't
        // cause problems next time.
        properties.setProperty("errormessage", "");
        // Don't save passwords
        properties.setProperty("password", "");
        // If the portAddressSuffix is in the file already, then it's
        // user-specified and we shouldn't change it. But if there is no
        // such property, create it with an empty string value.
        if(properties.getProperty("port.address.suffix") == null) {
            properties.setProperty("port.address.suffix", "");
        }
        properties.store(out, "fedoraGS3 properties"); // write properties
    } catch(Exception e) {
        LOG.warn("Exception writing to propertyFile "
            + propertyFile + ": " + e);
    } finally {
        // Javadoc states that "The output stream remains open after this
        // method (Properties.store) returns." Close it even when storing
        // failed, so the file handle is never leaked.
        if(out != null) {
            try {
                out.close();
            } catch(IOException e) {
                LOG.warn("Exception writing to propertyFile "
                    + propertyFile + ": " + e);
            }
        }
    }
}
/** Method that loops to display the dialog that retrieves the
 * fedora server initialisation properties from the user, and tries to
 * connect with them. Values already present in the properties (e.g.
 * loaded from a property file) are shown in the dialog.
 * The dialog is shown again, with an explanatory "errormessage", when:
 * <ul>
 * <li>authentication fails (AuthenticationFailedException);</li>
 * <li>a RemoteException occurs (e.g. connection refused or unknown host
 * as its cause, or a server that is not a Fedora server);</li>
 * <li>a ConnectException or SSLHandshakeException occurs;</li>
 * <li>another IOException occurs whose message mentions "request failed"
 * or "404" (some server other than Fedora is listening there), or
 * "401" or "500".</li>
 * </ul>
 * Any other IOException, and any exception not listed above, is passed
 * on to the caller. The loop ends normally only once init() succeeded.
 * NOTE: if a fedora server at https:// is not accessible, it takes a long
 * time for the authentication popup to reappear.
 * @param properties the Properties Hashmap storing values for
 * username, password, host and port (and any errormessage).
 * @throws CancelledException if the user cancels the dialog. */
protected void setInitialisationProperties(Properties properties)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    boolean authenticated;

    // reset any error messages that may have been stored (should not be
    // the case, but if there had been any difficulty during storing, it
    // may not have written out an empty errorMessage)
    properties.setProperty("errormessage", "");

    do {
        // Every attempt counts as failed until init() has returned
        // normally. This way no handled failure can end the loop by
        // accident (previously a "404"/"request failed" IOException on
        // the very first attempt did).
        authenticated = false;

        // show the Authentication-popup:
        // By passing the HashMap Properties, user-updated values will
        // be persistent in the authentication-popup fields (rather than
        // reset to the default initial values).
        properties = showAuthenticationPopup(properties);
        String fedoraServerUsername = properties.getProperty("username", "");
        String fedoraServerPassword = properties.getProperty("password", "");
        String host = properties.getProperty("host", "");
        String port = properties.getProperty("port", "");

        // Only supply a protocol if the host doesn't come with one
        // (a host starting with "https" also starts with "http").
        String protocol = host.startsWith("http") ? "" : "http://";

        try {
            this.portAddressSuffix
                = properties.getProperty("port.address.suffix", "");
            // Instantiates the SOAP stub for APIA, which enables the
            // client to connect to a Fedora repository via the API-A web
            // service interface. Throws an exception if it can't.
            init(protocol, host, port,
                fedoraServerUsername, fedoraServerPassword);
            // no exception thrown, so we have been authenticated:
            authenticated = true;
        } catch(AuthenticationFailedException afe) {
            properties.setProperty("errormessage", afe.getMessage());
        } catch(RemoteException e) { // causes could be various
            String reason = e.getMessage();
            Throwable cause = e.getCause();
            if(cause != null) {
                // For instance, if a ConnectException indicating
                // 'Connection Refused' or a java.net.UnknownHostException
                // caused the RemoteException.
                // Strip out prefix "Nested exception is..." from the
                // encapsulating Exception's message, by using the Cause's
                // message. Keep Exception classname to give it some context:
                reason = cause.getClass().getName() + ": "
                    + cause.getMessage();
                // Give some more information if the connection was refused.
                // (This can also happen when the Fedora server is not running)
                if(cause.getClass().equals(ConnectException.class)) {
                    reason += FedoraGS3Exception.connectionRefusedMessage;
                }
            } else if(reason == null) {
                // neither message nor cause: fall back to the exception's
                // own description rather than failing on a null message
                reason = e.toString();
            }
            // if the message indicates that a server was running there,
            // then we tell the user it was not a Fedora server
            String lowerReason = reason.toLowerCase();
            if(lowerReason.contains("404")
                || lowerReason.contains("request failed"))
            {
                reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
            }
            properties.setProperty("errormessage", reason);
        } catch(ConnectException e) {
            properties.setProperty("errormessage",
                FedoraGS3Exception.connectionRefusedMessage);
        } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
            // be handled before IOException, as it's an IOException subclass.
            properties.setProperty("errormessage",
                FedoraGS3Exception.sslHandshakeExceptionMessage);
            // we won't prefix the host with http for the user, as https
            // might be right after all, and something else might have gone
            // wrong during the connection attempt instead.
        } catch(IOException ioe) { // occurs when we try to connect to a
            // host/port where some server other than Fedora's is listening
            // (e.g. if we end up connecting to GS3's host and port).
            // In that case, we can get exception messages like a 404:
            // "Unable to instantiate FedoraConnection
            // java.io.IOException: Request failed [404 /fedora/describe]"
            // Test this by trying to connect to localhost at 9090 where GS3 is
            String ioMessage = ioe.getMessage();
            // a missing message matches nothing below and is rethrown
            String exceptMsg = (ioMessage == null) ? "" : ioMessage.toLowerCase();
            if(exceptMsg.indexOf("request failed") != -1
                || exceptMsg.indexOf("404") != -1)
            {
                properties.setProperty("errormessage",
                    NotAFedoraServerException.MESSAGE
                    + "\n(" + ioMessage + ")");
            } else if(exceptMsg.indexOf("401") != -1
                || exceptMsg.indexOf("500") != -1)
            {
                properties.setProperty("errormessage", ioMessage);
            } else { // the exception occurred for some other reason, rethrow it
                throw ioe;
            }
        }
    } while(!authenticated); // will keep showing popup until authentication
        // and connection input values are valid
}
/**
* Static method that displays a popup to allow the user to provide Fedora
* authentication (username, pwd) and connection (protocol+host, port) details.
* @param properties is a Properties HashMap where the property Keys which must
* have been put in here in advance (even with "" Values if appropriate) are:
*
* - username * - password * - host (may - but need not - be prefixed with either of the protocols * "http://" and "https://" * - port * - errorMessage (displayed near the top of the popup dialog). Can be "". ** The values stored in the properties HashMap for the above property are * initially displayed in the fields and the user can overwrite them. * This is useful in such cases where invalid values were entered and this * popup must be redisplayed to allow the user to correct their previous input. * @return the same HashMap Properties which was passed as parameter. */ protected static Properties showAuthenticationPopup(Properties properties) throws CancelledException { // Retrieve all the properties -- defaults to "" if any are null JTextField usernameField = new JTextField( properties.getProperty("username", "fedoraAdmin")); JTextField passwordField = new JPasswordField( properties.getProperty("password", "")); JTextField hostField = new JTextField( properties.getProperty("host", "localhost")); JTextField portField = new JTextField( properties.getProperty("port", "8080")); JPanel panel = new JPanel(new GridLayout(4,2)); panel.add(new JLabel("User Name")); panel.add(usernameField); panel.add(new JLabel("Password")); panel.add(passwordField); panel.add(new JLabel("Host")); panel.add(hostField); panel.add(new JLabel("Port")); panel.add(portField); String heading = "Fedora Server Admin Authentication:"; String errorMessage = properties.getProperty("errormessage", ""); if(!errorMessage.equals("")) { heading = "=> " + errorMessage + "\n\n" + heading; } int option = JOptionPane.showConfirmDialog(null, new Object[] { heading, panel}, "Enter Network Password", JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE); if (option == JOptionPane.OK_OPTION) { String fedoraServerUsername = usernameField.getText(); String fedoraServerPassword = passwordField.getText(); String host = hostField.getText(); String port = portField.getText(); properties.setProperty("username", 
fedoraServerUsername); properties.setProperty("password", fedoraServerPassword); properties.setProperty("host", host); properties.setProperty("port", port); } else { // Cancel option throw new CancelledException(); } return properties; } /** Init method that is called by the constructor to set some * important member variables including instantiating the APIA object * used to invoke the Fedora APIA web service operations. * @param protocol can be http or https * @param host is the name of the Fedora server host * @param port is the port number (String form) of the Fedora server * @param fedoraServerUsername is the user name to access the Fedora * Server * @param fedoraServerPassword is the password needed to access the * Fedora Server */ protected void init(String protocol, String host, String port, String fedoraServerUsername, String fedoraServerPassword) throws ParserConfigurationException, MalformedURLException, AuthenticationFailedException, RemoteException, Exception { // initialise member variables lang = ENGLISH; maxresults = Integer.MAX_VALUE; DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); // (protocol is "" if host already contains protocol) if(!protocol.equals("") && !protocol.endsWith("://")) protocol += "://"; // now create baseURL = protocol://host:port/fedora this.baseURL = protocol + host + ":" + port + "/fedora"; // Get the FedoraAPIA handle to/stub of the Fedora web services // New way of instantiating connection to Fedora is dependent on // fewer files of FedoraClient.jar FedoraAPIAServiceLocator serviceLocator = new FedoraAPIAServiceLocator(fedoraServerUsername, fedoraServerPassword); APIA = null; boolean isUserSpecifiedPortAddressSuffix = false; // try any portAddressSuffix specified by the user if(!this.portAddressSuffix.equals("")) { isUserSpecifiedPortAddressSuffix = true; this.createAPIA(serviceLocator, this.portAddressSuffix, "user-specified", isUserSpecifiedPortAddressSuffix); 
} // If the user-specified portAddressSuffix failed or if there was none // given, then APIA will be null, so we will try with the default // portAddressSuffix. This time all exceptions will be passed on. if(APIA == null) { isUserSpecifiedPortAddressSuffix = false; this.createAPIA(serviceLocator, defaultPortAddressSuffix, "default", isUserSpecifiedPortAddressSuffix); } } /** Tries to create the FedoraAPIA instance using the serviceLocator * and the given portSuffix. The APIA instance is obtained for the * baseURL+portSuffix. Any exceptions are (processed and) rethrown * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the * Remote Exception from AXIS that it can't find the target service to * invoke is ignored so that the caller can retry with the default port- * address suffix first before giving up. */ protected void createAPIA(FedoraAPIAServiceLocator serviceLocator, String portSuffix, String messageInsert, boolean isUserSpecifiedPortAddressSuffix) throws Exception { //String portSuffix = (isUserSpecifiedPortAddressSuffix) ? // this.portAddressSuffix : defaultPortAddressSuffix; try { LOG.debug( "Trying to connect to Fedora using the given" + " baseURL and the " + messageInsert + " portAddress suffix:\n" + baseURL + portSuffix); APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP( new java.net.URL(baseURL+portSuffix)); // let's test whether we're authenticated (otherwise a // RemoteException will be thrown to indicate that the // password was incorrect.) 
RepositoryInfo repositoryInfo = APIA.describeRepository(); // throws RemoteException if pwd wrong or for other reasons // in which case describeRepository() service is unavailable this.fedoraVersion = repositoryInfo.getRepositoryVersion(); // If we come all the way here, no exceptions were thrown: this.portAddressSuffix = portSuffix; // store the one currently in use } catch(RemoteException re) { // if we're here, then APIA was unable to call the web service // If this was because the fedora authentication failed, then // let's throw a custom exception String message = re.getMessage().toLowerCase(); // Looking for something Unauthorized(401) if(message.indexOf("unauthorized") != -1 || message.indexOf("401") != -1) { throw new AuthenticationFailedException(); } else if(isUserSpecifiedPortAddressSuffix && re.getMessage().contains( FedoraGS3Exception.missingTargetService)) { LOG.warn("Failed to connect to Fedora APIA services at given" + " port address:\n" + portSuffix + "\nException: " + re.getMessage()); // APIA.describeRepository can throw a remote exception // whereby AXIS says the target service is missing and can't // be invoked (FedoraGS3Exception.missingTargetService) // Don't rethrow this, if AXIS can't find the user-specified // portAddressSuffix, we will try with the default suffix next APIA = null; } else { // if trying default portAddressSuffix or if any other // RemoteException was generated (whose cause is something // other than an authentication failure) rethrow it. throw re; } } catch(Exception e) { // Other Exceptions // Could possibly be a ServiceException when using ServiceLocator if(isUserSpecifiedPortAddressSuffix) { APIA = null; // we won't throw other exceptions yet until // we have tried the default PortAddressSuffix for the baseURL } else { throw new FedoraGS3InitFailureException(e); } } } /** Gets all greenstone collections. Searches for greenstone:*-collection. 
* Method getCollections() defaults to getting only those objects in fedora's * repository whose pids are of the format greenstone:*-collection. * The use of AutoFinder and findObjects is shown in * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java * The Fedora-APIA's method definition of findObjects is: *
* fedora-types:FieldSearchResult findObjects( * fedora-types:ArrayOfString resultFields, * xsd:nonNegativeInteger maxResults, * fedora-types:FieldSearchQuery query ) ** @see The local fedora search page for how the search works * @see Fedora access API, API-A for method findObjects * @see XML type definition of FieldSearchQuery * @see Type definition of 2.2.1 FieldSearchQuery * @see does not apply: type definition of 2.1.1 FieldSearchQuery * @see BrowseController.java for an example * * @return an array of Strings containing the pids of all collections * matching the format greenstone:*-collection. */ public String[] getCollections() throws RemoteException { // Available constructors: // FieldSearchQuery(java.util.List conditions) // FieldSearchQuery(java.lang.String terms) final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION; FieldSearchQuery query = new FieldSearchQuery(); query.setTerms(queryStr); query.setConditions(null); // we'd like pid and title returned for each object // we pass maxResults=null to get all objects that match // (i.e. all collections) String[] pids = null; FieldSearchResult collection = AutoFinder.findObjects( APIA, new String[]{"pid", "title"}, maxresults, query); ObjectFields[] results = collection.getResultList(); pids = new String[results.length]; for(int i = 0; i < results.length; i++) { pids[i] = results[i].getPid(); } return pids; } /** All objects (incl "greenstone:*" objects) in fedora - be they collections, * top-level documents or document sections - have a DC datastream. This * method returns the content (XML) of the DC datastream as it is stored in * fedora's repository. * (The pid/DC call is one of the default fedora-system 3 disseminations.) * Try an example of the form: http://localhost:8080/fedora/get/<pid>/DC * To obtain the DC/any datastream, we use method getDatastreamDissemination() * of the interface FedoraAPIA. This method returns a MIMETypedStream. 
* The method signature is: * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime) * where dsID = itemID (look at datastreams page of running fedora instance) * To access the XML content of the MIMETypedObject returned, we use its method * bytes[] getStream(), but when instantiating a String from this, we have to * use the String() contructor where we can specify the charset encoding (in * this case, it must be UTF-8). Else getStream() returns gobbledygook. * @return a String version of the XML in the DC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. * @see FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class * @see TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage. * @see constructor String(byte[], java.lang.String) * @see Charset.java, for character sets and encoding */ public String getDC(String pid) throws RemoteException, UnsupportedEncodingException { // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC // datastream ID, dsID = itemID, look at a running fedora MIMETypedStream dcStream = APIA.getDatastreamDissemination(pid, DC, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(dcStream.getStream(), UTF8); } /** All "greenstone:*" objects in fedora (be they collections be they * collections, top-level documents or document sections) have an EX * datastream. This method returns the content (XML) of the EX datastream as * is. (It calls the default fedora-system 3 dissemination <pid>/EX.) * @return a String version of the XML in the EX datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. 
* Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime). * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream(). * @see String getDC(String pid) throws Exception * */ public String getEX(String pid) throws RemoteException, UnsupportedEncodingException { MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(exStream.getStream(), UTF8); } /** Some "greenstone:*" top-level documents in the fedora repository (but not * greenstone collections or document sections) have a DLS metadata datastream. * This method returns the content (XML) of the DLS datastream as is. (It calls * the default fedora-system 3 dissemination <pid>/DLS.) * @return a String version of the XML in the DLS datastream for the fedora * object denoted by pid, or "" if the document given by pid has no DLS datastream. * @param pid - the fedora persistent identifier for an item in the fedora * repository. * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime). * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream(). 
* @see String getDC(String pid) throws Exception * */ public String getDLS(String pid) throws RemoteException, UnsupportedEncodingException { MIMETypedStream dlsStream = null; // If there is no DLS datastream, it throws an exception (whose class // fedora.server.errors.DatastreamNotFoundException can't be imported // here (it's not in the client side fedora.server.* package, but on // the server side package of that name): try{ dlsStream = APIA.getDatastreamDissemination(pid, DLS, null); //asOfDateTime=null to get the current version of the dataStream } catch(RemoteException e) { //These two don't work: //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException")) //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException")) if(e.getMessage().contains("No datastream could be returned.") || e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException")) { // there is no DLS data stream for this document return ""; } else { // different problem, exception due to different cause throw(e); } } if(dlsStream == null) return ""; // need to set the charset encoding to UTF8 return new String(dlsStream.getStream(), UTF8); } /** All "greenstone:*" objects in fedora (be they collections or documents) * have a TOC datastream, unless they have only 1 section (SECTION1). * This method returns the content (XML) of the TOC datastream as is. * (Calls default fedora-system 3 dissemination <pid>/TOC.) * @return a String version of the XML in the TOC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. 
* Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime) * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream() * @see String getDC(String pid) throws Exception * */ public String getTOC(String pid) throws RemoteException, UnsupportedEncodingException { try { MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(tocStream.getStream(), UTF8); } catch(RemoteException re) { // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1 return new String("".getBytes(), UTF8); //set charset } } /** @return the <name>s (in greenstone:<name>-collection) * for the collections indicated by collPIDs. * @param collPIDs - an array of Strings denoting the pids for greenstone * collections stored in the fedora repositoryl. These should be of the * format "greenstone:<collectionName>-collection". */ public String[] getCollectionNames(String[] collPIDs) { String[] collNames = new String[collPIDs.length]; for(int i = 0; i < collPIDs.length; i++) collNames[i] = getCollectionName(collPIDs[i]); return collNames; } /** @return "greenstone:<name>-collection" for all <name>s * in the parameter collNames. * @param collNames - a list of names of greenstone collections * stored in the fedora repository. */ public String[] getCollectionPIDs(String[] collNames) { String[] collPIDs = new String[collNames.length]; for(int i = 0; i < collNames.length; i++) collPIDs[i] = getCollectionName(collNames[i]); return collPIDs; } /** @return greenstone:<name>-collection for the<name> * denoted by parameter collName. * @param collName - the name of a greenstone collection stored * stored in the fedora repository. 
*/
    public String getCollectionPID(String collName) {
        // The pid is the collection name wrapped between the GREENSTONE_
        // prefix constant and the _COLLECTION suffix constant.
        return GREENSTONE_+collName+_COLLECTION;
    }

    /**
     * Gets the title of the collection denoted by the given collection's pid by
     * retrieving the title metadata for it from the collection's EX datastream.
     * @param collPID - the pid of a greenstone collection in the fedora
     * repository.
     * @return the title (in the default language, else English, else the
     * first title found) for the particular collection denoted by its PID.
     */
    public String getCollectionTitle(String collPID)
        throws RemoteException, UnsupportedEncodingException,
               SAXException, IOException
    {
        String title = null; // has to be null initially, we do a check on it

        // Parse the EX datastream (XML), and in its DOM, find the
        //
* "There are two search methods: a search on all fields or a search on * specific fields. To search all fields the setTerms function of the * FieldSearchQuery must be used, with the paramater being the desired string. * * To search by specific fields, you must create an array of Condition * objects. Each condition consists of three parts: * the field to be searched (.setProperty()), * the operation to be used (.setOperator(ComparisonOperator. <operator>)), * and the search string (.setValue())" ** We want to use the second search method above when browsing and searching, * and search for: pid~greenstone:<collName>* title~<letter>* * or pid~greenstone:<collName>* title~<first word of search phrase> * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java. * * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based * web services are defined. (The web.xml defines the "Servlets for REST-based * interfaces to the Fedora Repository Server"). * Do a search on the word "search": * fedora.server.access.FieldSearchServlet is the class we need to look at * It accesses a different Condition.java class: fedora.server.search.Condition.java * The above is what is used by the REST-based interface in FieldSearchServlet.java * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java * is what's used in the fedora client application that makes use of * the SOAP-based interface. 
* * @see FedoraAccess.java * @see MaAPI * @see Fedora server type Condition.java * @see Fedora client test Search.java */ protected FieldSearchResult findObjectsWithTitlesContaining( String collName, final String titleWord) throws RemoteException, FedoraVersionNotSupportedException { // Searching for pids of the form "greenstone:gs2mgdemo-*"; final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD; Condition[] conditions = new Condition[2]; conditions[0] = new Condition("pid", ComparisonOperator.has, pid); conditions[1] = new Condition("title", ComparisonOperator.has, titleWord); FieldSearchQuery query = new FieldSearchQuery(); query.setConditions(conditions); // We'd like pid and title returned for each object, because we'll make // use of title. We pass maxResults=null to get all objects that match // (i.e. all collections). FieldSearchResult objects = null; final String[] retrieveFields = {"pid", "title"}; try { objects = AutoFinder.findObjects( APIA, retrieveFields, maxresults, query); // collection = APIA.findObjects(new String[]{"pid", "title"}, // new NonNegativeInteger(Integer.toString(maxresults)), query); } catch(RemoteException ex) { if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) { // fedoraVersion is too low, searching/browsing is not possible // (because class Condition has changed after 2.0, from 2.1.1 // onwards) throw new FedoraVersionNotSupportedException(fedoraVersion); } else { LOG.error( "Remote exception when calling web service operation " + "findObject() to execute search:\n" + ex.getMessage()); ex.printStackTrace(); throw ex; } } return objects; // return the FieldSearchResult objects found } /** @return the <docName> in the parameter docPID (which is of the form: * greenstone:<colname>-<docName>) * @param docPID - pid of a greenstone document in the fedora repository. 
*/
    public String getDocName(String docPID) {
        // Everything after the first hyphen. When docPID contains no hyphen,
        // indexOf() yields -1 and the whole string is returned.
        return docPID.substring(docPID.indexOf('-')+1);
    }

    /** @return the <name> in the parameter collPID
     * (greenstone:<name>-collection)
     * If collPID is a docPID, this method does the same: return the <name>
     * in the docPID (greenstone:<name>-docID).
     * The <name> is the text between the first ':' and the first '-' of the
     * pid. A pid without any hyphen makes substring() throw a
     * StringIndexOutOfBoundsException.
     * @param collPID - pid of a greenstone collection in the fedora repository.
     */
    public String getCollectionName(String collPID) {
        return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
    }

    /** Return the TOC substructure requested.
     * A new Document is created with builder.newDocument(), and a copy of the
     * element original is imported into it together with whichever of its
     * ancestors, parent, siblings, children or descendants are named in the
     * parameter structure. Ancestors, parent and siblings are always copied
     * shallowly (without their own descendants); only nodes whose name equals
     * SECTION_ELEMENT are copied as siblings.
     * @return the document element of the new document. That is the copy of
     * the topmost ancestor if ancestors were requested, the copy of the parent
     * if parent or siblings were requested, and the copy of original otherwise
     * (including whenever original has no parent element).
     * @param original - the element to start copying from and whose structure is requested.
     * @param structure - a string containing any combination of the values:
     * ancestors, parent, siblings, children, descendants,
     * specifying the portion of the structure to retrieve. The values are
     * detected by substring search. If both children and descendants are
     * present, descendants wins (the deep copy already holds the children).
     * @see Sun java thread on transforming a DOM XML to a String
     */
    protected Element getSubstructure(Element original, String structure) {
        Document doc = builder.newDocument();

        boolean descendants = (structure.indexOf("descendants") != -1) ? true : false;
        Node current = doc.importNode(original, descendants);
        // descendants=true: import/copy descendants.
        // Else, copy just current node original (later copy its direct children)

        Node parentOfCurrent = null;
        Node parentOfOriginal = original.getParentNode();
        if(parentOfOriginal == original.getOwnerDocument()) {
            // don't want document node (original is docRoot)
            parentOfOriginal = null;
        }

        if(parentOfOriginal == null) { // no parentNode, so current is the root node.
            // can't get ancestors/parent/siblings, since all these need parentNode
            doc.appendChild(current);
        } else {
            // siblings, ancestors and parent requests all require parent node to exist

            // First check if we need to get ancestors, else for whether parent is required
            if(structure.indexOf("ancestors") != -1) {
                parentOfCurrent = doc.importNode(parentOfOriginal, false);

                // Walk up from the original's grandparent to just below the
                // document node, wrapping the chain built so far inside a
                // shallow copy of each ancestor in turn.
                Node child = null;
                Node parent = parentOfCurrent; // the copy
                Node n = parentOfOriginal.getParentNode(); // the doc to copy from
                while(n != null && n != original.getOwnerDocument()) {
                    child = parent;
                    parent = doc.importNode(n, false); // no descendants
                    parent.appendChild(child);
                    n = n.getParentNode();
                }

                doc.appendChild(parent); // need to put the copied node into a document
                // else it won't have a parent doc (DOMSource can't work with it
                // without it having a document parent).
            } else if(structure.indexOf("parent") != -1) {
                parentOfCurrent = doc.importNode(parentOfOriginal, false);
                // current is not attached to the parent here; that happens
                // further below, in the siblings handling or its else-branch.
                doc.appendChild(parentOfCurrent);
            }

            // a request for siblings is independently tested for
            if(structure.indexOf("siblings") != -1) {
                // only import parent if we didn't already import
                // it for a request for ancestors or parent
                if(parentOfCurrent == null) {
                    parentOfCurrent = doc.importNode(parentOfOriginal, false);
                    doc.appendChild(parentOfCurrent); // this becomes the root
                }

                // now the siblings of current (children of parentOfCurrent)
                NodeList children = parentOfOriginal.getChildNodes();
                for(int i = 0; i < children.getLength(); i++) {
                    Node n = children.item(i);
                    if(n.getNodeName().equals(SECTION_ELEMENT)) {
                        if((Element)n != original) {
                            // skip original which was already imported
                            Node child = doc.importNode(n, false); // no descendants
                            parentOfCurrent.appendChild(child);
                        } else {
                            // already imported Current element, insert at this position
                            parentOfCurrent.appendChild(current);
                        }
                    }
                }
            } else if(parentOfCurrent != null) {
                // include current node for ancestors and parent requests
                // (sibling request adds the current node into a particular position)
                parentOfCurrent.appendChild(current);
                // need to put the copied node into a document
                // else it won't have a parent doc (DOMSource can't work with it
                // without it having a document parent).
            } else {
                // when only children or descendants were requested, current becomes root document
                doc.appendChild(current);
            }
        }

        // if we are not recursively copying all descendants, then copy just
        // the childnodes of current:
        if(structure.indexOf("children") != -1 && !descendants) {
            // then copy just the children

            // get original's children and copy them into the new document
            // (every child node is copied here, not only section elements)
            NodeList children = original.getChildNodes();
            for(int i = 0; i < children.getLength(); i++) {
                // create copy
                Node n = doc.importNode(children.item(i), false);

                // attach it to parent
                current.appendChild(n);

                // Now we need to indicate whether this new node (child) is a leaf
                // or not. (This is necessary for getChildrenOfSection(), else
                // it's hard to know if the children are leaves or have further
                // subsections.)
                // NOTE(review): with the setAttribute() call disabled, the
                // if-block below has no effect on the returned structure.
                if(n.getNodeName().equals(SECTION_ELEMENT)) {
                    // we're dealing only with section children

                    // Check if the matching original had children:
                    Element originalsChild = (Element)children.item(i);
                    NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT);
                    if(grandchildren.getLength() > 0) {
                        // original's child has children, so indicate this
                        // in the copied child:
                        // NOTE(review): 'child' is never used, and it refers to
                        // current (the parent) rather than to the copied child
                        // n -- presumably n was meant; confirm before
                        // re-enabling the call below.
                        Element child = (Element)current;
                        // (disabled) child.setAttribute(TYPE, INTERNAL_NODE);
                    }
                }
            }
        }

        return doc.getDocumentElement();
    }

    /** Return the TOC substructure with the requested structural info.
     * @return an element containing a copy if element e with either only its child
     * elements or with all its descendants and/or its ancestors or only its parent
     * and/or its siblings (depending on what the parameter structure specifies).
     * Returns null if the element, e, passed in is null.
* @param doc - the new document into whose root element the structural information * will be inserted as attributes. * @param e - the element to start copying from and whose structure is requested. * @param info - a string containing any combination of the values: numChildren, * numSiblings, siblingPosition. The requested info gets added as attributes to * the returned root element. * @see Sun java thread on transforming a DOM XML to a String */ protected Element getStructureInfo(Document doc, Element e, String info) { if(e == null) { return null; } Element root = doc.getDocumentElement(); if(!info.equals("")) { if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1 || info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength(); int numChildren = 0; NodeList children = e.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { numChildren++; } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN, Integer.toString(numChildren)); } if(info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //String docType = (numChildren > 0) ? 
"hierarchy" : "simple"; String docType = "hierarchy"; root.setAttribute(AbstractBasicDocument.INFO_DOC_TYPE, docType); } } if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings int numSiblings = 0; int siblingPosition = 0; Node parent = e.getParentNode(); if(parent == null) { numSiblings = 0; siblingPosition = 1; } else { //numSiblings = parent.getChildNodes().getLength(); NodeList siblings = parent.getChildNodes(); for(int i = 0; i < siblings.getLength(); i++) { Node n = siblings.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { if(e == (Element)n) { siblingPosition = numSiblings+1; } else { // count every sibling section element, except e itself numSiblings++; } } } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_SIBS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_SIBS, Integer.toString(numSiblings)); } if(info.indexOf(AbstractBasicDocument.INFO_SIB_POS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_SIB_POS, Integer.toString(siblingPosition)); } } } return root; } /** * Return a datastream of a document, given the document's id * and the item id of the datastream which is to be retrieved. * @return the XML (in String form) of the item denoted by itemID * that's part of the fedora data object denoted by docPID. * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3 * Can't retrieve images denoted by itemID using this method, only items * that are of XML format. * @param docPID - pid of a greenstone document in the fedora repository. * @param itemID - the itemID of a datastream of the fedora object * identified by docPID. 
*/ protected String getItem(String docPID, String itemID) throws RemoteException, UnsupportedEncodingException { // MIMETypedStream getDatastreamDissemination( // String pid, String dsID, asOfDateTime) MIMETypedStream datastream = APIA.getDatastreamDissemination(docPID, itemID, null); return new String(datastream.getStream(), UTF8); } /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method * returns "1.2.1". * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1, * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1". * However, the string str is returned unchanged if the prefix does not occur * at the start of str. * @return the String parameter str without the prefix. * It can be used to return the number of an itemID of a greenstone document * stored in the fedora repository without the given prefix. * @param prefix - the prefix which ought to be removed from the itemID. * @param str - the value of the itemID. */ protected String removePrefix(String str, String prefix) { // do nothing in those cases where the prefix is not in param str if(!str.startsWith(prefix)) return str; // otherwise: if(prefix.equals(EX+".") || prefix.equals(DC+".")) { return "1" + str.substring(prefix.length()); } else { return str.substring(prefix.length()); } } /** Given a number of the form x(.y.z), this method returns this number * as is, except when x = 1, in which case, it would return .y.z * That is, given number=3.2.1, this method would return 3.2.1 * But, given number=1.2.3, this method would return .2.3. * When number=1, it is NOT a special case: "" is returned as explained. * @param number - a proper (fedora-greenstone document) section number * @return the same number as it ought to be for the associated EX, DC datastreama. 
*/ protected String convertToMetaNumber(String number) { if(number.startsWith("1.") || number.equals("1")) return number.substring(1); // remove the first char: the initial '1' else return number; } /** @return fedora's baseURL. It's of the form * "http://localhost:8080/fedora" */ public String getBaseURL() { return baseURL; } /** @return the portAddressURL (in use) of the Fedora APIA * web service (should be the endpoint location in the APIA's * WSDL file). * It's usually of the form baseURL+"/services/access" */ public String getPortAddressURL() { return this.baseURL + this.portAddressSuffix; } /** @return the baseURL for gsdlAssocFiles */ public String getAssocFileBaseURL() { return baseURL + "/get/"; } public static void main(String args[]) { try { FedoraConnection fedoraCon = new FedoraConnection(new File("fedoraGS3.properties")); String[] pids = null; pids = fedoraCon.getCollections(); String[] titles = fedoraCon.getCollectionTitles(pids); for(int i = 0; i < pids.length; i++) { System.out.println("extracted title:" + titles[i]); String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]); String[] docTitles = fedoraCon.getDocTitles(docPIDs); for(int j = 0; j < docPIDs.length; j++) { System.out.println("\tExtr doc title: " + docTitles[j]); } } String PID = "greenstone:gs2mgdemo-collection"; String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1"; String dcXML = fedoraCon.getDC(PID); String exXML = fedoraCon.getEX(PID); String tocXML = fedoraCon.getTOC(docPID); System.out.println("Dublin Core Metadata for " + PID + " is:\n" + dcXML); System.out.println("GS3 extracted metadata for " + PID + " is:\n" + exXML); System.out.println("Table of Contents for " + docPID + " is:\n" + tocXML); String[] sectionNames = fedoraCon.getSectionNames(docPID); System.out.println("\nSection names for " + docPID + " are:"); for(int i = 0; i < sectionNames.length; i++) System.out.println(sectionNames[i]); String[] sectionNumbers = 
fedoraCon.getSectionNumbers(docPID); //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers); String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames); System.out.println("\nSection numbers for " + docPID + " are:"); for(int i = 0; i < sectionNumbers.length; i++) { //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]); System.out.println(sectionNames[i] + " " + sectionTitles[i]); } String sectionID = "SECTION1"; //SECTION1.5 System.out.println("\n"); System.out.println( "browsing greenstone's gs2mgdemo collection by (first) letter F:"); pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f"); for(int i = 0; i < pids.length; i++) System.out.println(pids[i]); System.out.println( "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:"); pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false); for(int i = 0; i < pids.length; i++) System.out.println(pids[i]); System.out.println("\nDone - exiting."); System.exit(0); } catch(RemoteException re) { System.out.println("Remote Exception when calling web service operation\n" + re.getMessage()); re.printStackTrace(); } catch(Exception e) { System.out.println("Unable to instantiate FedoraConnection\n" + e); e.printStackTrace(); //LOG.error("Unable to instantiate FedoraConnection\n" + e, e); } } }