/** *######################################################################### * FedoraConnection.java - works with the demo-client for Greenstone 3, * of the Greenstone digital library suite from the New Zealand Digital * Library Project at the * University of Waikato, New Zealand. *

* Copyright (C) 2008 New Zealand Digital Library Project *

* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *

* This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *######################################################################## */ package org.greenstone.fedora.services; import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants import org.greenstone.gsdl3.util.GSXML; import fedora.client.utility.AutoFinder; import fedora.server.access.FedoraAPIAServiceLocator; // The object for accessing FedoraAPI-A web services: import fedora.server.access.FedoraAPIA; // The definitions for all complex fedora types: import fedora.server.types.gen.MIMETypedStream; import fedora.server.types.gen.RepositoryInfo; import fedora.server.types.gen.FieldSearchResult; import fedora.server.types.gen.FieldSearchQuery; import fedora.server.types.gen.DatastreamDef; import fedora.server.types.gen.ObjectFields; import fedora.server.types.gen.Condition; import fedora.server.types.gen.ComparisonOperator; //import fedora.server.types.gen.*; import javax.net.ssl.SSLHandshakeException; import java.net.ConnectException; import org.xml.sax.SAXException; import java.io.UnsupportedEncodingException; import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import java.net.MalformedURLException; import java.rmi.RemoteException; import java.io.StringReader; import java.io.FileInputStream; import java.io.File; import java.util.TreeSet; import java.util.Properties; import java.util.Vector; import java.awt.GridLayout; import javax.swing.JLabel; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JPasswordField; import javax.swing.JTextField; import org.apache.log4j.Logger; import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException; import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException; import 
org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException; import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException; import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.transform.*; import org.xml.sax.InputSource; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.w3c.dom.Node; /** Class that establishes a connection with Fedora's web services (via * Java stub classes for the same) and then provides methods to retrieve * Greenstone-specific data, such as the TOC, EX, DC,and Section * datastreams of the Greenstone documents stored in Fedora's repository. * These datastreams are returned as Strings without any changes being * made to them. * @author ak19 */ public class FedoraConnection implements FedoraGS3DL { /** The logging instance for this class */ private static final Logger LOG = Logger.getLogger( FedoraConnection.class.getName()); /** The version of fedora that is supported by class FedoraConnection */ protected static final String SUPPORTED_VERSION = "3.3"; //"2.2.1"; // 3.3 works with genericSearch version 2.2 // 3.6.1 works with genericSearch version 2.5 /* Some fixed strings of known literals */ protected static final String GET = "/get/"; // The DemoSOAPClient declares and uses the following as a static member // Probably none of the APIA methods (web service methods) remembers // state, that might explain why we can use it as a static member then. /** The object used to access the Fedora API-A web service methods */ protected FedoraAPIA APIA; /** Version of the running fedora server */ protected String fedoraVersion; /** The location of the fedora server, usually of the form: protocol://host:port/fedora * e.g. 
(and default) http://localhost:8080/fedora */ protected String baseURL; /** The user-specified portAddressSuffix of the Fedora Access web services * (endpoint URL in the WSDL), usually of the form * http://localhost:8080/fedora/services/access * Users can tell FedoraGS3 to try accessing that first by setting * the "port.address.suffix" property in the properties file. * FedoraGS3 itself will not write the portAddressSuffix currently used in * the file for next time, but leave whatever value was entered in the * properties file. The portAddress--not just suffix--currently in use (once * the FedoraAPIA handle has been instantiated) can be obtained through * getPortAddressURL() method. */ protected String portAddressSuffix; /** The part of the portAddress that comes after the baseURL. By default and * usually this is: "/services/access" */ protected static final String defaultPortAddressSuffix = "/services/access"; /** The preferred language of the display content */ protected String lang; /** The maximum number of collections to retrieve */ protected int maxresults; /** DocumentBuilder used to create and parse XML documents */ protected DocumentBuilder builder; /** Static method that returns the version of Fedora supported by this * class FedoraConnection. */ public static String getSupportedVersion() { return SUPPORTED_VERSION; } /** The version of the running Fedora server, which may or may not * match the supported version. */ public String getFedoraVersion() { return fedoraVersion; } /** @return the default language used to query for titles (and anything else * where there are multiple language options). Upon initialisation, this * defaults to English. */ public String getLanguage() { return lang; } /** Sets the the default language used to query for titles (and anything else * where there are multiple language options). If the default language for any * query is not available, then English ("en") is used. 
* If that's not available
     * then the first other available language is used.
     * @param lang - the two-letter language code to set the default language to.
     */
    public void setLanguage(String lang) {
        this.lang = lang;
    }

    /** Reports the cap applied by default to the number of search results.
     * @return the value currently held in the maxresults field. */
    public int getMaxResults() {
        return this.maxresults;
    }

    /** Replaces the cap applied by default to the number of search results.
     * @param maxresults - the new default maximum number of search results to
     * be returned. */
    public void setMaxResults(int maxresults) {
        this.maxresults = maxresults;
    }

    /** Code for this constructor is from DemoSOAPClient.java.
     * Instantiates the APIA handle using the protocol, host, port, fedora
     * server repository username and password.
     * @param host - the fedora server host (may be prefixed with http:// or
     * https:// if parameter protocol is empty). If there's no protocol, and
     * no protocol prefixed to the host, then the protocol defaults to http.
     * @param protocol - either http or https (or empty "")
     * @param port - the port on which fedora is running.
     * @param fedoraServerUsername - the administrator username required to
     * access the fedora server's repository. ("fedoraAdmin" unless changed).
     * @param fedoraServerPassword - the fedora server repository's
     * administrator password. If none was set on fedora installation, this
     * can be empty ("").
*/ public FedoraConnection(String protocol, String host, int port, String fedoraServerUsername, String fedoraServerPassword) throws ParserConfigurationException, MalformedURLException, SSLHandshakeException, RemoteException, AuthenticationFailedException, NotAFedoraServerException, ConnectException, Exception { try { this.portAddressSuffix = ""; init(protocol, host, Integer.toString(port), fedoraServerUsername, fedoraServerPassword); } /*catch(RemoteException re) { //subclass of IOException throw re; } catch(SSLHandshakeException ssle) { //subclass of IOException // this is also of type IOException throw ssle; }*/ catch(IOException ioe) { // connected to the wrong server String exceptMsg = ioe.getMessage().toLowerCase(); if(exceptMsg.indexOf("request failed") != -1 || exceptMsg.indexOf("404") != -1) throw new NotAFedoraServerException(); else // the IOException is not due the cause we thought it was, so throw ioe; // rethrow whatever other IOException was caught (which // could have been RemoteException or SSLHandshakeException // or some other cause) } } /** Default constructor which takes input from the user to get host, port, * fedora username and password. * It keeps looping to display authentication popup, until valid values are * entered: * (a) if password is wrong, a RemoteException is thrown and popup reappears; * This popup keeps appearing until the password and username are correct (as * long as there's indeed a fedora server listening at the given host and port). * (b) SSLHandshakeException occurs: this happens EITHER when the user prefixed * the 'https' protocol to the host string when it should have been 'http'; * OR the ssl connection failed for some other reason. * Allowing for the 1st case, the authentication popup is displayed just once * more. On the second (consec) attempt, the SSLHandshakeException is rethrown. 
* NOTE: if a fedora server at the protocol (https or http) isn't accessible, * it takes a long time for the SSLHandshakeException to be thrown. * (c) if the connection is refused, then a ConnectException is thrown. * In that case, it's * EITHER because the host and port values that were entered are wrong (and * the authentication popup dialog is redisplayed just once more allowing * the user to correct host/port values) * OR the entered host and part were right but the fedora server at this * host and port is not running. * On the second consecutive attempt where a ConnectionException is thrown, * it's no longer processed but rethrown, as there's no use in redisplaying * the authentication popup when the problem is not an authentication issue. * (d) Another IOException (other than the SSLHandshakeException of (b)) * occurs when there is indeed a server listening at the host and port * entered, but it's not a Fedora server, because it is unable to process * Fedora requests. If the expected message is found in the exception, than * the authentication popup is displayed. However, other causes for an * IOException are not handled. In such cases, the IOException is rethrown. * (Note that IOException is not in the throws clause - other causes for * it being unknown, it can be be considered as the more generic Exception. */ public FedoraConnection() throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { Properties properties = new Properties(); // loop to display fedora server authentication popup to // get user input setInitialisationProperties(properties); properties = null; // finished } /** Single argument constructor that takes the name of the properties file * defining the values of the initialisation parameters required to * instantiate a FedoraConnection. These are fedora server username, password, * host and port. 
If these values are not present in the file, they are set * to "" before showing the initialisation input dialog. * @param propertyFile is the name of the properties file specifying the * values for Fedora server username, password, host and port. */ public FedoraConnection(File propertyFile) throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { Properties properties = new Properties(); // Load the properties from the given file try{ if(propertyFile.exists()) { properties.load(new FileInputStream(propertyFile)); } } catch(Exception e) { // If the file didn't exist or could not be located, // then we just continue by creating empty properties LOG.warn("Exception loading from propertyFile " + propertyFile + ": " + e); } // Go through the process of showing the initialisation dialog setInitialisationProperties(properties); // Now let's save whatever values the user may have entered into the // input dialog as the default values for next time the dialog shows try { java.io.FileOutputStream out = new java.io.FileOutputStream( propertyFile); // same file as properties loading file // First make sure errormessage gets stored as "" and doesn't // cause problems next time. properties.setProperty("errormessage", ""); // Don't save passwords properties.setProperty("password", ""); // If the portAddressSuffix is in the file already, then it's // user-specified and we shouldn't change it. But if there is no // such property in the file, then create it and write it to the file // with an empty string value: String portSuffix = properties.getProperty("port.address.suffix"); if(portSuffix == null) { properties.setProperty("port.address.suffix", ""); } properties.store(out, "fedoraGS3 properties"); // write properties // Javadoc states that "The output stream remains open after this // method (Properties.store) returns." 
So we close it here out.close(); } catch(Exception e) { LOG.warn("Exception writing to propertyFile " + propertyFile + ": " + e); } properties = null; // finished } /** Method that loops to display the dialog that retrieves the * fedora server initialisation properties from the user. If there * is a property file with values set already, it will display * the previously entered values by loading them from that file. * Otherwise, input fields in the dialog are empty. * @param properties the Properties Hashmap storing values for * username, password, host and port (and any errormessage). */ protected void setInitialisationProperties(Properties properties) throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { // keep looping to display authentication popup, until valid values are // entered (except when a ConnectionRefused Exception is caught - this // needs to be rethrown): boolean authenticated = true; // reset any error messages that may have been stored (should not be // the case, but if there had been any difficulty during storing, it // may not have written out an empty errorMessage) properties.setProperty("errormessage", ""); do{ // show the Authentication-popup: // By passing the HashMap Properties, user-updated values will // be persistent in the authentication-popup fields (rather than // reset to the default initial values). properties = showAuthenticationPopup(properties); String fedoraServerUsername = properties.getProperty("username", ""); String fedoraServerPassword = properties.getProperty("password", ""); String host = properties.getProperty("host", ""); String port = properties.getProperty("port", ""); //String protocol = host.startsWith("http") ? 
"" : "http://"; String protocol = "http://"; if(host.startsWith("http") || host.startsWith("https")) protocol = ""; // NOTE THAT: if a fedora server at https:// is not accessible, // it takes a long time for the authentication popup to reappear. try{ this.portAddressSuffix = properties.getProperty("port.address.suffix", ""); // Use the FedoraClient utility to get the SOAP stub for APIA. // This SOAP stub enables the client to connect to a Fedora // repository via the API-A web service interface. init(protocol, host, port, fedoraServerUsername, fedoraServerPassword); // will throw Exception if it can't instantiate APIA // if no exception thrown in the initialisation statement above, // then we have been authenticated: authenticated = true; } catch(AuthenticationFailedException afe) { authenticated = false; properties.setProperty("errormessage", afe.getMessage()); } catch(RemoteException e) { // causes could be various String reason = e.getMessage(); if(e.getCause() != null) { // For instance, if a ConnectException indicating // 'Connection Refused' or a java.net.UnknownHostException // caused the RemoteException // Strip out prefix "Nested exception is..." from the // encapsulating Exception's message, by using the Cause's // message. Keep Exception classname to give it some context: reason = e.getCause().getClass().getName() + ": " + e.getCause().getMessage(); // Give some more information if the connection was refused. 
// (This can also happen when the Fedora server is not running) if(e.getCause().getClass().equals(ConnectException.class)) { reason += FedoraGS3Exception.connectionRefusedMessage; } } // if the message indicates that a server was running there, // then we tell the user it was not a Fedora server if(reason.toLowerCase().contains("404") || reason.toLowerCase().contains("request failed")) { reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")"; } authenticated = false; properties.setProperty("errormessage", reason); } catch(ConnectException e) { properties.setProperty("errormessage", FedoraGS3Exception.connectionRefusedMessage); authenticated = false; } catch(SSLHandshakeException ssle) { // SSLHandshakeException should // be handled before IOException, as it's an IOException subclass. authenticated = false; properties.setProperty("errormessage", FedoraGS3Exception.sslHandshakeExceptionMessage); // we won't prefix the host with http for the user, as https // might be right after all, and something else might have gone // during the connection attempt instead. //host = host.replace("https", "http"); //setting it for them //properties.setProperty("host", host); } catch(IOException ioe) { // occurs when we try to connect to a // host/port where some server other than Fedora's is listening // (e.g. if we end up connecting to GS3's host and port). 
// In that case, we can get exception messages like a 404: // "Unable to instantiate FedoraConnection // java.io.IOException: Request failed [404 /fedora/describe]" // Test this by trying to connect to localhost at 9090 where GS3 is String exceptMsg = ioe.getMessage().toLowerCase(); if(exceptMsg.indexOf("request failed") != -1 || exceptMsg.indexOf("404") != -1) { properties.setProperty("errormessage", NotAFedoraServerException.MESSAGE + "\n(" + ioe.getMessage() + ")"); } else if(exceptMsg.indexOf("401") != -1 || exceptMsg.indexOf("500") != -1) { authenticated = false; properties.setProperty("errormessage", ioe.getMessage()); } else { // the exception occurred for some other reason, rethrow it throw ioe; } } } while(!authenticated); // will keep showing popup until auhentication // and connection input values are valid } /** * Static method that displays a popup to allow the user to provide Fedora * authentication (username, pwd) and connection (protocol+host, port) details. * @param properties is a Properties HashMap where the property Keys which must * have been put in here in advance (even with "" Values if appropriate) are: *
	 * - username
	 * - password
	 * - host (may - but need not - be prefixed with either of the protocols 
	 *   "http://" and "https://"
	 * - port
	 * - errormessage (displayed near the top of the popup dialog). Can be "".
	 * 
* The values stored in the properties HashMap for the above property are * initially displayed in the fields and the user can overwrite them. * This is useful in such cases where invalid values were entered and this * popup must be redisplayed to allow the user to correct their previous input. * @return the same HashMap Properties which was passed as parameter. */ protected static Properties showAuthenticationPopup(Properties properties) throws CancelledException { // Retrieve all the properties -- defaults to "" if any are null JTextField usernameField = new JTextField( properties.getProperty("username", "fedoraAdmin")); JTextField passwordField = new JPasswordField( properties.getProperty("password", "")); JTextField hostField = new JTextField( properties.getProperty("host", "localhost")); JTextField portField = new JTextField( properties.getProperty("port", "8080")); JPanel panel = new JPanel(new GridLayout(4,2)); panel.add(new JLabel("User Name")); panel.add(usernameField); panel.add(new JLabel("Password")); panel.add(passwordField); panel.add(new JLabel("Host")); panel.add(hostField); panel.add(new JLabel("Port")); panel.add(portField); String heading = "Fedora Server Admin Authentication:"; String errorMessage = properties.getProperty("errormessage", ""); if(!errorMessage.equals("")) { heading = "=> " + errorMessage + "\n\n" + heading; } int option = JOptionPane.showConfirmDialog(null, new Object[] { heading, panel}, "Enter Network Password", JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE); if (option == JOptionPane.OK_OPTION) { String fedoraServerUsername = usernameField.getText(); String fedoraServerPassword = passwordField.getText(); String host = hostField.getText(); String port = portField.getText(); properties.setProperty("username", fedoraServerUsername); properties.setProperty("password", fedoraServerPassword); properties.setProperty("host", host); properties.setProperty("port", port); } else { // Cancel option throw new CancelledException(); 
} return properties; } /** Init method that is called by the constructor to set some * important member variables including instantiating the APIA object * used to invoke the Fedora APIA web service operations. * @param protocol can be http or https * @param host is the name of the Fedora server host * @param port is the port number (String form) of the Fedora server * @param fedoraServerUsername is the user name to access the Fedora * Server * @param fedoraServerPassword is the password needed to access the * Fedora Server */ protected void init(String protocol, String host, String port, String fedoraServerUsername, String fedoraServerPassword) throws ParserConfigurationException, MalformedURLException, AuthenticationFailedException, RemoteException, Exception { // initialise member variables lang = ENGLISH; maxresults = Integer.MAX_VALUE; DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); // (protocol is "" if host already contains protocol) if(!protocol.equals("") && !protocol.endsWith("://")) protocol += "://"; // now create baseURL = protocol://host:port/fedora this.baseURL = protocol + host + ":" + port + "/fedora"; // Get the FedoraAPIA handle to/stub of the Fedora web services // New way of instantiating connection to Fedora is dependent on // fewer files of FedoraClient.jar FedoraAPIAServiceLocator serviceLocator = new FedoraAPIAServiceLocator(fedoraServerUsername, fedoraServerPassword); APIA = null; boolean isUserSpecifiedPortAddressSuffix = false; // try any portAddressSuffix specified by the user if(!this.portAddressSuffix.equals("")) { isUserSpecifiedPortAddressSuffix = true; this.createAPIA(serviceLocator, this.portAddressSuffix, "user-specified", isUserSpecifiedPortAddressSuffix); } // If the user-specified portAddressSuffix failed or if there was none // given, then APIA will be null, so we will try with the default // portAddressSuffix. This time all exceptions will be passed on. 
if(APIA == null) { isUserSpecifiedPortAddressSuffix = false; this.createAPIA(serviceLocator, defaultPortAddressSuffix, "default", isUserSpecifiedPortAddressSuffix); } } /** Tries to create the FedoraAPIA instance using the serviceLocator * and the given portSuffix. The APIA instance is obtained for the * baseURL+portSuffix. Any exceptions are (processed and) rethrown * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the * Remote Exception from AXIS that it can't find the target service to * invoke is ignored so that the caller can retry with the default port- * address suffix first before giving up. */ protected void createAPIA(FedoraAPIAServiceLocator serviceLocator, String portSuffix, String messageInsert, boolean isUserSpecifiedPortAddressSuffix) throws Exception { //String portSuffix = (isUserSpecifiedPortAddressSuffix) ? // this.portAddressSuffix : defaultPortAddressSuffix; try { LOG.debug( "Trying to connect to Fedora using the given" + " baseURL and the " + messageInsert + " portAddress suffix:\n" + baseURL + portSuffix); APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP( new java.net.URL(baseURL+portSuffix)); // let's test whether we're authenticated (otherwise a // RemoteException will be thrown to indicate that the // password was incorrect.) 
RepositoryInfo repositoryInfo = APIA.describeRepository(); // throws RemoteException if pwd wrong or for other reasons // in which case describeRepository() service is unavailable this.fedoraVersion = repositoryInfo.getRepositoryVersion(); // If we come all the way here, no exceptions were thrown: this.portAddressSuffix = portSuffix; // store the one currently in use } catch(RemoteException re) { // if we're here, then APIA was unable to call the web service // If this was because the fedora authentication failed, then // let's throw a custom exception String message = re.getMessage().toLowerCase(); // Looking for something Unauthorized(401) if(message.indexOf("unauthorized") != -1 || message.indexOf("401") != -1) { throw new AuthenticationFailedException(); } else if(isUserSpecifiedPortAddressSuffix && re.getMessage().contains( FedoraGS3Exception.missingTargetService)) { LOG.warn("Failed to connect to Fedora APIA services at given" + " port address:\n" + portSuffix + "\nException: " + re.getMessage()); // APIA.describeRepository can throw a remote exception // whereby AXIS says the target service is missing and can't // be invoked (FedoraGS3Exception.missingTargetService) // Don't rethrow this, if AXIS can't find the user-specified // portAddressSuffix, we will try with the default suffix next APIA = null; } else { // if trying default portAddressSuffix or if any other // RemoteException was generated (whose cause is something // other than an authentication failure) rethrow it. throw re; } } catch(Exception e) { // Other Exceptions // Could possibly be a ServiceException when using ServiceLocator if(isUserSpecifiedPortAddressSuffix) { APIA = null; // we won't throw other exceptions yet until // we have tried the default PortAddressSuffix for the baseURL } else { throw new FedoraGS3InitFailureException(e); } } } /** Gets all greenstone collections. Searches for greenstone:*-collection. 
* Method getCollections() defaults to getting only those objects in fedora's * repository whose pids are of the format greenstone:*-collection. * The use of AutoFinder and findObjects is shown in * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java * The Fedora-APIA's method definition of findObjects is: *
	 * fedora-types:FieldSearchResult findObjects(
	 * 				fedora-types:ArrayOfString resultFields,
	 * 				xsd:nonNegativeInteger maxResults,
	 * 				fedora-types:FieldSearchQuery query )
	 * 
* @see The local fedora search page for how the search works * @see Fedora access API, API-A for method findObjects * @see XML type definition of FieldSearchQuery * @see Type definition of 2.2.1 FieldSearchQuery * @see does not apply: type definition of 2.1.1 FieldSearchQuery * @see BrowseController.java for an example * * @return an array of Strings containing the pids of all collections * matching the format greenstone:*-collection. */ public String[] getCollections() throws RemoteException { // Available constructors: // FieldSearchQuery(java.util.List conditions) // FieldSearchQuery(java.lang.String terms) final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION; FieldSearchQuery query = new FieldSearchQuery(); query.setTerms(queryStr); query.setConditions(null); // we'd like pid and title returned for each object // we pass maxResults=null to get all objects that match // (i.e. all collections) String[] pids = null; FieldSearchResult collection = AutoFinder.findObjects( APIA, new String[]{"pid", "title"}, maxresults, query); ObjectFields[] results = collection.getResultList(); pids = new String[results.length]; for(int i = 0; i < results.length; i++) { pids[i] = results[i].getPid(); } return pids; } /** All objects (incl "greenstone:*" objects) in fedora - be they collections, * top-level documents or document sections - have a DC datastream. This * method returns the content (XML) of the DC datastream as it is stored in * fedora's repository. * (The pid/DC call is one of the default fedora-system 3 disseminations.) * Try an example of the form: http://localhost:8080/fedora/get/<pid>/DC * To obtain the DC/any datastream, we use method getDatastreamDissemination() * of the interface FedoraAPIA. This method returns a MIMETypedStream. 
* The method signature is: * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime) * where dsID = itemID (look at datastreams page of running fedora instance) * To access the XML content of the MIMETypedObject returned, we use its method * bytes[] getStream(), but when instantiating a String from this, we have to * use the String() contructor where we can specify the charset encoding (in * this case, it must be UTF-8). Else getStream() returns gobbledygook. * @return a String version of the XML in the DC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. * @see FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class * @see TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage. * @see constructor String(byte[], java.lang.String) * @see Charset.java, for character sets and encoding */ public String getDC(String pid) throws RemoteException, UnsupportedEncodingException { // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC // datastream ID, dsID = itemID, look at a running fedora MIMETypedStream dcStream = APIA.getDatastreamDissemination(pid, DC, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(dcStream.getStream(), UTF8); } /** All "greenstone:*" objects in fedora (be they collections be they * collections, top-level documents or document sections) have an EX * datastream. This method returns the content (XML) of the EX datastream as * is. (It calls the default fedora-system 3 dissemination <pid>/EX.) * @return a String version of the XML in the EX datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. 
* Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime). * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream(). * @see String getDC(String pid) throws Exception * */ public String getEX(String pid) throws RemoteException, UnsupportedEncodingException { MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(exStream.getStream(), UTF8); } /** Some "greenstone:*" top-level documents in the fedora repository (but not * greenstone collections or document sections) have a DLS metadata datastream. * This method returns the content (XML) of the DLS datastream as is. (It calls * the default fedora-system 3 dissemination <pid>/DLS.) * @return a String version of the XML in the DLS datastream for the fedora * object denoted by pid, or "" if the document given by pid has no DLS datastream. * @param pid - the fedora persistent identifier for an item in the fedora * repository. * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime). * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream(). 
* @see String getDC(String pid) throws Exception * */ public String getDLS(String pid) throws RemoteException, UnsupportedEncodingException { MIMETypedStream dlsStream = null; // If there is no DLS datastream, it throws an exception (whose class // fedora.server.errors.DatastreamNotFoundException can't be imported // here (it's not in the client side fedora.server.* package, but on // the server side package of that name): try{ dlsStream = APIA.getDatastreamDissemination(pid, DLS, null); //asOfDateTime=null to get the current version of the dataStream } catch(RemoteException e) { //These two don't work: //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException")) //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException")) if(e.getMessage().contains("No datastream could be returned.") || e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException")) { // there is no DLS data stream for this document return ""; } else { // different problem, exception due to different cause throw(e); } } if(dlsStream == null) return ""; // need to set the charset encoding to UTF8 return new String(dlsStream.getStream(), UTF8); } /** All "greenstone:*" objects in fedora (be they collections or documents) * have a TOC datastream, unless they have only 1 section (SECTION1). * This method returns the content (XML) of the TOC datastream as is. * (Calls default fedora-system 3 dissemination <pid>/TOC.) * @return a String version of the XML in the TOC datastream for the fedora * object denoted by pid. * @param pid - the fedora persistent identifier for an item in the fedora * repository. 
* Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID, * asOfDateTime) * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream() * @see String getDC(String pid) throws Exception * */ public String getTOC(String pid) throws RemoteException, UnsupportedEncodingException { try { MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null); //asOfDateTime = null to get the current version of the dataStream // need to set the charset encoding to UTF8 return new String(tocStream.getStream(), UTF8); } catch(RemoteException re) { // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1 return new String("
".getBytes(), UTF8); //set charset } } /** @return the <name>s (in greenstone:<name>-collection) * for the collections indicated by collPIDs. * @param collPIDs - an array of Strings denoting the pids for greenstone * collections stored in the fedora repositoryl. These should be of the * format "greenstone:<collectionName>-collection". */ public String[] getCollectionNames(String[] collPIDs) { String[] collNames = new String[collPIDs.length]; for(int i = 0; i < collPIDs.length; i++) collNames[i] = getCollectionName(collPIDs[i]); return collNames; } /** @return "greenstone:<name>-collection" for all <name>s * in the parameter collNames. * @param collNames - a list of names of greenstone collections * stored in the fedora repository. */ public String[] getCollectionPIDs(String[] collNames) { String[] collPIDs = new String[collNames.length]; for(int i = 0; i < collNames.length; i++) collPIDs[i] = getCollectionName(collNames[i]); return collPIDs; } /** @return greenstone:<name>-collection for the<name> * denoted by parameter collName. * @param collName - the name of a greenstone collection stored * stored in the fedora repository. */ public String getCollectionPID(String collName) { return GREENSTONE_+collName+_COLLECTION; } /** * Gets the title of the collection denoted by the given collection's pid by * retrieving the title metadata for it from the collection's EX datastream. * @return the title (in the default language, else English, else the * first title found) for the particular collection denoted by its PID. * @param collPID is the pid of a greenstone collection in the fedora * repository. */ public String getCollectionTitle(String collPID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String title = null; // has to be null initially, we do a check on it // Parse the EX datastream (XML), and in its DOM, find the // title // There might be one OR several of those with attribute // name="collectionname". 
If there's only one, then get that. // If there are several, there would possibly a be qualifier attribute, // in which case get qualifier=lang (where lang is the member variable) // If there is no qualifier with the requested language, then get the // english one which is likely to be there, else return the title for // the first collectionname . MIMETypedStream exdata = APIA.getDatastreamDissemination(collPID, EX, null); String exStream = new String(exdata.getStream(), UTF8); InputSource source = new InputSource(new StringReader(exStream)); Document doc = builder.parse(source); Element docEl = doc.getDocumentElement(); // docEl= NodeList children = docEl.getChildNodes(); String firstName = ""; String englishName = ""; for(int i = 0; i < children.getLength(); i++ ) { Node n = children.item(i); if(n.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element)n; if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(COLLECTIONNAME)) { firstName = FedoraCommons.getValue(e); if(!e.hasAttribute(QUALIFIER)) { title = FedoraCommons.getValue(e); break; } else if(e.getAttribute(QUALIFIER).equals(lang)) { title = FedoraCommons.getValue(e); break; } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) { englishName = FedoraCommons.getValue(e); } } } } // if the title is still not set to that of the requested language, // then try setting it to the collection name in English. If English // isn't available, then set it to the first collection name provided // (in whichever language). if(title == null) { title = englishName.equals("") ? firstName : englishName; } doc = null; return title; } /** @return the collection titles for all the collections indicated by * collPIDs. * @param collPIDs - a list of pids identifying greenstone collections * stored in the fedora repository. 
*/ public String[] getCollectionTitles(String[] collPIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String[] titles = new String[collPIDs.length]; // parse each EX datastream (XML) which contains the gs3-extracted meta. for(int i = 0; i < collPIDs.length; i++) { titles[i] = getCollectionTitle(collPIDs[i]); } return titles; } /** @return the title metadata for the given doc objects of a collection. * These titles are returned in the same order as the given docIDs. * (The docPIDs already contain the collection name anyway.) * @param docPIDs - a list of pids identifying documents stored in the * fedora repository. */ public String[] getDocTitles(String[] docPIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String[] titles = new String[docPIDs.length]; for(int i = 0; i < docPIDs.length; i++) { titles[i] = getDocTitle(docPIDs[i]); } return titles; } /** Gets the title metadata for a particular doc object in a collection * denoted by docPID. The docPID already contains the collection name. * @return the title for the fedora document item denoted by docPID * @param docPID is the pid of the document in the fedora repository * (docPID is of the form greenstone:<colName>-<doc-identifier> */ public String getDocTitle(String docPID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // We need the extracted metadata file, and find its // documentElement's child // sometitle // where the title we return is sometitle String title = ""; MIMETypedStream exdata = APIA.getDatastreamDissemination(docPID, EX, null); String exStream = new String(exdata.getStream(), UTF8); return getTitle(exStream); } /** Given a string representation of a document's or document section's * EX datastream -- which is a greenstone extracted metadata XML file -- * of the form: * <ex> * <ex:metadata name="Title">sometitle</ex:metadata> * <ex:metadata name="...">....</ex:metadata> * ... 
* </ex> * This method finds the <ex:metadata> where the name="Title" and * returns the value embedded in that element ('sometitle' in * the example above). * @return the title metadata of the document/document section whose EX * datastream is passed as parameter * @param exStream the EX datastream in String form of the document or * document section. */ protected String getTitle(String exStream) throws SAXException, IOException { String title = ""; InputSource source = new InputSource(new StringReader(exStream)); Document doc = builder.parse(source); Element docEl = doc.getDocumentElement(); // docEl= NodeList children = docEl.getChildNodes(); // Cycle through all the *element* children of // which are all of the form: // somevalue // Find the one where name="Title", its value is the title for(int i = 0; i < children.getLength(); i++ ) { Node n = children.item(i); if(n.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element)n; if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE)) { title = FedoraCommons.getValue(e); break; } } } return title; } /** Used to obtain the dc:title value (hashID) of the DC stream of a digital * object whose fedoraID is of a special sort: greenstone-http:-id. */ protected String getDCTitle(String fedoraPID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String title = ""; MIMETypedStream dcdata = APIA.getDatastreamDissemination(fedoraPID, DC, null); if(dcdata == null || dcdata.equals("")) { return title; } String dcStream = new String(dcdata.getStream(), UTF8); InputSource source = new InputSource(new StringReader(dcStream)); Document doc = builder.parse(source); Element docEl = doc.getDocumentElement(); // docEl= NodeList children = docEl.getElementsByTagName("dc:title"); if(children != null && children.getLength() > 0) { Node n = children.item(0); // Element e = (Element)n; title = FedoraCommons.getValue(e); } return title; } /** @return the title metadata for the given document sections. 
* These titles are returned in the same order as the given docPIDs * and associated sectionIDs. * (The docPIDs already contain the collection name anyway.) * @param docPIDs - a list of pids identifying documents stored in the * fedora repository. * @param sectionIDs - a list of sectionIDs identifying individual sections * of documents stored in the fedora repository whose titles are requested. */ public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String[] titles = new String[docPIDs.length]; for(int i = 0; i < docPIDs.length; i++) { titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]); } return titles; } /** @return the title metadata for the given document section. * (The docPID already contain the collection name anyway.) * @param docPID - a pid identifying a document in the fedora repository. * @param sectionID - the sectionID of the section of the * document whose title is requested. */ public String getSectionTitle(String docPID, String sectionID) throws UnsupportedEncodingException, RemoteException, SAXException, IOException { String ex = this.getSectionEXMetadata(docPID, sectionID); return getTitle(ex); } /** Searches the fedora repository for all greenstone:<colPID>* and * returns the PIDs of the data objects found, with the exception of * greenstone:<colPID>-collection, which is not a document but a * collection PID. * That is, pids of objects whose pid is greenstone:<colName>* * (but not greenstone:<colName>-collection itself, because that represents * the collection and not an object of the same collection) are returned. * All pids that do not map to a collection are assumed to be documents! * @return a list of the pids of all the (doc) objects in a collection. * @param colPID is the pid of the greenstone collection stored in * the fedora repository. 
*/ public String[] getCollectionDocs(String colPID) throws RemoteException { String colName = getCollectionName(colPID); //LOG.debug("colName: " + colName); // Search fedora objects for pid=greenstone:-* final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD; // searches for "greenstone:"+colName+"-*"; FieldSearchQuery query = new FieldSearchQuery(); query.setTerms(queryStr); query.setConditions(null); String[] pids = null; FieldSearchResult objects = AutoFinder.findObjects( APIA, new String[]{"pid", "title"}, maxresults, query); ObjectFields[] results = objects.getResultList(); // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because // that's not a document object: pids = new String[results.length-1]; // not storing collection object int index = 0; // keeps track of docPid index for(int i = 0; i < results.length; i++) { // check it's not a collection object if(!results[i].getPid().endsWith(_COLLECTION)) { pids[index] = results[i].getPid(); index++; } } return pids; } /** Given the pid of a document fedora data object, this method will return * all itemIDs that are part of that data object and are Sections. For further * information see interface Comparable (implemented by String), SortedSet * and TreeSet. * @return an array of itemIDs of the Sections of the document, * indicated by docPID, in ascending order. These are of the form: "SECTION1.*" * @param docPID is a fedora pid identifying a greenstone document object. * @see FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services * @see DatastreamDef.java * @see Fedora batch processing */ public String[] getSectionNames(String docPID) throws RemoteException { // DatastreamDef[] listDatastreams( // java.lang.String pid, java.lang.String asOfDateTime) // listDatastreams returns information on each item (including itemID=dsID) // in the document object indicated by docPID // Need to give an object version number, because null for asOfDateTime // does not return any datastreams! 
String[] times = APIA.getObjectHistory(docPID); DatastreamDef[] datastreams = APIA.listDatastreams( docPID, times[times.length-1]); // TreeSet is a SortedSet. We're going to put Strings into it, // and Strings implement interface Comparable already. TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator()) for(int i = 0; i < datastreams.length; i++) { String itemID = datastreams[i].getID(); if (itemID.startsWith("SECTION")) orderedList.add(itemID); } String[] sectionNames = new String[orderedList.size()]; orderedList.toArray(sectionNames); orderedList = null; return sectionNames; } /** Given the pid of a document fedora data object, this method will return all * itemIDs that are part of that data object and are Sections, but just the * Section numbers are returned. For further information see interface Comparable * (implemented by String), SortedSet and TreeSet. * @return an array of itemIDs of the Section numbers of the document * indicated by docPID, in ascending order. Return values are of form: "1.*". * @param docPID is a fedora pid identifying a greenstone document object. 
* @see FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services * @see DatastreamDef.java * @see Fedora batch processing */ public String[] getSectionNumbers(String docPID) throws RemoteException { String[] times = APIA.getObjectHistory(docPID); DatastreamDef[] datastreams = APIA.listDatastreams(docPID, times[times.length-1]); //Vector v = new Vector(datastreams.length); TreeSet orderedList = new TreeSet(); for(int i = 0; i < datastreams.length; i++) { String itemID = datastreams[i].getID(); if (itemID.startsWith("SECTION")) { //int index = SECTION.length(); //itemID = itemID.substring(index); itemID = removePrefix(itemID, SECTION); orderedList.add(itemID); } } String[] sectionNumbers = new String[orderedList.size()]; orderedList.toArray(sectionNumbers); orderedList = null; return sectionNumbers; } /** @return the titles for the document sections denoted by the parameters. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionIDs is a list of identifiers identifying sections in the * document denoted by docPID, whose titles need to be returned. Each * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1) * or a section number (eg. 1.5.1). */ public String[] getTitles(String docPID, String[] sectionIDs) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String[] titles = new String[sectionIDs.length]; for(int i = 0; i < titles.length; i++) titles[i] = getTitle(docPID, sectionIDs[i]); return titles; } /** @return the title for the document section denoted by the parameters. * @param docPID is a fedora pid identifying a greenstone document object. * @param sectionID identifies the particular section in the document denoted * by docPID, whose title needs to be returned. The sectionID may be either a * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). 
*/ public String getTitle(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // Compose the itemID for the EX data stream from the number in the // sectionID: String exID = removePrefix(sectionID, SECTION); exID = EX+convertToMetaNumber(exID); // Retrieve the extracted metadata stream (EX, in XML) for the given // section String exStream = getItem(docPID, exID); // Extract the title from the XML, look for: // title InputSource source = new InputSource(new StringReader(exStream)); Document doc = builder.parse(source); Element docEl = doc.getDocumentElement(); // docEl= NodeList children = docEl.getElementsByTagName( EX.toLowerCase()+COLON+METADATA); // for(int i = 0; i < children.getLength(); i++) { Element e = (Element)children.item(i); if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE)) return FedoraCommons.getValue(e); // extract and return the title } return ""; // if we got here, then we couldn't find a title } /** @return the section's XML (as a String) as it is stored in fedora. * Works out if sectionID is a sectionName or sectionNumber. * @param docPID - a fedora pid identifying a greenstone document object. * @param sectionID - identifyies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSection(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException { if(!sectionID.startsWith(SECTION)) // then it has only section number sectionID = SECTION+sectionID; String sectionXML = this.getItem(docPID, sectionID); return sectionXML; } /** @return the required section's DC metadata XML datastream. * @param docPID - a fedora pid identifying a greenstone document object. * @param sectionID - identifyies the particular section in the * document denoted by docPID, may be a section name or number. 
*/ public String getSectionDCMetadata(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException { String dcID = removePrefix(sectionID, SECTION); // ensure we have just the section number dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number // now get the DC datastream for that number String dcXML = this.getItem(docPID, dcID); return dcXML; } /** Returns the section EX metadata XML datastream for SectionID which may be * a section name or number. Currently a few EX files are named awkwardly: * the EX file for section 1.* is actually associated with datastream EX.*. * But subsequent EX datastreams are named appropriately: for instance, * EX2.1.1 matches with section 2.1.1 * @return the required section's EX metadata XML datastream. * @param docPID - a fedora pid identifying a greenstone document object. * @param sectionID - identifyies the particular section in the * document denoted by docPID, may be a section name or number. */ public String getSectionEXMetadata(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException { String exID = removePrefix(sectionID, SECTION); exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number // now get the EX datastream for that for number String exXML = this.getItem(docPID, exID); return exXML; } /** Given a documentNode element, adds the nodetype attribute to all of its * docNode descendants. The nodetype is either Root, Internal or Leaf to indicate * whether the docnode is a toplevel document Node, or has children or has none. * @param e - the documentNode element whose descendants' nodetypes will be set * at method's end. 
*/ protected void addNodeTypeToDescendants(Element e) { NodeList sections = e.getElementsByTagName(SECTION_ELEMENT); for(int i = 0; i < sections.getLength(); i++) { Element section = (Element)sections.item(i); NodeList descendants = section.getElementsByTagName(SECTION_ELEMENT); if(descendants.getLength() > 0) { // if there are any descendants (which includes children) that are SECTIONS section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL); } else { section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF); } } } /** @return the part of the TOC XML file (which outlines doc structure) * relating to the given section. This includes the section denoted by * sectionID as well as all descendent subsections thereof. * @param docPID - a fedora pid identifying a greenstone document object. * @param sectionID - identifyies the particular section in the * document denoted by docPID, may be a section name or number. * @param structure can contain any combination of: ancestors, parent, * siblings, children, descendants, entire, specifying the portion of * the structure to retrieve. * @param info can contain any combination of: siblingPosition, numSiblings, * numChildren, documentType, requesting additional information about the structure. */ public Element getSectionStructureXML(String docPID, String sectionID, String structure, String info) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // get the TableOfContents (TOC) XML datastream as a String String xmlTOC = getTOC(docPID); // convert it into a DOM document InputSource source = new InputSource(new StringReader(xmlTOC)); Document doc = builder.parse(source); // toplevel element docEl =
Element docEl = doc.getDocumentElement(); addNodeTypeToDescendants(docEl); docEl.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT); if(structure.indexOf("entire") != -1) { // don't need to find the specific section, doc root is what's required docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info); return docEl; } if(sectionID.equals("")) { sectionID = "1"; } // Store just the number String sectionNumber = removePrefix(sectionID, SECTION); // Check whether we're requested to return the toplevel element itself // If sectionNumber=1, then the top-level element/document element // of the TOC XML is requested, so return the TOC as is. if(sectionNumber.equals("1") && structure.indexOf("descendants") != -1) { docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info); return docEl; } // if the root is the section required, return that if(docEl.getTagName().equals(SECTION_ELEMENT) && docEl.getAttribute(ID).equals(sectionNumber)) { Element substructure = getSubstructure(docEl, structure); return getStructureInfo(substructure.getOwnerDocument(), docEl, info); //return docEl; } // Else, get all
elements and find the //
and return that NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT); for(int i = 0; i < sections.getLength(); i++) { Element e = (Element)sections.item(i); if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) { Element substructure = getSubstructure(e, structure); return getStructureInfo(substructure.getOwnerDocument(), e, info); } } return null; // not found } /** Implements browsing document titles of a greenstone collection stored in * the fedora repository by letter. * @return the document pids whose titles start with the given letter. * @param collName - the name of the collection. * @param letter - the starting letter to browse by. */ public String[] browseTitlesByLetter(final String collName, final String letter) throws RemoteException, FedoraVersionNotSupportedException { String[] pids = null; // We want to do the following kind of search (assuming letter=f // and collName=demo): // pid~greenstone:demo* title~f* // We don't need to normalise the letter first (to search titles starting // with both uppercase and lowercase versions of the letter), because // Fedora always searches for both. // HOWEVER, searching for title~f* returns all documents containing f (or F) // ANYWHERE in their titles! // SOLUTION: search the collection for all titles containing f as given, // retrieving pid and title fields. Then from the list of results, select // only those titles that start with the given letter. // This may seem an unnecessarily cumbersome job (when it looked like it // should have worked with just title~f*), BUT, at least the resulting // documents will be reduced to a set of titles containing f; rather than // having to search *all* documents in the collection. 
final String title = letter+WILDCARD; FieldSearchResult objects = findObjectsWithTitlesContaining( collName, title); ObjectFields[] results = objects.getResultList(); TreeSet v = new TreeSet(); // TreeSet to return the results in //alphabetical order for(int i = 0; i < results.length; i++) { // from the result list, select those titles that don't // just *contain* the letter, but actually start with it: String resultTitle = results[i].getTitle(0); if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) { String pid = results[i].getPid(); // skip the collection object itself if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) { v.add(pid); //LOG.debug(resultTitle); } } } pids = new String[v.size()]; v.toArray(pids); return pids; } /** Implements querying document DC titles of a greenstone collection stored in * the fedora repository for a term that may occur anywhere in their titles. * @return the document pids whose DC titles contain the parameter term. * @param titleContents - the word or phrase to search the collection's * document titles for. Only one word, and this method finds Greenstone * DOCUMENT titles CONTAINING that word (if any). * @param startsWith - if true, searches for titles that start with * titleContents. Else it searches for titles that contain titleContents. */ public String[] searchDocumentTitles(String collName, String titleContents, boolean startsWith) throws RemoteException, FedoraVersionNotSupportedException { String[] pids = null; // We want to do the following kind of search (when written in Fedora's // REST format - see http://localhost:8080/fedora/search): // pid~greenstone:-* title~<1st word of titleContents> // We don't need to normalise the word first (to search titles starting // with both uppercase and lowercase versions of it), because // Fedora always searches for the normalised word. // 2 difficulties: // - We can only search for single words with Fedora's Conditional Search. 
// Obtain pids and titles of documents containing the first word and then // we filter the titles to those containing the entire phrase of // titleContents. // - Searching for title~FirstWord returns all documents containing // this word ANYWHERE in their titles. If parameter startsWith is false, // then this is fine. But if parameter startsWith is true, then go // through all the resulting titles found (containing FirstWord), select // only pids of those titles that contain the entire phrase titleContents final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD; int indexOfFirstSpace = titleContents.indexOf(' '); // check for space // if titleContents is a phrase (contains space), then it's not // a single word, in which case search for just the first word String title = titleContents; // assume it's a single word if(indexOfFirstSpace != -1) // if not single word but a phrase, store title = titleContents.substring(0, indexOfFirstSpace); // 1st word FieldSearchResult objects = findObjectsWithTitlesContaining( collName, title); if(objects == null) { final String[] empty = {}; return empty; } // Go through all the titles found and for those that match the criteria*, // store their pid. *Criteria: titles that start with OR contain the // word OR phrase of titleContents. 
ObjectFields[] results = objects.getResultList(); Vector v = new Vector(); // return pids in the order found for(int i = 0; i < results.length; i++) { // from the result list, select those titles that don't // just *contain* the first word, but the entire phrase of // words in titleContents: String resultTitle = results[i].getTitle(0); boolean accepted = false; // accept the resultTitle found String resultPID = results[i].getPid(); // skip the collection object itself, since it's not a document if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) { accepted = false; } // if titleContents is a single word and we are checking // whether resultTitle contains titleContents: else if(indexOfFirstSpace == -1) { // titleContents is a single word if(!startsWith) // titles that *contain* the word titleContents accepted = true; //accept all titles found // else startWith: accept titles starting with word titleContents else if (resultTitle.toLowerCase().startsWith( titleContents.toLowerCase())) accepted = true; } else { // otherwise, titleContents is a phrase of >1 word, need // to check that the result title contains the entire phrase if(startsWith && resultTitle.toLowerCase().startsWith( titleContents.toLowerCase())) accepted = true; else if(!startsWith && resultTitle.toLowerCase().contains( titleContents.toLowerCase())) accepted = true; } // if the resultTitle fit the criteria, store its pid if(accepted) { v.add(resultPID); //System.out.println(resultTitle); } } pids = new String[v.size()]; v.toArray(pids); return pids; } /** * @param collName - the collection of documents we'll be searching in. * @param titleWord - the word we'll be searching the document titles for. * (Fedora's search returns all objects whose title contains that word). * * Two kinds of search are provided by Fedora as stated in FedoraAccess.java * (see link): *
	 * "There are two search methods: a search on all fields or a search on
	 * specific fields. To search all fields the setTerms function of the 
	 * FieldSearchQuery must be used, with the parameter being the desired string.
	 * 
	 * To search by specific fields, you must create an array of Condition
	 * objects. Each condition consists of three parts: 
	 * the field to be searched (.setProperty()), 
	 * the operation to be used (.setOperator(ComparisonOperator. <operator>)), 
	 * and the search string (.setValue())"
	 * 
* We want to use the second search method above when browsing and searching, * and search for: pid~greenstone:<collName>* title~<letter>* * or pid~greenstone:<collName>* title~<first word of search phrase> * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java. * * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based * web services are defined. (The web.xml defines the "Servlets for REST-based * interfaces to the Fedora Repository Server"). * Do a search on the word "search": * fedora.server.access.FieldSearchServlet is the class we need to look at * It accesses a different Condition.java class: fedora.server.search.Condition.java * The above is what is used by the REST-based interface in FieldSearchServlet.java * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java * is what's used in the fedora client application that makes use of * the SOAP-based interface. * * @see FedoraAccess.java * @see MaAPI * @see Fedora server type Condition.java * @see Fedora client test Search.java */ protected FieldSearchResult findObjectsWithTitlesContaining( String collName, final String titleWord) throws RemoteException, FedoraVersionNotSupportedException { // Searching for pids of the form "greenstone:gs2mgdemo-*"; final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD; Condition[] conditions = new Condition[2]; conditions[0] = new Condition("pid", ComparisonOperator.has, pid); conditions[1] = new Condition("title", ComparisonOperator.has, titleWord); FieldSearchQuery query = new FieldSearchQuery(); query.setConditions(conditions); // We'd like pid and title returned for each object, because we'll make // use of title. We pass maxResults=null to get all objects that match // (i.e. all collections). 
FieldSearchResult objects = null; final String[] retrieveFields = {"pid", "title"}; try { objects = AutoFinder.findObjects( APIA, retrieveFields, maxresults, query); // collection = APIA.findObjects(new String[]{"pid", "title"}, // new NonNegativeInteger(Integer.toString(maxresults)), query); } catch(RemoteException ex) { if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) { // fedoraVersion is too low, searching/browsing is not possible // (because class Condition has changed after 2.0, from 2.1.1 // onwards) throw new FedoraVersionNotSupportedException(fedoraVersion); } else { LOG.error( "Remote exception when calling web service operation " + "findObject() to execute search:\n" + ex.getMessage()); ex.printStackTrace(); throw ex; } } return objects; // return the FieldSearchResult objects found } /** @return the <docName> in the parameter docPID (which is of the form: * greenstone:<colname>-<docName>) * @param docPID - pid of a greenstone document in the fedora repository. */ public String getDocName(String docPID) { return docPID.substring(docPID.indexOf('-')+1); } /** @return the <name> in the parameter collPID * (greenstone:<name>-collection) * If collPID is a docPID, this method does the same: return the <name> * in the docPID (greenstone:<name>-docID). * @param collPID - pid of a greenstone collection in the fedora repository. */ public String getCollectionName(String collPID) { return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-')); } /** Return the TOC substructure requested * @return an element containing a copy if element e with either only its child * elements or with all its descendants and/or its ancestors or only its parent * and/or its siblings (depending on what the parameter structure specifies). * @param e - the element to start copying from and whose structure is requested. 
* @param structure - a string containing any combination of the values: * ancestors, parent, siblings, children, descendants, * specifying the portion of the structure to retrieve. * @see Sun java thread on transforming a DOM XML to a String */ protected Element getSubstructure(Element original, String structure) { Document doc = builder.newDocument(); boolean descendants = (structure.indexOf("descendants") != -1) ? true : false; Node current = doc.importNode(original, descendants); // descendants=true: import/copy descendants. // Else, copy just current node original (later copy its direct children) Node parentOfCurrent = null; Node parentOfOriginal = original.getParentNode(); if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot) parentOfOriginal = null; } if(parentOfOriginal == null) { // no parentNode, so current is the root node. // can't get ancestors/parent/siblings, since all these need parentNode doc.appendChild(current); } else { // siblings, ancestors and parent requests all require parent node to exist // First check if we need to get ancestors, else for whether parent is required if(structure.indexOf("ancestors") != -1) { parentOfCurrent = doc.importNode(parentOfOriginal, false); Node child = null; Node parent = parentOfCurrent; // the copy Node n = parentOfOriginal.getParentNode(); // the doc to copy from while(n != null && n != original.getOwnerDocument()) { child = parent; parent = doc.importNode(n, false); // no descendants parent.appendChild(child); n = n.getParentNode(); } doc.appendChild(parent); // need to put the copied node into a document // else it won't have a parent doc (DOMSource can't work with it // without it having a document parent). 
} else if(structure.indexOf("parent") != -1) { parentOfCurrent = doc.importNode(parentOfOriginal, false); //parentOfCurrent.appendChild(current); doc.appendChild(parentOfCurrent); } // a request for siblings is independently tested for if(structure.indexOf("siblings") != -1) { // only import parent if we didn't already import // it for a request for ancestors or parent if(parentOfCurrent == null) { parentOfCurrent = doc.importNode(parentOfOriginal, false); doc.appendChild(parentOfCurrent); // this becomes the root } // now the siblings of current (children of parentOfCurrent) NodeList children = parentOfOriginal.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { if((Element)n != original) { // skip original which was already imported Node child = doc.importNode(n, false); // no descendants parentOfCurrent.appendChild(child); } else { // already imported Current element, insert at this position parentOfCurrent.appendChild(current); } } } } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests // (sibling request adds the current node into a particular position) parentOfCurrent.appendChild(current); // need to put the copied node into a document // else it won't have a parent doc (DOMSource can't work with it // without it having a document parent). 
} else { // when only children or descendants were requested, current becomes root document doc.appendChild(current); } } // if we are not recursively copying all descendants, then copy just // the childnodes of current: if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children // get e's children and copy them into the new document NodeList children = original.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { // create copy Node n = doc.importNode(children.item(i), false); // attach it to parent current.appendChild(n); // Now we need to indicate whether this new node (child) is a leaf // or not. (This is necessary for getChildrenOfSection(), else // it's hard to know if the children are leaves or have further // subsections. if(n.getNodeName().equals(SECTION_ELEMENT)) { // we're dealing only with section children // Check if the matching original had children: Element originalsChild = (Element)children.item(i); NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT); if(grandchildren.getLength() > 0) { // original's child has children, so indicate this // in the copied child: Element child = (Element)current; // child.setAttribute(TYPE, INTERNAL_NODE); } } } } return doc.getDocumentElement(); } /** Return the TOC substructure with the requested structural info. * @return an element containing a copy if element e with either only its child * elements or with all its descendants and/or its ancestors or only its parent * and/or its siblings (depending on what the parameter structure specifies). * Returns null if the element, e, passed in is null. * @param doc - the new document into whose root element the structural information * will be inserted as attributes. * @param e - the element to start copying from and whose structure is requested. * @param info - a string containing any combination of the values: numChildren, * numSiblings, siblingPosition. 
The requested info gets added as attributes to * the returned root element. * @see Sun java thread on transforming a DOM XML to a String */ protected Element getStructureInfo(Document doc, Element e, String info) { if(e == null) { return null; } Element root = doc.getDocumentElement(); if(!info.equals("")) { if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1 || info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength(); int numChildren = 0; NodeList children = e.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { numChildren++; } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN, Integer.toString(numChildren)); } if(info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //String docType = (numChildren > 0) ? "hierarchy" : "simple"; String docType = "hierarchy"; root.setAttribute(AbstractBasicDocument.INFO_DOC_TYPE, docType); } } if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings int numSiblings = 0; int siblingPosition = 0; Node parent = e.getParentNode(); if(parent == null) { numSiblings = 0; siblingPosition = 1; } else { //numSiblings = parent.getChildNodes().getLength(); NodeList siblings = parent.getChildNodes(); for(int i = 0; i < siblings.getLength(); i++) { Node n = siblings.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { if(e == (Element)n) { siblingPosition = numSiblings+1; } else { // count every sibling section element, except e itself numSiblings++; } } } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_SIBS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_SIBS, Integer.toString(numSiblings)); } if(info.indexOf(AbstractBasicDocument.INFO_SIB_POS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_SIB_POS, Integer.toString(siblingPosition)); } } } return root; } /** * 
Return a datastream of a document, given the document's id * and the item id of the datastream which is to be retrieved. * @return the XML (in String form) of the item denoted by itemID * that's part of the fedora data object denoted by docPID. * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3 * Can't retrieve images denoted by itemID using this method, only items * that are of XML format. * @param docPID - pid of a greenstone document in the fedora repository. * @param itemID - the itemID of a datastream of the fedora object * identified by docPID. */ protected String getItem(String docPID, String itemID) throws RemoteException, UnsupportedEncodingException { // MIMETypedStream getDatastreamDissemination( // String pid, String dsID, asOfDateTime) MIMETypedStream datastream = APIA.getDatastreamDissemination(docPID, itemID, null); return new String(datastream.getStream(), UTF8); } /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method * returns "1.2.1". * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1, * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1". * However, the string str is returned unchanged if the prefix does not occur * at the start of str. * @return the String parameter str without the prefix. * It can be used to return the number of an itemID of a greenstone document * stored in the fedora repository without the given prefix. * @param prefix - the prefix which ought to be removed from the itemID. * @param str - the value of the itemID. 
*/ protected String removePrefix(String str, String prefix) { // do nothing in those cases where the prefix is not in param str if(!str.startsWith(prefix)) return str; // otherwise: if(prefix.equals(EX+".") || prefix.equals(DC+".")) { return "1" + str.substring(prefix.length()); } else { return str.substring(prefix.length()); } } /** Given a number of the form x(.y.z), this method returns this number * as is, except when x = 1, in which case, it would return .y.z * That is, given number=3.2.1, this method would return 3.2.1 * But, given number=1.2.3, this method would return .2.3. * When number=1, it is NOT a special case: "" is returned as explained. * @param number - a proper (fedora-greenstone document) section number * @return the same number as it ought to be for the associated EX, DC datastreama. */ protected String convertToMetaNumber(String number) { if(number.startsWith("1.") || number.equals("1")) return number.substring(1); // remove the first char: the initial '1' else return number; } /** @return fedora's baseURL. It's of the form * "http://localhost:8080/fedora" */ public String getBaseURL() { return baseURL; } /** @return the portAddressURL (in use) of the Fedora APIA * web service (should be the endpoint location in the APIA's * WSDL file). 
* It's usually of the form baseURL+"/services/access"
     */
    public String getPortAddressURL() {
        return this.baseURL + this.portAddressSuffix;
    }

    /** @return the baseURL for gsdlAssocFiles: baseURL followed by "/get/" */
    public String getAssocFileBaseURL() {
        return baseURL + "/get/";
    }

    /**
     * Manual smoke test: connects using the settings in the file
     * "fedoraGS3.properties" and prints, to standard output, the collections
     * and their documents, the metadata and table of contents of a hard-coded
     * demo collection/document, and the results of a browse and a search.
     * Exits the JVM with status 0 when every call succeeded; otherwise the
     * exception is reported on the console.
     * @param args - not used.
     */
    public static void main(String args[]) {
        try {
            FedoraConnection fedoraCon
                = new FedoraConnection(new File("fedoraGS3.properties"));

            // list every collection and the titles of its documents
            String[] pids = fedoraCon.getCollections();
            String[] titles = fedoraCon.getCollectionTitles(pids);
            for (int i = 0; i < pids.length; i++) {
                System.out.println("extracted title:" + titles[i]);
                String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
                String[] docTitles = fedoraCon.getDocTitles(docPIDs);
                for (int j = 0; j < docPIDs.length; j++) {
                    System.out.println("\tExtr doc title: " + docTitles[j]);
                }
            }

            // metadata and TOC of the hard-coded demo collection/document
            String PID = "greenstone:gs2mgdemo-collection";
            String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505";
            // alternative document: HASHbf4b4675045599fbc3e2b1
            String dcXML = fedoraCon.getDC(PID);
            String exXML = fedoraCon.getEX(PID);
            String tocXML = fedoraCon.getTOC(docPID);
            System.out.println("Dublin Core Metadata for " + PID
                               + " is:\n" + dcXML);
            System.out.println("GS3 extracted metadata for " + PID
                               + " is:\n" + exXML);
            System.out.println("Table of Contents for " + docPID
                               + " is:\n" + tocXML);

            String[] sectionNames = fedoraCon.getSectionNames(docPID);
            System.out.println("\nSection names for " + docPID + " are:");
            for (int i = 0; i < sectionNames.length; i++) {
                System.out.println(sectionNames[i]);
            }

            // titles are looked up by section name (not by section number)
            String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
            String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
            System.out.println("\nSection numbers for " + docPID + " are:");
            for (int i = 0; i < sectionNumbers.length; i++) {
                System.out.println(sectionNames[i] + " " + sectionTitles[i]);
            }

            // One name drives both the messages and the calls, so the console
            // output always states the collection that is actually queried
            // (the search message used to say "gs2mgppdemo").
            final String demoCollection = "gs2mgdemo";

            System.out.println("\n");
            System.out.println("browsing greenstone's " + demoCollection
                               + " collection by (first) letter F:");
            pids = fedoraCon.browseTitlesByLetter(demoCollection, "f");
            for (int i = 0; i < pids.length; i++) {
                System.out.println(pids[i]);
            }

            System.out.println("\nsearching greenstone's " + demoCollection
                               + " collection for Gender Equality:");
            pids = fedoraCon.searchDocumentTitles(
                demoCollection, "Gender Equality", false);
            for (int i = 0; i < pids.length; i++) {
                System.out.println(pids[i]);
            }

            System.out.println("\nDone - exiting.");
            System.exit(0);
        } catch (RemoteException re) {
            System.out.println(
                "Remote Exception when calling web service operation\n"
                + re.getMessage());
            re.printStackTrace();
        } catch (Exception e) {
            System.out.println("Unable to instantiate FedoraConnection\n" + e);
            e.printStackTrace();
            //LOG.error("Unable to instantiate FedoraConnection\n" + e, e);
        }
    }
}