[15222] | 1 | /**
|
---|
| 2 | *#########################################################################
|
---|
| 3 | * FedoraConnection.java - works with the demo-client for Greenstone 3,
|
---|
| 4 | * of the Greenstone digital library suite from the New Zealand Digital
|
---|
| 5 | * Library Project at the University of Waikato, New Zealand.
|
---|
| 6 | * <BR><BR>
|
---|
| 7 | * Copyright (C) 2008 New Zealand Digital Library Project
|
---|
| 8 | * <BR><BR>
|
---|
| 9 | * This program is free software; you can redistribute it and/or modify
|
---|
| 10 | * it under the terms of the GNU General Public License as published by
|
---|
| 11 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 12 | * (at your option) any later version.
|
---|
| 13 | * <BR><BR>
|
---|
| 14 | * This program is distributed in the hope that it will be useful,
|
---|
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 17 | * GNU General Public License for more details.
|
---|
| 18 | *########################################################################
|
---|
| 19 | */
|
---|
| 20 |
|
---|
| 21 | package org.greenstone.fedora.services;
|
---|
| 22 |
|
---|
| 23 |
|
---|
| 24 | import fedora.client.utility.AutoFinder;
|
---|
| 25 | import fedora.server.access.FedoraAPIAServiceLocator;
|
---|
| 26 | // The object for accessing FedoraAPI-A web services:
|
---|
| 27 | import fedora.server.access.FedoraAPIA;
|
---|
| 28 |
|
---|
| 29 | // The definitions for all complex fedora types:
|
---|
| 30 | import fedora.server.types.gen.MIMETypedStream;
|
---|
| 31 | import fedora.server.types.gen.RepositoryInfo;
|
---|
| 32 | import fedora.server.types.gen.FieldSearchResult;
|
---|
| 33 | import fedora.server.types.gen.FieldSearchQuery;
|
---|
| 34 | import fedora.server.types.gen.DatastreamDef;
|
---|
| 35 | import fedora.server.types.gen.ObjectFields;
|
---|
| 36 | import fedora.server.types.gen.Condition;
|
---|
| 37 | import fedora.server.types.gen.ComparisonOperator;
|
---|
| 38 | //import fedora.server.types.gen.*;
|
---|
| 39 |
|
---|
| 40 | import javax.net.ssl.SSLHandshakeException;
|
---|
| 41 | import java.net.ConnectException;
|
---|
| 42 | import org.xml.sax.SAXException;
|
---|
| 43 | import java.io.UnsupportedEncodingException;
|
---|
| 44 | import java.io.IOException;
|
---|
| 45 | import javax.xml.parsers.ParserConfigurationException;
|
---|
| 46 | import java.net.MalformedURLException;
|
---|
| 47 | import java.rmi.RemoteException;
|
---|
| 48 |
|
---|
| 49 | import java.io.StringReader;
|
---|
| 50 | import java.io.FileInputStream;
|
---|
| 51 | import java.io.File;
|
---|
| 52 | import java.util.TreeSet;
|
---|
| 53 | import java.util.Properties;
|
---|
| 54 | import java.util.Vector;
|
---|
| 55 |
|
---|
| 56 | import java.awt.GridLayout;
|
---|
| 57 | import javax.swing.JLabel;
|
---|
| 58 | import javax.swing.JOptionPane;
|
---|
| 59 | import javax.swing.JPanel;
|
---|
| 60 | import javax.swing.JPasswordField;
|
---|
| 61 | import javax.swing.JTextField;
|
---|
| 62 |
|
---|
| 63 | import org.apache.log4j.Logger;
|
---|
| 64 | import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
|
---|
| 65 | import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
|
---|
| 66 | import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
|
---|
| 67 | import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
|
---|
| 68 | import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
|
---|
| 69 |
|
---|
| 70 | import javax.xml.parsers.DocumentBuilderFactory;
|
---|
| 71 | import javax.xml.parsers.DocumentBuilder;
|
---|
| 72 | import javax.xml.transform.*;
|
---|
| 73 |
|
---|
| 74 | import org.xml.sax.InputSource;
|
---|
| 75 | import org.w3c.dom.Document;
|
---|
| 76 | import org.w3c.dom.Element;
|
---|
| 77 | import org.w3c.dom.NodeList;
|
---|
| 78 | import org.w3c.dom.Node;
|
---|
| 79 |
|
---|
| 80 | /** Class that establishes a connection with Fedora's web services (via
|
---|
| 81 | * Java stub classes for the same) and then provides methods to retrieve
|
---|
| 82 | * Greenstone-specific data, such as the TOC, EX, DC, and Section
|
---|
| 83 | * datastreams of the Greenstone documents stored in Fedora's repository.
|
---|
| 84 | * These datastreams are returned as Strings without any changes being
|
---|
| 85 | * made to them.
|
---|
| 86 | * @author ak19
|
---|
| 87 | */
|
---|
| 88 | public class FedoraConnection implements FedoraGS3DL {
|
---|
| 89 | /** The logging instance for this class */
|
---|
| 90 | private static final Logger LOG = Logger.getLogger(
|
---|
| 91 | FedoraConnection.class.getName());
|
---|
| 92 |
|
---|
| 93 | /** The version of fedora that is supported by class FedoraConnection */
|
---|
| 94 | protected static final String SUPPORTED_VERSION = "2.2.1";
|
---|
| 95 |
|
---|
| 96 | /* Some fixed strings of known literals */
|
---|
| 97 | protected static final String TYPE = "type";
|
---|
| 98 | protected static final String INTERNAL_NODE = "internalNode";
|
---|
| 99 | protected static final String GET= "/get/";
|
---|
| 100 |
|
---|
| 101 | // The DemoSOAPClient declares and uses the following as a static member
|
---|
| 102 | // Probably none of the APIA methods (web service methods) remembers
|
---|
| 103 | // state, that might explain why we can use it as a static member then.
|
---|
| 104 | /** The object used to access the Fedora API-A web service methods */
|
---|
| 105 | protected static FedoraAPIA APIA;
|
---|
| 106 |
|
---|
| 107 | /** Version of the running fedora server */
|
---|
| 108 | protected String fedoraVersion;
|
---|
| 109 | /** The location of the fedora server, usually of the form
|
---|
| 110 | * http://localhost:8080/fedora */
|
---|
| 111 | protected String baseURL;
|
---|
| 112 |
|
---|
| 113 | /** The user-specified portAddressSuffix of the Fedora Access web services
|
---|
| 114 | * (endpoint URL in the WSDL), usually of the form
|
---|
| 115 | * http://localhost:8080/fedora/services/access
|
---|
| 116 | * Users can tell FedoraGS3 to try accessing that first by setting
|
---|
| 117 | * the "port.address.suffix" property in the properties file.
|
---|
| 118 | * FedoraGS3 itself will not write the portAddressSuffix currently used in
|
---|
| 119 | * the file for next time, but leave whatever value was entered in the
|
---|
| 120 | * properties file. The portAddress--not just suffix--currently in use (once
|
---|
| 121 | * the FedoraAPIA handle has been instantiated) can be obtained through
|
---|
| 122 | * getPortAddressURL() method. */
|
---|
| 123 | protected String portAddressSuffix;
|
---|
| 124 |
|
---|
| 125 | /** The part of the portAddress that comes after the baseURL. It is usually:
|
---|
| 126 | * "/services/access" */
|
---|
| 127 | protected static final String defaultPortAddressSuffix = "/services/access";
|
---|
| 128 |
|
---|
| 129 | /** The preferred language of the display content */
|
---|
| 130 | protected String lang;
|
---|
| 131 | /** The maximum number of collections to retrieve */
|
---|
| 132 | protected int maxresults;
|
---|
| 133 | /** DocumentBuilder used to create and parse XML documents */
|
---|
| 134 | protected DocumentBuilder builder;
|
---|
| 135 |
|
---|
| 136 | /** Static method that returns the version of Fedora supported by this
|
---|
| 137 | * class FedoraConnection. */
|
---|
| 138 | public static String getSupportedVersion() { return SUPPORTED_VERSION; }
|
---|
| 139 | /** The version of the running Fedora server, which may or may not
|
---|
| 140 | * match the supported version. */
|
---|
| 141 | public String getFedoraVersion() { return fedoraVersion; }
|
---|
| 142 |
|
---|
| 143 | /** @return the default language used to query for titles (and anything else
|
---|
| 144 | * where there are multiple language options). Upon initialisation, this
|
---|
| 145 | * defaults to English. */
|
---|
| 146 | public String getLanguage() { return lang; }
|
---|
| 147 |
|
---|
| 148 | /** Sets the the default language used to query for titles (and anything else
|
---|
| 149 | * where there are multiple language options). If the default language for any
|
---|
| 150 | * query is not available, then English ("en") is used. If that's not available
|
---|
| 151 | * then the first other available language is used.
|
---|
| 152 | * @param lang - the two-letter language code to set the default language to.
|
---|
| 153 | */
|
---|
| 154 | public void setLanguage(String lang) { this.lang = lang; }
|
---|
| 155 |
|
---|
| 156 | /** The default maximum number of search results returned for a search. Upon
|
---|
| 157 | * initialisation, this defaults to Java's Integer.MAX_VALUE. */
|
---|
| 158 | public int getMaxResults() { return maxresults; }
|
---|
| 159 |
|
---|
| 160 | /** Set the default maximum number of search results returned for a search.
|
---|
| 161 | * @param maxresults - the new default maximum number of search results to
|
---|
| 162 | * be returned. */
|
---|
| 163 | public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
|
---|
| 164 |
|
---|
| 165 | /** Code for this constructor is from DemoSOAPClient.java.
|
---|
| 166 | * Instantiates the APIA handle using the protocol, host, port, fedora
|
---|
| 167 | * server repository username and password.
|
---|
| 168 | * @param host - the fedora server host (may be prefixed with http:// or
|
---|
| 169 | * https:// if parameter protocol is empty). If there's no protocol, and
|
---|
| 170 | * no protocol prefixed to the host, then the protocol defaults to http.
|
---|
| 171 | * @param protocol - either http or https (or empty "")
|
---|
| 172 | * @param port - the port on which fedora is running.
|
---|
| 173 | * @param fedoraServerUsername - the administrator username required to
|
---|
| 174 | * access the fedora server's repository. ("fedoraAdmin" unless changed).
|
---|
| 175 | * @param fedoraServerPassword - the fedora server repository's
|
---|
| 176 | * administrator password. If none was set on fedora installation, this
|
---|
| 177 | * can be empty (""). */
|
---|
| 178 | public FedoraConnection(String protocol, String host, int port,
|
---|
| 179 | String fedoraServerUsername, String fedoraServerPassword)
|
---|
| 180 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 181 | SSLHandshakeException, RemoteException, AuthenticationFailedException,
|
---|
| 182 | NotAFedoraServerException, ConnectException, Exception
|
---|
| 183 | {
|
---|
| 184 | try {
|
---|
| 185 | init(protocol, host, Integer.toString(port),
|
---|
| 186 | fedoraServerUsername, fedoraServerPassword);
|
---|
| 187 | } /*catch(RemoteException re) { //subclass of IOException
|
---|
| 188 | throw re;
|
---|
| 189 | } catch(SSLHandshakeException ssle) { //subclass of IOException
|
---|
| 190 | // this is also of type IOException
|
---|
| 191 | throw ssle;
|
---|
| 192 | }*/ catch(IOException ioe) { // connected to the wrong server
|
---|
| 193 | String exceptMsg = ioe.getMessage().toLowerCase();
|
---|
| 194 | if(exceptMsg.indexOf("request failed") != -1
|
---|
| 195 | || exceptMsg.indexOf("404") != -1)
|
---|
| 196 | throw new NotAFedoraServerException();
|
---|
| 197 | else // the IOException is not due the cause we thought it was, so
|
---|
| 198 | throw ioe; // rethrow whatever other IOException was caught (which
|
---|
| 199 | // could have been RemoteException or SSLHandshakeException
|
---|
| 200 | // or some other cause)
|
---|
| 201 | }
|
---|
| 202 | }
|
---|
| 203 |
|
---|
/**
 * Default constructor, which asks the user for host, port, fedora username
 * and password instead of taking them as arguments.
 * It hands an empty Properties object to setInitialisationProperties(),
 * which keeps displaying the authentication popup until a connection has
 * been established. Nothing is read from or written to a properties file.
 * The loop in setInitialisationProperties() ends early only when
 * <pre>
 * - the user cancels the popup (CancelledException), or
 * - an exception occurs that setInitialisationProperties() does not turn
 *   into an error message for the popup; such exceptions are passed on
 *   to the caller of this constructor.
 * </pre>
 * NOTE: if a fedora server at the protocol (https or http) isn't accessible,
 * it can take a long time before the popup reappears.
 */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // No stored values to start from: the popup falls back to its own
    // built-in defaults for every field.
    setInitialisationProperties(new Properties());
}
|
---|
| 248 |
|
---|
| 249 | /** Single argument constructor that takes the name of the properties file
|
---|
| 250 | * defining the values of the initialisation parameters required to
|
---|
| 251 | * instantiate a FedoraConnection. These are fedora server username, password,
|
---|
| 252 | * host and port. If these values are not present in the file, they are set
|
---|
| 253 | * to "" before showing the initialisation input dialog.
|
---|
| 254 | * @param propertyFile is the name of the properties file specifying the
|
---|
| 255 | * values for Fedora server username, password, host and port. */
|
---|
| 256 | public FedoraConnection(File propertyFile)
|
---|
| 257 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 258 | CancelledException, ConnectException, RemoteException,
|
---|
| 259 | SSLHandshakeException, Exception
|
---|
| 260 | {
|
---|
| 261 | Properties properties = new Properties();
|
---|
| 262 | // Load the properties from the given file
|
---|
| 263 | try{
|
---|
| 264 | if(propertyFile.exists()) {
|
---|
| 265 | properties.load(new FileInputStream(propertyFile));
|
---|
| 266 | }
|
---|
| 267 | } catch(Exception e) {
|
---|
| 268 | // If the file didn't exist or could not be located,
|
---|
| 269 | // then we just continue by creating empty properties
|
---|
| 270 | LOG.warn("Exception loading from propertyFile "
|
---|
| 271 | + propertyFile + ": " + e);
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 | // Go through the process of showing the initialisation dialog
|
---|
| 275 | setInitialisationProperties(properties);
|
---|
| 276 |
|
---|
| 277 | // Now let's save whatever values the user may have entered into the
|
---|
| 278 | // input dialog as the default values for next time the dialog shows
|
---|
| 279 | try {
|
---|
| 280 | java.io.FileOutputStream out = new java.io.FileOutputStream(
|
---|
| 281 | propertyFile); // same file as properties loading file
|
---|
| 282 | // First make sure errormessage gets stored as "" and doesn't
|
---|
| 283 | // cause problems next time.
|
---|
| 284 | properties.setProperty("errormessage", "");
|
---|
| 285 | // Don't save passwords
|
---|
| 286 | properties.setProperty("password", "");
|
---|
| 287 | // If the portAddressSuffix is in the file already, then it's
|
---|
| 288 | // user-specified and we shouldn't change it. But if there is no
|
---|
| 289 | // such property in the file, then create it and write it to the file
|
---|
| 290 | // with an empty string value:
|
---|
| 291 | String portSuffix = properties.getProperty("port.address.suffix");
|
---|
| 292 | if(portSuffix == null) {
|
---|
| 293 | properties.setProperty("port.address.suffix", "");
|
---|
| 294 | }
|
---|
| 295 |
|
---|
| 296 | properties.store(out, "fedoraGS3 properties"); // write properties
|
---|
| 297 | // Javadoc states that "The output stream remains open after this
|
---|
| 298 | // method (Properties.store) returns." So we close it here
|
---|
| 299 | out.close();
|
---|
| 300 | } catch(Exception e) {
|
---|
| 301 | LOG.warn("Exception writing to propertyFile "
|
---|
| 302 | + propertyFile + ": " + e);
|
---|
| 303 | }
|
---|
| 304 | properties = null; // finished
|
---|
| 305 | }
|
---|
| 306 |
|
---|
| 307 | /** Method that loops to display the dialog that retrieves the
|
---|
| 308 | * fedora server initialisation properties from the user. If there
|
---|
| 309 | * is a property file with values set already, it will display
|
---|
| 310 | * the previously entered values by loading them from that file.
|
---|
| 311 | * Otherwise, input fields in the dialog are empty.
|
---|
| 312 | * @param properties the Properties Hashmap storing values for
|
---|
| 313 | * username, password, host and port (and any errormessage). */
|
---|
| 314 | protected void setInitialisationProperties(Properties properties)
|
---|
| 315 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 316 | CancelledException, ConnectException, RemoteException,
|
---|
| 317 | SSLHandshakeException, Exception
|
---|
| 318 | {
|
---|
| 319 | // keep looping to display authentication popup, until valid values are
|
---|
| 320 | // entered (except when a ConnectionRefused Exception is caught - this
|
---|
| 321 | // needs to be rethrown):
|
---|
| 322 | boolean authenticated = true;
|
---|
| 323 | // reset any error messages that may have been stored (should not be
|
---|
| 324 | // the case, but if there had been any difficulty during storing, it
|
---|
| 325 | // may not have written out an empty errorMessage)
|
---|
| 326 | properties.setProperty("errormessage", "");
|
---|
| 327 | do{
|
---|
| 328 | // show the Authentication-popup:
|
---|
| 329 | // By passing the HashMap Properties, user-updated values will
|
---|
| 330 | // be persistent in the authentication-popup fields (rather than
|
---|
| 331 | // reset to the default initial values).
|
---|
| 332 | properties = showAuthenticationPopup(properties);
|
---|
| 333 | String fedoraServerUsername = properties.getProperty("username", "");
|
---|
| 334 | String fedoraServerPassword = properties.getProperty("password", "");
|
---|
| 335 | String host = properties.getProperty("host", "");
|
---|
| 336 | String port = properties.getProperty("port", "");
|
---|
| 337 | //String protocol = host.startsWith("http") ? "" : "http://";
|
---|
| 338 | String protocol = "http://";
|
---|
| 339 | if(host.startsWith("http") || host.startsWith("https"))
|
---|
| 340 | protocol = "";
|
---|
| 341 | // NOTE THAT: if a fedora server at https:// is not accessible,
|
---|
| 342 | // it takes a long time for the authentication popup to reappear.
|
---|
| 343 |
|
---|
| 344 | try{
|
---|
| 345 | this.portAddressSuffix
|
---|
| 346 | = properties.getProperty("port.address.suffix", "");
|
---|
| 347 | // Use the FedoraClient utility to get the SOAP stub for APIA.
|
---|
| 348 | // This SOAP stub enables the client to connect to a Fedora
|
---|
| 349 | // repository via the API-A web service interface.
|
---|
| 350 | init(protocol, host, port,
|
---|
| 351 | fedoraServerUsername, fedoraServerPassword);
|
---|
| 352 | // will throw Exception if it can't instantiate APIA
|
---|
| 353 |
|
---|
| 354 | // if no exception thrown in the initialisation statement above,
|
---|
| 355 | // then we have been authenticated:
|
---|
| 356 | authenticated = true;
|
---|
| 357 | } catch(AuthenticationFailedException afe) {
|
---|
| 358 | authenticated = false;
|
---|
| 359 | properties.setProperty("errormessage", afe.getMessage());
|
---|
| 360 | } catch(RemoteException e) { // causes could be various
|
---|
| 361 | String reason = e.getMessage();
|
---|
| 362 | if(e.getCause() != null) {
|
---|
| 363 | // For instance, if a ConnectException indicating
|
---|
| 364 | // 'Connection Refused' or a java.net.UnknownHostException
|
---|
| 365 | // caused the RemoteException
|
---|
| 366 |
|
---|
| 367 | // Strip out prefix "Nested exception is..." from the
|
---|
| 368 | // encapsulating Exception's message, by using the Cause's
|
---|
| 369 | // message. Keep Exception classname to give it some context:
|
---|
| 370 | reason = e.getCause().getClass().getName() + ": "
|
---|
| 371 | + e.getCause().getMessage();
|
---|
| 372 | // Give some more information if the connection was refused.
|
---|
| 373 | // (This can also happen when the Fedora server is not running)
|
---|
| 374 | if(e.getCause().getClass().equals(ConnectException.class)) {
|
---|
| 375 | reason += FedoraGS3Exception.connectionRefusedMessage;
|
---|
| 376 | }
|
---|
| 377 | }
|
---|
| 378 | // if the message indicates that a server was running there,
|
---|
| 379 | // then we tell the user it was not a Fedora server
|
---|
| 380 | if(reason.toLowerCase().contains("404")
|
---|
| 381 | || reason.toLowerCase().contains("request failed"))
|
---|
| 382 | {
|
---|
| 383 | reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
|
---|
| 384 | }
|
---|
| 385 | authenticated = false;
|
---|
| 386 | properties.setProperty("errormessage", reason);
|
---|
| 387 | } catch(ConnectException e) {
|
---|
| 388 | properties.setProperty("errormessage",
|
---|
| 389 | FedoraGS3Exception.connectionRefusedMessage);
|
---|
| 390 | authenticated = false;
|
---|
| 391 | } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
|
---|
| 392 | // be handled before IOException, as it's an IOException subclass.
|
---|
| 393 | authenticated = false;
|
---|
| 394 | properties.setProperty("errormessage",
|
---|
| 395 | FedoraGS3Exception.sslHandshakeExceptionMessage);
|
---|
| 396 | // we won't prefix the host with http for the user, as https
|
---|
| 397 | // might be right after all, and something else might have gone
|
---|
| 398 | // during the connection attempt instead.
|
---|
| 399 | //host = host.replace("https", "http"); //setting it for them
|
---|
| 400 | //properties.setProperty("host", host);
|
---|
| 401 | } catch(IOException ioe) { // occurs when we try to connect to a
|
---|
| 402 | // host/port where some server other than Fedora's is listening
|
---|
| 403 | // (e.g. if we end up connecting to GS3's host and port).
|
---|
| 404 | // In that case, we can get exception messages like a 404:
|
---|
| 405 | // "Unable to instantiate FedoraConnection
|
---|
| 406 | // java.io.IOException: Request failed [404 /fedora/describe]"
|
---|
| 407 | // Test this by trying to connect to localhost at 9090 where GS3 is
|
---|
| 408 | String exceptMsg = ioe.getMessage().toLowerCase();
|
---|
| 409 | if(exceptMsg.indexOf("request failed") != -1
|
---|
| 410 | || exceptMsg.indexOf("404") != -1)
|
---|
| 411 | {
|
---|
| 412 | properties.setProperty("errormessage",
|
---|
| 413 | NotAFedoraServerException.MESSAGE
|
---|
| 414 | + "\n(" + ioe.getMessage() + ")");
|
---|
| 415 | } else if(exceptMsg.indexOf("401") != -1
|
---|
| 416 | || exceptMsg.indexOf("500") != -1)
|
---|
| 417 | {
|
---|
| 418 | authenticated = false;
|
---|
| 419 | properties.setProperty("errormessage", ioe.getMessage());
|
---|
| 420 | } else { // the exception occurred for some other reason, rethrow it
|
---|
| 421 | throw ioe;
|
---|
| 422 | }
|
---|
| 423 | }
|
---|
| 424 | } while(!authenticated); // will keep showing popup until auhentication
|
---|
| 425 | // and connection input values are valid
|
---|
| 426 | }
|
---|
| 427 |
|
---|
| 428 | /**
|
---|
| 429 | * Static method that displays a popup to allow the user to provide Fedora
|
---|
| 430 | * authentication (username, pwd) and connection (protocol+host, port) details.
|
---|
| 431 | * @param properties is a Properties HashMap where the property Keys which must
|
---|
| 432 | * have been put in here in advance (even with "" Values if appropriate) are:
|
---|
| 433 | * <pre>
|
---|
| 434 | * - username
|
---|
| 435 | * - password
|
---|
| 436 | * - host (may - but need not - be prefixed with either of the protocols
|
---|
| 437 | * "http://" and "https://"
|
---|
| 438 | * - port
|
---|
| 439 | * - errorMessage (displayed near the top of the popup dialog). Can be "".
|
---|
| 440 | * </pre>
|
---|
| 441 | * The values stored in the properties HashMap for the above property are
|
---|
| 442 | * initially displayed in the fields and the user can overwrite them.
|
---|
| 443 | * This is useful in such cases where invalid values were entered and this
|
---|
| 444 | * popup must be redisplayed to allow the user to correct their previous input.
|
---|
| 445 | * @return the same HashMap Properties which was passed as parameter. */
|
---|
| 446 | protected static Properties showAuthenticationPopup(Properties properties)
|
---|
| 447 | throws CancelledException
|
---|
| 448 | {
|
---|
| 449 | // Retrieve all the properties -- defaults to "" if any are null
|
---|
| 450 | JTextField usernameField = new JTextField(
|
---|
| 451 | properties.getProperty("username", "fedoraAdmin"));
|
---|
| 452 | JTextField passwordField = new JPasswordField(
|
---|
| 453 | properties.getProperty("password", ""));
|
---|
| 454 | JTextField hostField = new JTextField(
|
---|
| 455 | properties.getProperty("host", "localhost"));
|
---|
| 456 | JTextField portField = new JTextField(
|
---|
| 457 | properties.getProperty("port", "8080"));
|
---|
| 458 |
|
---|
| 459 | JPanel panel = new JPanel(new GridLayout(4,2));
|
---|
| 460 | panel.add(new JLabel("User Name"));
|
---|
| 461 | panel.add(usernameField);
|
---|
| 462 | panel.add(new JLabel("Password"));
|
---|
| 463 | panel.add(passwordField);
|
---|
| 464 | panel.add(new JLabel("Host"));
|
---|
| 465 | panel.add(hostField);
|
---|
| 466 | panel.add(new JLabel("Port"));
|
---|
| 467 | panel.add(portField);
|
---|
| 468 |
|
---|
| 469 | String heading = "Fedora Server Admin Authentication:";
|
---|
| 470 | String errorMessage = properties.getProperty("errormessage", "");
|
---|
| 471 | if(!errorMessage.equals("")) {
|
---|
| 472 | heading = "=> " + errorMessage + "\n\n" + heading;
|
---|
| 473 | }
|
---|
| 474 | int option = JOptionPane.showConfirmDialog(null, new Object[] {
|
---|
| 475 | heading, panel},
|
---|
| 476 | "Enter Network Password",
|
---|
| 477 | JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
|
---|
| 478 |
|
---|
| 479 | if (option == JOptionPane.OK_OPTION) {
|
---|
| 480 | String fedoraServerUsername = usernameField.getText();
|
---|
| 481 | String fedoraServerPassword = passwordField.getText();
|
---|
| 482 | String host = hostField.getText();
|
---|
| 483 | String port = portField.getText();
|
---|
| 484 | properties.setProperty("username", fedoraServerUsername);
|
---|
| 485 | properties.setProperty("password", fedoraServerPassword);
|
---|
| 486 | properties.setProperty("host", host);
|
---|
| 487 | properties.setProperty("port", port);
|
---|
| 488 | } else { // Cancel option
|
---|
| 489 | throw new CancelledException();
|
---|
| 490 | }
|
---|
| 491 | return properties;
|
---|
| 492 | }
|
---|
| 493 |
|
---|
| 494 | /** Init method that is called by the constructor to set some
|
---|
| 495 | * important member variables including instantiating the APIA object
|
---|
| 496 | * used to invoke the Fedora APIA web service operations.
|
---|
| 497 | * @param protocol can be http or https
|
---|
| 498 | * @param host is the name of the Fedora server host
|
---|
| 499 | * @param port is the port number (String form) of the Fedora server
|
---|
| 500 | * @param fedoraServerUsername is the user name to access the Fedora
|
---|
| 501 | * Server
|
---|
| 502 | * @param fedoraServerPassword is the password needed to access the
|
---|
| 503 | * Fedora Server
|
---|
| 504 | */
|
---|
| 505 | protected void init(String protocol, String host, String port,
|
---|
| 506 | String fedoraServerUsername, String fedoraServerPassword)
|
---|
| 507 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 508 | AuthenticationFailedException, RemoteException, Exception
|
---|
| 509 | {
|
---|
| 510 | // initialise member variables
|
---|
| 511 | lang = ENGLISH;
|
---|
| 512 | maxresults = Integer.MAX_VALUE;
|
---|
| 513 | DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
---|
| 514 | builder = factory.newDocumentBuilder();
|
---|
| 515 |
|
---|
| 516 | // (protocol is "" if host already contains protocol)
|
---|
| 517 | if(!protocol.equals("") && !protocol.endsWith("://"))
|
---|
| 518 | protocol += "://";
|
---|
| 519 | // now create baseURL = protocol://host:port/fedora
|
---|
| 520 | this.baseURL = protocol + host + ":" + port + "/fedora";
|
---|
| 521 |
|
---|
| 522 | // Get the FedoraAPIA handle to/stub of the Fedora web services
|
---|
| 523 | // New way of instantiating connection to Fedora is dependent on
|
---|
| 524 | // fewer files of FedoraClient.jar
|
---|
| 525 | FedoraAPIAServiceLocator serviceLocator
|
---|
| 526 | = new FedoraAPIAServiceLocator(fedoraServerUsername,
|
---|
| 527 | fedoraServerPassword);
|
---|
| 528 |
|
---|
| 529 | APIA = null;
|
---|
| 530 | boolean isUserSpecifiedPortAddressSuffix = false;
|
---|
| 531 | // try any portAddressSuffix specified by the user
|
---|
| 532 | if(!this.portAddressSuffix.equals("")) {
|
---|
| 533 | isUserSpecifiedPortAddressSuffix = true;
|
---|
| 534 | this.createAPIA(serviceLocator, this.portAddressSuffix,
|
---|
| 535 | "user-specified", isUserSpecifiedPortAddressSuffix);
|
---|
| 536 | }
|
---|
| 537 |
|
---|
| 538 | // If the user-specified portAddressSuffix failed or if there was none
|
---|
| 539 | // given, then APIA will be null, so we will try with the default
|
---|
| 540 | // portAddressSuffix. This time all exceptions will be passed on.
|
---|
| 541 | if(APIA == null) {
|
---|
| 542 | isUserSpecifiedPortAddressSuffix = false;
|
---|
| 543 | this.createAPIA(serviceLocator, defaultPortAddressSuffix,
|
---|
| 544 | "default", isUserSpecifiedPortAddressSuffix);
|
---|
| 545 | }
|
---|
| 546 |
|
---|
| 547 | }
|
---|
| 548 |
|
---|
| 549 | /** Tries to create the FedoraAPIA instance using the serviceLocator
|
---|
| 550 | * and the given portSuffix. The APIA instance is obtained for the
|
---|
| 551 | * baseURL+portSuffix. Any exceptions are (processed and) rethrown
|
---|
| 552 | * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
|
---|
| 553 | * Remote Exception from AXIS that it can't find the target service to
|
---|
| 554 | * invoke is ignored so that the caller can retry with the default port-
|
---|
| 555 | * address suffix first before giving up. */
|
---|
| 556 | protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
|
---|
| 557 | String portSuffix, String messageInsert,
|
---|
| 558 | boolean isUserSpecifiedPortAddressSuffix)
|
---|
| 559 | throws Exception
|
---|
| 560 | {
|
---|
| 561 | //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
|
---|
| 562 | // this.portAddressSuffix : defaultPortAddressSuffix;
|
---|
| 563 |
|
---|
| 564 | try {
|
---|
| 565 | LOG.debug( "Trying to connect to Fedora using the given"
|
---|
| 566 | + " baseURL and the " + messageInsert + " portAddress suffix:\n"
|
---|
| 567 | + baseURL + portSuffix);
|
---|
| 568 | APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
|
---|
| 569 | new java.net.URL(baseURL+portSuffix));
|
---|
| 570 | // let's test whether we're authenticated (otherwise a
|
---|
| 571 | // RemoteException will be thrown to indicate that the
|
---|
| 572 | // password was incorrect.)
|
---|
| 573 | RepositoryInfo repositoryInfo = APIA.describeRepository();
|
---|
| 574 | // throws RemoteException if pwd wrong or for other reasons
|
---|
| 575 | // in which case describeRepository() service is unavailable
|
---|
| 576 | this.fedoraVersion = repositoryInfo.getRepositoryVersion();
|
---|
| 577 | // If we come all the way here, no exceptions were thrown:
|
---|
| 578 | this.portAddressSuffix = portSuffix; // store the one currently in use
|
---|
| 579 | } catch(RemoteException re) {
|
---|
| 580 | // if we're here, then APIA was unable to call the web service
|
---|
| 581 | // If this was because the fedora authentication failed, then
|
---|
| 582 | // let's throw a custom exception
|
---|
| 583 | String message = re.getMessage().toLowerCase();
|
---|
| 584 | // Looking for something Unauthorized(401)
|
---|
| 585 | if(message.indexOf("unauthorized") != -1
|
---|
| 586 | || message.indexOf("401") != -1)
|
---|
| 587 | {
|
---|
| 588 | throw new AuthenticationFailedException();
|
---|
| 589 | } else if(isUserSpecifiedPortAddressSuffix
|
---|
| 590 | && re.getMessage().contains(
|
---|
| 591 | FedoraGS3Exception.missingTargetService))
|
---|
| 592 | {
|
---|
| 593 | LOG.warn("Failed to connect to Fedora APIA services at given"
|
---|
| 594 | + " port address:\n" + portSuffix
|
---|
| 595 | + "\nException: " + re.getMessage());
|
---|
| 596 | // APIA.describeRepository can throw a remote exception
|
---|
| 597 | // whereby AXIS says the target service is missing and can't
|
---|
| 598 | // be invoked (FedoraGS3Exception.missingTargetService)
|
---|
| 599 | // Don't rethrow this, if AXIS can't find the user-specified
|
---|
| 600 | // portAddressSuffix, we will try with the default suffix next
|
---|
| 601 | APIA = null;
|
---|
| 602 | } else { // if trying default portAddressSuffix or if any other
|
---|
| 603 | // RemoteException was generated (whose cause is something
|
---|
| 604 | // other than an authentication failure) rethrow it.
|
---|
| 605 | throw re;
|
---|
| 606 | }
|
---|
| 607 | } catch(Exception e) { // Other Exceptions
|
---|
| 608 | // Could possibly be a ServiceException when using ServiceLocator
|
---|
| 609 | if(isUserSpecifiedPortAddressSuffix) {
|
---|
| 610 | APIA = null; // we won't throw other exceptions yet until
|
---|
| 611 | // we have tried the default PortAddressSuffix for the baseURL
|
---|
| 612 | } else {
|
---|
| 613 | throw new FedoraGS3InitFailureException(e);
|
---|
| 614 | }
|
---|
| 615 | }
|
---|
| 616 | }
|
---|
| 617 |
|
---|
| 618 | /** Gets all greenstone collections. Searches for greenstone:*-collection.
|
---|
| 619 | * Method getCollections() defaults to getting only those objects in fedora's
|
---|
| 620 | * repository whose pids are of the format greenstone:*-collection.
|
---|
| 621 | * The use of AutoFinder and findObjects is shown in
|
---|
| 622 | * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
|
---|
| 623 | * The Fedora-APIA's method definition of findObjects is:
|
---|
| 624 | * <pre>
|
---|
| 625 | * fedora-types:FieldSearchResult findObjects(
|
---|
| 626 | * fedora-types:ArrayOfString resultFields,
|
---|
| 627 | * xsd:nonNegativeInteger maxResults,
|
---|
| 628 | * fedora-types:FieldSearchQuery query )
|
---|
| 629 | * </pre>
|
---|
| 630 | * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
|
---|
| 631 | * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
|
---|
| 632 | * @see <a href="http://www.fedora.info/definitions/1/0/types/#complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
|
---|
| 633 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html>Type definition of 2.2.1 FieldSearchQuery</a>
|
---|
| 634 | * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
|
---|
| 635 | * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
|
---|
| 636 | *
|
---|
| 637 | * @return an array of Strings containing the pids of all collections
|
---|
| 638 | * matching the format greenstone:*-collection.
|
---|
| 639 | */
|
---|
| 640 | public String[] getCollections() throws RemoteException
|
---|
| 641 | {
|
---|
| 642 | // Available constructors:
|
---|
| 643 | // FieldSearchQuery(java.util.List conditions)
|
---|
| 644 | // FieldSearchQuery(java.lang.String terms)
|
---|
| 645 | final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
|
---|
| 646 | FieldSearchQuery query = new FieldSearchQuery();
|
---|
| 647 | query.setTerms(queryStr);
|
---|
| 648 | query.setConditions(null);
|
---|
| 649 | // we'd like pid and title returned for each object
|
---|
| 650 | // we pass maxResults=null to get all objects that match
|
---|
| 651 | // (i.e. all collections)
|
---|
| 652 | String[] pids = null;
|
---|
| 653 |
|
---|
| 654 | FieldSearchResult collection = AutoFinder.findObjects(
|
---|
| 655 | APIA, new String[]{"pid", "title"}, maxresults, query);
|
---|
| 656 | ObjectFields[] results = collection.getResultList();
|
---|
| 657 | pids = new String[results.length];
|
---|
| 658 | for(int i = 0; i < results.length; i++) {
|
---|
| 659 | pids[i] = results[i].getPid();
|
---|
| 660 | }
|
---|
| 661 | return pids;
|
---|
| 662 | }
|
---|
| 663 |
|
---|
| 664 | /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
|
---|
| 665 | * top-level documents or document sections - have a DC datastream. This
|
---|
| 666 | * method returns the content (XML) of the DC datastream as it is stored in
|
---|
| 667 | * fedora's repository.
|
---|
| 668 | * (The pid/DC call is one of the default fedora-system 3 disseminations.)
|
---|
| 669 | * Try an example of the form: http://localhost:8080/fedora/get/<pid>/DC
|
---|
| 670 | * To obtain the DC/any datastream, we use method getDatastreamDissemination()
|
---|
| 671 | * of the interface FedoraAPIA. This method returns a MIMETypedStream.
|
---|
| 672 | * The method signature is:
|
---|
| 673 | * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
|
---|
| 674 | * where dsID = itemID (look at datastreams page of running fedora instance)
|
---|
| 675 | * To access the XML content of the MIMETypedObject returned, we use its method
|
---|
| 676 | * bytes[] getStream(), but when instantiating a String from this, we have to
|
---|
| 677 | * use the String() contructor where we can specify the charset encoding (in
|
---|
| 678 | * this case, it must be UTF-8). Else getStream() returns gobbledygook.
|
---|
| 679 | * @return a String version of the XML in the DC datastream for the fedora
|
---|
| 680 | * object denoted by pid.
|
---|
| 681 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
| 682 | * repository.
|
---|
| 683 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
|
---|
| 684 | * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
|
---|
| 685 | * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
|
---|
| 686 | * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
|
---|
| 687 | */
|
---|
| 688 | public String getDC(String pid)
|
---|
| 689 | throws RemoteException, UnsupportedEncodingException
|
---|
| 690 | {
|
---|
| 691 | // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
|
---|
| 692 | // datastream ID, dsID = itemID, look at a running fedora
|
---|
| 693 | MIMETypedStream dcStream
|
---|
| 694 | = APIA.getDatastreamDissemination(pid, DC, null);
|
---|
| 695 | //asOfDateTime = null to get the current version of the dataStream
|
---|
| 696 |
|
---|
| 697 | // need to set the charset encoding to UTF8
|
---|
| 698 | return new String(dcStream.getStream(), UTF8);
|
---|
| 699 | }
|
---|
| 700 |
|
---|
| 701 | /** All "greenstone:*" objects in fedora (be they collections be they
|
---|
| 702 | * collections, top-level documents or document sections) have an EX
|
---|
| 703 | * datastream. This method returns the content (XML) of the EX datastream as
|
---|
| 704 | * is. (It calls the default fedora-system 3 dissemination <pid>/EX.)
|
---|
| 705 | * @return a String version of the XML in the EX datastream for the fedora
|
---|
| 706 | * object denoted by pid.
|
---|
| 707 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
| 708 | * repository.
|
---|
| 709 | * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
|
---|
| 710 | * asOfDateTime).
|
---|
| 711 | * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
|
---|
| 712 | * @see String getDC(String pid) throws Exception
|
---|
| 713 | * */
|
---|
| 714 | public String getEX(String pid)
|
---|
| 715 | throws RemoteException, UnsupportedEncodingException
|
---|
| 716 | {
|
---|
| 717 | MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
|
---|
| 718 | //asOfDateTime = null to get the current version of the dataStream
|
---|
| 719 |
|
---|
| 720 | // need to set the charset encoding to UTF8
|
---|
| 721 | return new String(exStream.getStream(), UTF8);
|
---|
| 722 | }
|
---|
| 723 |
|
---|
| 724 | /** Some "greenstone:*" top-level documents in the fedora repository (but not
|
---|
| 725 | * greenstone collections or document sections) have a DLS metadata datastream.
|
---|
| 726 | * This method returns the content (XML) of the DLS datastream as is. (It calls
|
---|
| 727 | * the default fedora-system 3 dissemination <pid>/DLS.)
|
---|
| 728 | * @return a String version of the XML in the DLS datastream for the fedora
|
---|
| 729 | * object denoted by pid, or "" if the document given by pid has no DLS datastream.
|
---|
| 730 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
| 731 | * repository.
|
---|
| 732 | * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
|
---|
| 733 | * asOfDateTime).
|
---|
| 734 | * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
|
---|
| 735 | * @see String getDC(String pid) throws Exception
|
---|
| 736 | * */
|
---|
| 737 | public String getDLS(String pid)
|
---|
| 738 | throws RemoteException, UnsupportedEncodingException
|
---|
| 739 | {
|
---|
| 740 | MIMETypedStream dlsStream = null;
|
---|
| 741 | // If there is no DLS datastream, it throws an exception (whose class
|
---|
| 742 | // fedora.server.errors.DatastreamNotFoundException can't be imported
|
---|
| 743 | // here (it's not in the client side fedora.server.* package, but on
|
---|
| 744 | // the server side package of that name):
|
---|
| 745 | try{
|
---|
| 746 | dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
|
---|
| 747 | //asOfDateTime=null to get the current version of the dataStream
|
---|
| 748 | } catch(RemoteException e) {
|
---|
| 749 | //These two don't work:
|
---|
| 750 | //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
|
---|
| 751 | //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
|
---|
| 752 |
|
---|
| 753 | if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
|
---|
| 754 | { // there is no DLS data stream for this document
|
---|
| 755 | return "";
|
---|
| 756 | }
|
---|
| 757 | else { // different problem, exception due to different cause
|
---|
| 758 | throw(e);
|
---|
| 759 | }
|
---|
| 760 | }
|
---|
| 761 | if(dlsStream == null)
|
---|
| 762 | return "";
|
---|
| 763 | // need to set the charset encoding to UTF8
|
---|
| 764 | return new String(dlsStream.getStream(), UTF8);
|
---|
| 765 | }
|
---|
| 766 |
|
---|
| 767 | /** All "greenstone:*" objects in fedora (be they collections or documents)
|
---|
| 768 | * have a TOC datastream. This method returns the content (XML) of the TOC
|
---|
| 769 | * datastream as is. (Calls default fedora-system 3 dissemination <pid>/TOC.)
|
---|
| 770 | * @return a String version of the XML in the TOC datastream for the fedora
|
---|
| 771 | * object denoted by pid.
|
---|
| 772 | * @param pid - the fedora persistent identifier for an item in the fedora
|
---|
| 773 | * repository.
|
---|
| 774 | * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
|
---|
| 775 | * asOfDateTime)
|
---|
| 776 | * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
|
---|
| 777 | * @see String getDC(String pid) throws Exception
|
---|
| 778 | * */
|
---|
| 779 | public String getTOC(String pid)
|
---|
| 780 | throws RemoteException, UnsupportedEncodingException
|
---|
| 781 | {
|
---|
| 782 | MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
|
---|
| 783 | //asOfDateTime = null to get the current version of the dataStream
|
---|
| 784 |
|
---|
| 785 | // need to set the charset encoding to UTF8
|
---|
| 786 | return new String(tocStream.getStream(), UTF8);
|
---|
| 787 | }
|
---|
| 788 |
|
---|
| 789 | /** @return the <name>s (in greenstone:<name>-collection)
|
---|
| 790 | * for the collections indicated by collPIDs.
|
---|
| 791 | * @param collPIDs - an array of Strings denoting the pids for greenstone
|
---|
| 792 | * collections stored in the fedora repositoryl. These should be of the
|
---|
| 793 | * format "greenstone:<collectionName>-collection". */
|
---|
| 794 | public String[] getCollectionNames(String[] collPIDs) {
|
---|
| 795 | String[] collNames = new String[collPIDs.length];
|
---|
| 796 | for(int i = 0; i < collPIDs.length; i++)
|
---|
| 797 | collNames[i] = getCollectionName(collPIDs[i]);
|
---|
| 798 | return collNames;
|
---|
| 799 | }
|
---|
| 800 |
|
---|
| 801 | /** @return "greenstone:<name>-collection" for all <name>s
|
---|
| 802 | * in the parameter collNames.
|
---|
| 803 | * @param collNames - a list of names of greenstone collections
|
---|
| 804 | * stored in the fedora repository. */
|
---|
| 805 | public String[] getCollectionPIDs(String[] collNames) {
|
---|
| 806 | String[] collPIDs = new String[collNames.length];
|
---|
| 807 | for(int i = 0; i < collNames.length; i++)
|
---|
| 808 | collPIDs[i] = getCollectionName(collNames[i]);
|
---|
| 809 | return collPIDs;
|
---|
| 810 | }
|
---|
| 811 |
|
---|
| 812 | /** @return greenstone:<name>-collection for the<name>
|
---|
| 813 | * denoted by parameter collName.
|
---|
| 814 | * @param collName - the name of a greenstone collection stored
|
---|
| 815 | * stored in the fedora repository. */
|
---|
| 816 | public String getCollectionPID(String collName) {
|
---|
| 817 | return GREENSTONE_+collName+_COLLECTION;
|
---|
| 818 | }
|
---|
| 819 |
|
---|
| 820 | /**
|
---|
| 821 | * Gets the title of the collection denoted by the given collection's pid by
|
---|
| 822 | * retrieving the title metadata for it from the collection's EX datastream.
|
---|
| 823 | * @return the title (in the default language, else English, else the
|
---|
| 824 | * first title found) for the particular collection denoted by its PID.
|
---|
| 825 | * @param collPID is the pid of a greenstone collection in the fedora
|
---|
| 826 | * repository. */
|
---|
| 827 | public String getCollectionTitle(String collPID)
|
---|
| 828 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 829 | SAXException, IOException
|
---|
| 830 | {
|
---|
| 831 | String title = null; // has to be null initially, we do a check on it
|
---|
| 832 | // Parse the EX datastream (XML), and in its DOM, find the
|
---|
| 833 | // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
|
---|
| 834 | // There might be one OR several of those with attribute
|
---|
| 835 | // name="collectionname". If there's only one, then get that.
|
---|
| 836 | // If there are several, there would possibly a be qualifier attribute,
|
---|
| 837 | // in which case get qualifier=lang (where lang is the member variable)
|
---|
| 838 | // If there is no qualifier with the requested language, then get the
|
---|
| 839 | // english one which is likely to be there, else return the title for
|
---|
| 840 | // the first collectionname .
|
---|
| 841 |
|
---|
| 842 | MIMETypedStream exdata
|
---|
| 843 | = APIA.getDatastreamDissemination(collPID, EX, null);
|
---|
| 844 | String exStream = new String(exdata.getStream(), UTF8);
|
---|
| 845 |
|
---|
| 846 | InputSource source = new InputSource(new StringReader(exStream));
|
---|
| 847 | Document doc = builder.parse(source);
|
---|
| 848 | Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
|
---|
| 849 | NodeList children = docEl.getChildNodes();
|
---|
| 850 |
|
---|
| 851 | String firstName = "";
|
---|
| 852 | String englishName = "";
|
---|
| 853 | for(int i = 0; i < children.getLength(); i++ ) {
|
---|
| 854 | Node n = children.item(i);
|
---|
| 855 | if(n.getNodeType() == Node.ELEMENT_NODE) {
|
---|
| 856 | Element e = (Element)n;
|
---|
| 857 | if(e.hasAttribute(NAME)
|
---|
| 858 | && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
|
---|
| 859 | firstName = FedoraCommons.getValue(e);
|
---|
| 860 | if(!e.hasAttribute(QUALIFIER)) {
|
---|
| 861 | title = FedoraCommons.getValue(e);
|
---|
| 862 | break;
|
---|
| 863 | }
|
---|
| 864 | else if(e.getAttribute(QUALIFIER).equals(lang)) {
|
---|
| 865 | title = FedoraCommons.getValue(e);
|
---|
| 866 | break;
|
---|
| 867 | } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
|
---|
| 868 | englishName = FedoraCommons.getValue(e);
|
---|
| 869 | }
|
---|
| 870 | }
|
---|
| 871 | }
|
---|
| 872 | }
|
---|
| 873 |
|
---|
| 874 | // if the title is still not set to that of the requested language,
|
---|
| 875 | // then try setting it to the collection name in English. If English
|
---|
| 876 | // isn't available, then set it to the first collection name provided
|
---|
| 877 | // (in whichever language).
|
---|
| 878 | if(title == null) {
|
---|
| 879 | title = englishName.equals("") ? firstName : englishName;
|
---|
| 880 | }
|
---|
| 881 | doc = null;
|
---|
| 882 | return title;
|
---|
| 883 | }
|
---|
| 884 |
|
---|
| 885 | /** @return the collection titles for all the collections indicated by
|
---|
| 886 | * collPIDs.
|
---|
| 887 | * @param collPIDs - a list of pids identifying greenstone collections
|
---|
| 888 | * stored in the fedora repository. */
|
---|
| 889 | public String[] getCollectionTitles(String[] collPIDs)
|
---|
| 890 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 891 | SAXException, IOException
|
---|
| 892 | {
|
---|
| 893 | String[] titles = new String[collPIDs.length];
|
---|
| 894 |
|
---|
| 895 | // parse each EX datastream (XML) which contains the gs3-extracted meta.
|
---|
| 896 | for(int i = 0; i < collPIDs.length; i++) {
|
---|
| 897 | titles[i] = getCollectionTitle(collPIDs[i]);
|
---|
| 898 | }
|
---|
| 899 | return titles;
|
---|
| 900 | }
|
---|
| 901 |
|
---|
| 902 | /** @return the title metadata for the given doc objects of a collection.
|
---|
| 903 | * These titles are returned in the same order as the given docIDs.
|
---|
| 904 | * (The docPIDs already contain the collection name anyway.)
|
---|
| 905 | * @param docPIDs - a list of pids identifying documents stored in the
|
---|
| 906 | * fedora repository. */
|
---|
| 907 | public String[] getDocTitles(String[] docPIDs)
|
---|
| 908 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 909 | SAXException, IOException
|
---|
| 910 | {
|
---|
| 911 | String[] titles = new String[docPIDs.length];
|
---|
| 912 | for(int i = 0; i < docPIDs.length; i++) {
|
---|
| 913 | titles[i] = getDocTitle(docPIDs[i]);
|
---|
| 914 | }
|
---|
| 915 | return titles;
|
---|
| 916 | }
|
---|
| 917 |
|
---|
| 918 | /** Gets the title metadata for a particular doc object in a collection
|
---|
| 919 | * denoted by docPID. The docPID already contains the collection name.
|
---|
| 920 | * @return the title for the fedora document item denoted by docPID
|
---|
| 921 | * @param docPID is the pid of the document in the fedora repository
|
---|
| 922 | * (docPID is of the form greenstone:<colName>-<doc-identifier> */
|
---|
| 923 | public String getDocTitle(String docPID)
|
---|
| 924 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 925 | SAXException, IOException
|
---|
| 926 | {
|
---|
| 927 | // We need the extracted metadata file, and find its
|
---|
| 928 | // documentElement's child
|
---|
| 929 | // <ex:metadata name="Title">sometitle</ex:metadata>
|
---|
| 930 | // where the title we return is sometitle
|
---|
| 931 |
|
---|
| 932 | String title = "";
|
---|
| 933 | MIMETypedStream exdata
|
---|
| 934 | = APIA.getDatastreamDissemination(docPID, EX, null);
|
---|
| 935 | String exStream = new String(exdata.getStream(), UTF8);
|
---|
| 936 | return getTitle(exStream);
|
---|
| 937 | }
|
---|
| 938 |
|
---|
| 939 | /** Given a string representation of a document's or document section's
|
---|
| 940 | * EX datastream -- which is a greenstone extracted metadata XML file --
|
---|
| 941 | * of the form:
|
---|
| 942 | * <ex>
|
---|
| 943 | * <ex:metadata name="Title">sometitle</ex:metadata>
|
---|
| 944 | * <ex:metadata name="...">....</ex:metadata>
|
---|
| 945 | * ...
|
---|
| 946 | * </ex>
|
---|
| 947 | * This method finds the <ex:metadata> where the name="Title" and
|
---|
| 948 | * returns the value embedded in that element ('sometitle' in
|
---|
| 949 | * the example above).
|
---|
| 950 | * @return the title metadata of the document/document section whose EX
|
---|
| 951 | * datastream is passed as parameter
|
---|
| 952 | * @param exStream the EX datastream in String form of the document or
|
---|
| 953 | * document section. */
|
---|
| 954 | protected String getTitle(String exStream)
|
---|
| 955 | throws SAXException, IOException
|
---|
| 956 | {
|
---|
| 957 | String title = "";
|
---|
| 958 | InputSource source = new InputSource(new StringReader(exStream));
|
---|
| 959 | Document doc = builder.parse(source);
|
---|
| 960 | Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
|
---|
| 961 | NodeList children = docEl.getChildNodes();
|
---|
| 962 |
|
---|
| 963 | // Cycle through all the *element* children of <ex:ex></ex:ex>
|
---|
| 964 | // which are all of the form:
|
---|
| 965 | // <ex:metadata name="somename">somevalue</ex:metadata>
|
---|
| 966 | // Find the one where name="Title", its value is the title
|
---|
| 967 | for(int i = 0; i < children.getLength(); i++ ) {
|
---|
| 968 | Node n = children.item(i);
|
---|
| 969 | if(n.getNodeType() == Node.ELEMENT_NODE) {
|
---|
| 970 | Element e = (Element)n;
|
---|
| 971 | if(e.hasAttribute(NAME)
|
---|
| 972 | && e.getAttribute(NAME).equals(TITLE)) {
|
---|
| 973 | title = FedoraCommons.getValue(e);
|
---|
| 974 | break;
|
---|
| 975 | }
|
---|
| 976 | }
|
---|
| 977 | }
|
---|
| 978 | return title;
|
---|
| 979 | }
|
---|
| 980 |
|
---|
| 981 | /** @return the title metadata for the given document sections.
|
---|
| 982 | * These titles are returned in the same order as the given docPIDs
|
---|
| 983 | * and associated sectionIDs.
|
---|
| 984 | * (The docPIDs already contain the collection name anyway.)
|
---|
| 985 | * @param docPIDs - a list of pids identifying documents stored in the
|
---|
| 986 | * fedora repository.
|
---|
| 987 | * @param sectionIDs - a list of sectionIDs identifying individual sections
|
---|
| 988 | * of documents stored in the fedora repository whose titles are requested. */
|
---|
| 989 | public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
|
---|
| 990 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 991 | SAXException, IOException
|
---|
| 992 | {
|
---|
| 993 | String[] titles = new String[docPIDs.length];
|
---|
| 994 | for(int i = 0; i < docPIDs.length; i++) {
|
---|
| 995 | titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
|
---|
| 996 | }
|
---|
| 997 | return titles;
|
---|
| 998 | }
|
---|
| 999 |
|
---|
| 1000 | /** @return the title metadata for the given document section.
|
---|
| 1001 | * (The docPID already contain the collection name anyway.)
|
---|
| 1002 | * @param docPID - a pid identifying a document in the fedora repository.
|
---|
| 1003 | * @param sectionID - the sectionID of the section of the
|
---|
| 1004 | * document whose title is requested. */
|
---|
| 1005 | public String getSectionTitle(String docPID, String sectionID)
|
---|
| 1006 | throws UnsupportedEncodingException, RemoteException,
|
---|
| 1007 | SAXException, IOException
|
---|
| 1008 | {
|
---|
| 1009 | String ex = this.getSectionEXMetadata(docPID, sectionID);
|
---|
| 1010 | return getTitle(ex);
|
---|
| 1011 | }
|
---|
| 1012 |
|
---|
| 1013 | /** Searches the fedora repository for all greenstone:<colPID>* and
|
---|
| 1014 | * returns the PIDs of the data objects found, with the exception of
|
---|
| 1015 | * greenstone:<colPID>-collection, which is not a document but a
|
---|
| 1016 | * collection PID.
|
---|
| 1017 | * That is, pids of objects whose pid is greenstone:<colName>*
|
---|
| 1018 | * (but not greenstone:<colName>-collection itself, because that represents
|
---|
| 1019 | * the collection and not an object of the same collection) are returned.
|
---|
| 1020 | * All pids that do not map to a collection are assumed to be documents!
|
---|
| 1021 | * @return a list of the pids of all the (doc) objects in a collection.
|
---|
| 1022 | * @param colPID is the pid of the greenstone collection stored in
|
---|
| 1023 | * the fedora repository. */
|
---|
| 1024 | public String[] getCollectionDocs(String colPID)
|
---|
| 1025 | throws RemoteException
|
---|
| 1026 | {
|
---|
| 1027 | String colName = getCollectionName(colPID);
|
---|
| 1028 | //LOG.debug("colName: " + colName);
|
---|
| 1029 |
|
---|
| 1030 | // Search fedora objects for pid=greenstone:<colName>-*
|
---|
| 1031 | final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
|
---|
| 1032 | // searches for "greenstone:"+colName+"-*";
|
---|
| 1033 | FieldSearchQuery query = new FieldSearchQuery();
|
---|
| 1034 | query.setTerms(queryStr);
|
---|
| 1035 | query.setConditions(null);
|
---|
| 1036 | String[] pids = null;
|
---|
| 1037 |
|
---|
| 1038 | FieldSearchResult objects = AutoFinder.findObjects(
|
---|
| 1039 | APIA, new String[]{"pid", "title"}, maxresults, query);
|
---|
| 1040 | ObjectFields[] results = objects.getResultList();
|
---|
| 1041 |
|
---|
| 1042 | // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
|
---|
| 1043 | // that's not a document object:
|
---|
| 1044 | pids = new String[results.length-1]; // not storing collection object
|
---|
| 1045 | int index = 0; // keeps track of docPid index
|
---|
| 1046 | for(int i = 0; i < results.length; i++) {
|
---|
| 1047 | // check it's not a collection object
|
---|
| 1048 | if(!results[i].getPid().endsWith(_COLLECTION)) {
|
---|
| 1049 | pids[index] = results[i].getPid();
|
---|
| 1050 | index++;
|
---|
| 1051 | }
|
---|
| 1052 | }
|
---|
| 1053 |
|
---|
| 1054 | return pids;
|
---|
| 1055 | }
|
---|
| 1056 |
|
---|
| 1057 | /** Given the pid of a document fedora data object, this method will return
|
---|
| 1058 | * all itemIDs that are part of that data object and are Sections. For further
|
---|
| 1059 | * information see interface Comparable (implemented by String), SortedSet
|
---|
| 1060 | * and TreeSet.
|
---|
| 1061 | * @return an array of itemIDs of the Sections of the document,
|
---|
| 1062 | * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
|
---|
| 1063 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
| 1064 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
|
---|
| 1065 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
|
---|
| 1066 | * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
|
---|
| 1067 | */
|
---|
| 1068 | public String[] getSectionNames(String docPID) throws RemoteException {
|
---|
| 1069 | // DatastreamDef[] listDatastreams(
|
---|
| 1070 | // java.lang.String pid, java.lang.String asOfDateTime)
|
---|
| 1071 |
|
---|
| 1072 | // listDatastreams returns information on each item (including itemID=dsID)
|
---|
| 1073 | // in the document object indicated by docPID
|
---|
| 1074 |
|
---|
| 1075 | // Need to give an object version number, because null for asOfDateTime
|
---|
| 1076 | // does not return any datastreams!
|
---|
| 1077 | String[] times = APIA.getObjectHistory(docPID);
|
---|
| 1078 |
|
---|
| 1079 | DatastreamDef[] datastreams = APIA.listDatastreams(
|
---|
| 1080 | docPID, times[times.length-1]);
|
---|
| 1081 |
|
---|
| 1082 | // TreeSet is a SortedSet. We're going to put Strings into it,
|
---|
| 1083 | // and Strings implement interface Comparable already.
|
---|
| 1084 | TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
|
---|
| 1085 | for(int i = 0; i < datastreams.length; i++) {
|
---|
| 1086 | String itemID = datastreams[i].getID();
|
---|
| 1087 | if (itemID.startsWith("SECTION"))
|
---|
| 1088 | orderedList.add(itemID);
|
---|
| 1089 | }
|
---|
| 1090 |
|
---|
| 1091 | String[] sectionNames = new String[orderedList.size()];
|
---|
| 1092 | orderedList.toArray(sectionNames);
|
---|
| 1093 | orderedList = null;
|
---|
| 1094 | return sectionNames;
|
---|
| 1095 | }
|
---|
| 1096 |
|
---|
| 1097 | /** Given the pid of a document fedora data object, this method will return all
|
---|
| 1098 | * itemIDs that are part of that data object and are Sections, but just the
|
---|
| 1099 | * Section numbers are returned. For further information see interface Comparable
|
---|
| 1100 | * (implemented by String), SortedSet and TreeSet.
|
---|
| 1101 | * @return an array of itemIDs of the Section numbers of the document
|
---|
| 1102 | * indicated by docPID, in ascending order. Return values are of form: "1.*".
|
---|
| 1103 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
| 1104 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
|
---|
| 1105 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
|
---|
| 1106 | * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
|
---|
| 1107 | */
|
---|
| 1108 | public String[] getSectionNumbers(String docPID) throws RemoteException {
|
---|
| 1109 | String[] times = APIA.getObjectHistory(docPID);
|
---|
| 1110 |
|
---|
| 1111 | DatastreamDef[] datastreams
|
---|
| 1112 | = APIA.listDatastreams(docPID, times[times.length-1]);
|
---|
| 1113 | //Vector v = new Vector(datastreams.length);
|
---|
| 1114 | TreeSet orderedList = new TreeSet();
|
---|
| 1115 |
|
---|
| 1116 | for(int i = 0; i < datastreams.length; i++) {
|
---|
| 1117 | String itemID = datastreams[i].getID();
|
---|
| 1118 | if (itemID.startsWith("SECTION")) {
|
---|
| 1119 | //int index = SECTION.length();
|
---|
| 1120 | //itemID = itemID.substring(index);
|
---|
| 1121 | itemID = removePrefix(itemID, SECTION);
|
---|
| 1122 | orderedList.add(itemID);
|
---|
| 1123 | }
|
---|
| 1124 | }
|
---|
| 1125 |
|
---|
| 1126 | String[] sectionNumbers = new String[orderedList.size()];
|
---|
| 1127 | orderedList.toArray(sectionNumbers);
|
---|
| 1128 | orderedList = null;
|
---|
| 1129 |
|
---|
| 1130 | return sectionNumbers;
|
---|
| 1131 | }
|
---|
| 1132 |
|
---|
| 1133 | /** @return the titles for the document sections denoted by the parameters.
|
---|
| 1134 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
| 1135 | * @param sectionIDs is a list of identifiers identifying sections in the
|
---|
| 1136 | * document denoted by docPID, whose titles need to be returned. Each
|
---|
| 1137 | * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
|
---|
| 1138 | * or a section number (eg. 1.5.1). */
|
---|
| 1139 | public String[] getTitles(String docPID, String[] sectionIDs)
|
---|
| 1140 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1141 | SAXException, IOException
|
---|
| 1142 | {
|
---|
| 1143 | String[] titles = new String[sectionIDs.length];
|
---|
| 1144 | for(int i = 0; i < titles.length; i++)
|
---|
| 1145 | titles[i] = getTitle(docPID, sectionIDs[i]);
|
---|
| 1146 | return titles;
|
---|
| 1147 | }
|
---|
| 1148 |
|
---|
| 1149 | /** @return the title for the document section denoted by the parameters.
|
---|
| 1150 | * @param docPID is a fedora pid identifying a greenstone document object.
|
---|
| 1151 | * @param sectionID identifies the particular section in the document denoted
|
---|
| 1152 | * by docPID, whose title needs to be returned. The sectionID may be either a
|
---|
| 1153 | * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
|
---|
| 1154 | public String getTitle(String docPID, String sectionID)
|
---|
| 1155 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1156 | SAXException, IOException
|
---|
| 1157 | {
|
---|
| 1158 | // Compose the itemID for the EX data stream from the number in the
|
---|
| 1159 | // sectionID:
|
---|
| 1160 | String exID = removePrefix(sectionID, SECTION);
|
---|
| 1161 | exID = EX+convertToMetaNumber(exID);
|
---|
| 1162 |
|
---|
| 1163 | // Retrieve the extracted metadata stream (EX, in XML) for the given
|
---|
| 1164 | // section
|
---|
| 1165 | String exStream = getItem(docPID, exID);
|
---|
| 1166 |
|
---|
| 1167 | // Extract the title from the XML, look for:
|
---|
| 1168 | // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
|
---|
| 1169 | InputSource source = new InputSource(new StringReader(exStream));
|
---|
| 1170 | Document doc = builder.parse(source);
|
---|
| 1171 | Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
|
---|
| 1172 | NodeList children = docEl.getElementsByTagName(
|
---|
| 1173 | EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
|
---|
| 1174 | for(int i = 0; i < children.getLength(); i++) {
|
---|
| 1175 | Element e = (Element)children.item(i);
|
---|
| 1176 | if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
|
---|
| 1177 | return FedoraCommons.getValue(e); // extract and return the title
|
---|
| 1178 | }
|
---|
| 1179 | return ""; // if we got here, then we couldn't find a title
|
---|
| 1180 | }
|
---|
| 1181 |
|
---|
| 1182 | /** @return the section's XML (as a String) as it is stored in fedora.
|
---|
| 1183 | * Works out if sectionID is a sectionName or sectionNumber.
|
---|
| 1184 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1185 | * @param sectionID - identifyies the particular section in the
|
---|
| 1186 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1187 | public String getSection(String docPID, String sectionID)
|
---|
| 1188 | throws RemoteException, UnsupportedEncodingException
|
---|
| 1189 | {
|
---|
| 1190 | if(!sectionID.startsWith(SECTION)) // then it has only section number
|
---|
| 1191 | sectionID = SECTION+sectionID;
|
---|
| 1192 |
|
---|
| 1193 | String sectionXML = this.getItem(docPID, sectionID);
|
---|
| 1194 | return sectionXML;
|
---|
| 1195 | }
|
---|
| 1196 |
|
---|
| 1197 | /** @return the required section's DC metadata XML datastream.
|
---|
| 1198 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1199 | * @param sectionID - identifyies the particular section in the
|
---|
| 1200 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1201 | public String getSectionDCMetadata(String docPID, String sectionID)
|
---|
| 1202 | throws RemoteException, UnsupportedEncodingException
|
---|
| 1203 | {
|
---|
| 1204 | String dcID = removePrefix(sectionID, SECTION);
|
---|
| 1205 | // ensure we have just the section number
|
---|
| 1206 | dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
|
---|
| 1207 |
|
---|
| 1208 | // now get the DC datastream for that number
|
---|
| 1209 | String dcXML = this.getItem(docPID, dcID);
|
---|
| 1210 | return dcXML;
|
---|
| 1211 | }
|
---|
| 1212 |
|
---|
| 1213 | /** Returns the section EX metadata XML datastream for SectionID which may be
|
---|
| 1214 | * a section name or number. Currently a few EX files are named awkwardly:
|
---|
| 1215 | * the EX file for section 1.* is actually associated with datastream EX.*.
|
---|
| 1216 | * But subsequent EX datastreams are named appropriately: for instance,
|
---|
| 1217 | * EX2.1.1 matches with section 2.1.1
|
---|
| 1218 | * @return the required section's EX metadata XML datastream.
|
---|
| 1219 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1220 | * @param sectionID - identifyies the particular section in the
|
---|
| 1221 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1222 | public String getSectionEXMetadata(String docPID, String sectionID)
|
---|
| 1223 | throws RemoteException, UnsupportedEncodingException
|
---|
| 1224 | {
|
---|
| 1225 | String exID = removePrefix(sectionID, SECTION);
|
---|
| 1226 | exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
|
---|
| 1227 |
|
---|
| 1228 | // now get the EX datastream for that for number
|
---|
| 1229 | String exXML = this.getItem(docPID, exID);
|
---|
| 1230 | return exXML;
|
---|
| 1231 | }
|
---|
| 1232 |
|
---|
| 1233 | /** @return the XML content of the TOC of just that portion of the TOC which
|
---|
| 1234 | * contains the section denoted by sectionID and its direct child subsections.
|
---|
| 1235 | * The children are returned in the order they are encountered, which
|
---|
| 1236 | * happens to be in the required order of ascending sectionID.
|
---|
| 1237 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1238 | * @param sectionID - identifyies the particular section in the
|
---|
| 1239 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1240 | public Element getChildrenOfSectionXML(String docPID, String sectionID)
|
---|
| 1241 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1242 | SAXException, IOException
|
---|
| 1243 | {
|
---|
| 1244 | // Store just the number
|
---|
| 1245 | String sectionNumber = removePrefix(sectionID, SECTION);
|
---|
| 1246 | // get the TOC XML datastream as a String
|
---|
| 1247 | String xmlTOC = getTOC(docPID);
|
---|
| 1248 |
|
---|
| 1249 | // convert it into a DOM document
|
---|
| 1250 | InputSource source = new InputSource(new StringReader(xmlTOC));
|
---|
| 1251 | Document doc = builder.parse(source);
|
---|
| 1252 | // toplevel element docEl = <Section id="1"></Section>
|
---|
| 1253 | Element docEl = doc.getDocumentElement();
|
---|
| 1254 |
|
---|
| 1255 | // check whether we're requested to return the toplevel element itself
|
---|
| 1256 | if(sectionID.equals("") || // subSection of entire docPID is requested
|
---|
| 1257 | (docEl.hasAttribute(ID) && docEl.getAttribute(ID).equals(sectionNumber)))
|
---|
| 1258 | return getSubstructure(docEl, false);
|
---|
| 1259 |
|
---|
| 1260 | // Otherwise, get all <Section> elements and find the
|
---|
| 1261 | // <Section id="sectionNumber"></Section> and return that and its
|
---|
| 1262 | // children
|
---|
| 1263 | NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
|
---|
| 1264 | for(int i = 0; i < sections.getLength(); i++) {
|
---|
| 1265 | Element e = (Element)sections.item(i);
|
---|
| 1266 | if(e.hasAttribute(ID)
|
---|
| 1267 | && e.getAttribute(ID).equals(sectionNumber))
|
---|
| 1268 | {
|
---|
| 1269 | //System.err.println("Found: " + e.getAttribute(ID));
|
---|
| 1270 | return getSubstructure(e, false); // false: get just e and children
|
---|
| 1271 | }
|
---|
| 1272 | }
|
---|
| 1273 | return null; // not found
|
---|
| 1274 | }
|
---|
| 1275 |
|
---|
| 1276 | /** @return a string representing the XML content of the TOC of just
|
---|
| 1277 | * that portion of the TOC which contains the section denoted by sectionID
|
---|
| 1278 | * and its direct child subsections.
|
---|
| 1279 | * The children are returned in the order they are encountered, which
|
---|
| 1280 | * happens to be in the required order of ascending sectionID.
|
---|
| 1281 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1282 | * @param sectionID - identifyies the particular section in the
|
---|
| 1283 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1284 | public String getChildrenOfSection(String docPID, String sectionID)
|
---|
| 1285 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1286 | SAXException, IOException, TransformerException
|
---|
| 1287 | {
|
---|
| 1288 | Element children = getChildrenOfSectionXML(docPID, sectionID);
|
---|
| 1289 | return (children == null) ? "" : FedoraCommons.elementToString(children);
|
---|
| 1290 | }
|
---|
| 1291 |
|
---|
| 1292 | /** @return the part of the TOC XML file (which outlines doc structure)
|
---|
| 1293 | * relating to the given section. This includes the section denoted by
|
---|
| 1294 | * sectionID as well as all descendent subsections thereof.
|
---|
| 1295 | * @param docPID - a fedora pid identifying a greenstone document object.
|
---|
| 1296 | * @param sectionID - identifyies the particular section in the
|
---|
| 1297 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1298 | public Element getSubsectionXML(String docPID, String sectionID)
|
---|
| 1299 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1300 | SAXException, IOException
|
---|
| 1301 | {
|
---|
| 1302 | // get the TableOfContents (TOC) XML datastream as a String
|
---|
| 1303 | String xmlTOC = getTOC(docPID);
|
---|
| 1304 |
|
---|
| 1305 | // convert it into a DOM document
|
---|
| 1306 | InputSource source = new InputSource(new StringReader(xmlTOC));
|
---|
| 1307 | Document doc = builder.parse(source);
|
---|
| 1308 | // toplevel element docEl = <Section id="1"></Section>
|
---|
| 1309 | Element docEl = doc.getDocumentElement();
|
---|
| 1310 |
|
---|
| 1311 | if(sectionID.equals("")) // subSection of entire docPID is requested
|
---|
| 1312 | return docEl;
|
---|
| 1313 |
|
---|
| 1314 | // Store just the number
|
---|
| 1315 | String sectionNumber = removePrefix(sectionID, SECTION);
|
---|
| 1316 | // Check whether we're requested to return the toplevel element itself
|
---|
| 1317 | // If sectionNumber=1, then the top-level element/document element
|
---|
| 1318 | // of the TOC XML is requested, so return the TOC as is.
|
---|
| 1319 | if(sectionNumber.equals("1"))
|
---|
| 1320 | return docEl;
|
---|
| 1321 |
|
---|
| 1322 | // Get all <Section> elements and find the
|
---|
| 1323 | // <Section id="sectionNumber"></Section> and return that
|
---|
| 1324 | NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
|
---|
| 1325 | for(int i = 0; i < sections.getLength(); i++) {
|
---|
| 1326 | Element e = (Element)sections.item(i);
|
---|
| 1327 | if(e.hasAttribute(ID)
|
---|
| 1328 | && e.getAttribute(ID).equals(sectionNumber)) {
|
---|
| 1329 | //System.err.println("Found: " + e.getAttribute(ID));
|
---|
| 1330 | return getSubstructure(e, true); // true:get all descendents
|
---|
| 1331 | }
|
---|
| 1332 | }
|
---|
| 1333 | return null; // not found
|
---|
| 1334 | }
|
---|
| 1335 |
|
---|
| 1336 | /** @return a String representation of the part of the TOC XML file
|
---|
| 1337 | * (which outlines doc structure) relating to the given section. This
|
---|
| 1338 | * includes the section denoted by sectionID as well as all descendent
|
---|
| 1339 | * subsections thereof.
|
---|
| 1340 | * @param docPID a fedora pid identifying a greenstone document object.
|
---|
| 1341 | * @param sectionID identifyies the particular section in the
|
---|
| 1342 | * document denoted by docPID, may be a section name or number. */
|
---|
| 1343 | public String getSubsection(String docPID, String sectionID)
|
---|
| 1344 | throws RemoteException, UnsupportedEncodingException, SAXException,
|
---|
| 1345 | IOException, TransformerException
|
---|
| 1346 | {
|
---|
| 1347 | // Store just the number
|
---|
| 1348 | String sectionNumber = removePrefix(sectionID, SECTION);
|
---|
| 1349 | // get the TableOfContents (TOC) XML datastream as a String
|
---|
| 1350 | String xmlTOC = getTOC(docPID);
|
---|
| 1351 |
|
---|
| 1352 | // Check whether we're requested to return the toplevel element itself
|
---|
| 1353 | // If sectionNumber=1, then the top-level element/document element
|
---|
| 1354 | // of the TOC XML is requested, so return the TOC as is.
|
---|
| 1355 | if(sectionNumber.equals("1"))
|
---|
| 1356 | return xmlTOC;
|
---|
| 1357 |
|
---|
| 1358 | // else
|
---|
| 1359 | Element subsection = getSubsectionXML(docPID, sectionID);
|
---|
| 1360 | return (subsection == null) ? "" : FedoraCommons.elementToString(subsection);
|
---|
| 1361 | }
|
---|
| 1362 |
|
---|
| 1363 | /** Implements browsing document titles of a greenstone collection stored in
|
---|
| 1364 | * the fedora repository by letter.
|
---|
| 1365 | * @return the document pids whose titles start with the given letter.
|
---|
| 1366 | * @param letter - the starting letter to browse by.
|
---|
| 1367 | */
|
---|
| 1368 | public String[] browseTitlesByLetter(final String collName, final String letter)
|
---|
| 1369 | throws RemoteException, FedoraVersionNotSupportedException
|
---|
| 1370 | {
|
---|
| 1371 | String[] pids = null;
|
---|
| 1372 |
|
---|
| 1373 | // We want to do the following kind of search (assuming letter=f
|
---|
| 1374 | // and collName=demo):
|
---|
| 1375 | // pid~greenstone:demo* title~f*
|
---|
| 1376 |
|
---|
| 1377 | // We don't need to normalise the letter first (to search titles starting
|
---|
| 1378 | // with both uppercase and lowercase versions of the letter), because
|
---|
| 1379 | // Fedora always searches for both.
|
---|
| 1380 | // HOWEVER, searching for title~f* returns all documents containing f (or F)
|
---|
| 1381 | // ANYWHERE in their titles!
|
---|
| 1382 | // SOLUTION: search the collection for all titles containing f as given,
|
---|
| 1383 | // retrieving pid and title fields. Then from the list of results, select
|
---|
| 1384 | // only those titles that start with the given letter.
|
---|
| 1385 | // This may seem an unnecessarily cumbersome job (when it looked like it
|
---|
| 1386 | // should have worked with just title~f*), BUT, at least the resulting
|
---|
| 1387 | // documents will be reduced to a set of titles containing f; rather than
|
---|
| 1388 | // having to search *all* documents in the collection.
|
---|
| 1389 | final String title = letter+WILDCARD;
|
---|
| 1390 |
|
---|
| 1391 | FieldSearchResult objects = findObjectsWithTitlesContaining(
|
---|
| 1392 | collName, title);
|
---|
| 1393 | ObjectFields[] results = objects.getResultList();
|
---|
| 1394 | TreeSet v = new TreeSet(); // TreeSet to return the results in
|
---|
| 1395 | //alphabetical order
|
---|
| 1396 | for(int i = 0; i < results.length; i++) {
|
---|
| 1397 | // from the result list, select those titles that don't
|
---|
| 1398 | // just *contain* the letter, but actually start with it:
|
---|
| 1399 | String resultTitle = results[i].getTitle(0);
|
---|
| 1400 | if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
|
---|
| 1401 | v.add(results[i].getPid());
|
---|
| 1402 | //LOG.debug(resultTitle);
|
---|
| 1403 | }
|
---|
| 1404 | }
|
---|
| 1405 | pids = new String[v.size()];
|
---|
| 1406 | v.toArray(pids);
|
---|
| 1407 | return pids;
|
---|
| 1408 | }
|
---|
| 1409 |
|
---|
| 1410 | /** Implements querying document DC titles of a greenstone collection stored in
|
---|
| 1411 | * the fedora repository for a term that may occur anywhere in their titles.
|
---|
| 1412 | * @return the document pids whose DC titles contain the parameter term.
|
---|
| 1413 | * @param titleContents - the word or phrase to search the collection's
|
---|
| 1414 | * document titles for. Only one word, and this method finds Greenstone
|
---|
| 1415 | * DOCUMENT titles CONTAINING that word (if any).
|
---|
| 1416 | * @param startsWith - if true, searches for titles that start with
|
---|
| 1417 | * titleContents. Else it searches for titles that contain titleContents. */
|
---|
| 1418 | public String[] searchDocumentTitles(String collName, String titleContents,
|
---|
| 1419 | boolean startsWith)
|
---|
| 1420 | throws RemoteException, FedoraVersionNotSupportedException
|
---|
| 1421 | {
|
---|
| 1422 | String[] pids = null;
|
---|
| 1423 |
|
---|
| 1424 | // We want to do the following kind of search (when written in Fedora's
|
---|
| 1425 | // REST format - see http://localhost:8080/fedora/search):
|
---|
| 1426 | // pid~greenstone:<colname>* title~<1st word of titleContents>
|
---|
| 1427 |
|
---|
| 1428 | // We don't need to normalise the word first (to search titles starting
|
---|
| 1429 | // with both uppercase and lowercase versions of it), because
|
---|
| 1430 | // Fedora always searches for the normalised word.
|
---|
| 1431 |
|
---|
| 1432 | // 2 difficulties:
|
---|
| 1433 | // - We can only search for single words with Fedora's Conditional Search.
|
---|
| 1434 | // Obtain pids and titles of documents containing the first word and then
|
---|
| 1435 | // we filter the titles to those containing the entire phrase of
|
---|
| 1436 | // titleContents.
|
---|
| 1437 | // - Searching for title~FirstWord returns all documents containing
|
---|
| 1438 | // this word ANYWHERE in their titles. If parameter startsWith is false,
|
---|
| 1439 | // then this is fine. But if parameter startsWith is true, then go
|
---|
| 1440 | // through all the resulting titles found (containing FirstWord), select
|
---|
| 1441 | // only pids of those titles that contain the entire phrase titleContents
|
---|
| 1442 |
|
---|
| 1443 | final String pid = GREENSTONE_+collName+WILDCARD;
|
---|
| 1444 |
|
---|
| 1445 | int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
|
---|
| 1446 | // if titleContents is a phrase (contains space), then it's not
|
---|
| 1447 | // a single word, in which case search for just the first word
|
---|
| 1448 | String title = titleContents; // assume it's a single word
|
---|
| 1449 | if(indexOfFirstSpace != -1) // if not single word but a phrase, store
|
---|
| 1450 | title = titleContents.substring(0, indexOfFirstSpace); // 1st word
|
---|
| 1451 |
|
---|
| 1452 | FieldSearchResult objects = findObjectsWithTitlesContaining(
|
---|
| 1453 | collName, title);
|
---|
| 1454 | if(objects == null) {
|
---|
| 1455 | final String[] empty = {};
|
---|
| 1456 | return empty;
|
---|
| 1457 | }
|
---|
| 1458 |
|
---|
| 1459 | // Go through all the titles found and for those that match the criteria*,
|
---|
| 1460 | // store their pid. *Criteria: titles that start with OR contain the
|
---|
| 1461 | // word OR phrase of titleContents.
|
---|
| 1462 | ObjectFields[] results = objects.getResultList();
|
---|
| 1463 | Vector v = new Vector(); // return pids in the order found
|
---|
| 1464 | for(int i = 0; i < results.length; i++) {
|
---|
| 1465 | // from the result list, select those titles that don't
|
---|
| 1466 | // just *contain* the first word, but the entire phrase of
|
---|
| 1467 | // words in titleContents:
|
---|
| 1468 | String resultTitle = results[i].getTitle(0);
|
---|
| 1469 | boolean accepted = false; // accept the resultTitle found
|
---|
| 1470 |
|
---|
| 1471 | // if titleContents is a single word and we are checking
|
---|
| 1472 | // whether resultTitle contains titleContents:
|
---|
| 1473 | if(indexOfFirstSpace == -1) { // titleContents is a single word
|
---|
| 1474 | if(!startsWith) // titles that *contain* the word titleContents
|
---|
| 1475 | accepted = true; //accept all titles found
|
---|
| 1476 | // else startWith: accept titles starting with word titleContents
|
---|
| 1477 | else if (resultTitle.toLowerCase().startsWith(
|
---|
| 1478 | titleContents.toLowerCase()))
|
---|
| 1479 | accepted = true;
|
---|
| 1480 |
|
---|
| 1481 | } else { // otherwise, titleContents is a phrase of >1 word, need
|
---|
| 1482 | // to check that the result title contains the entire phrase
|
---|
| 1483 | if(startsWith && resultTitle.toLowerCase().startsWith(
|
---|
| 1484 | titleContents.toLowerCase()))
|
---|
| 1485 | accepted = true;
|
---|
| 1486 | else if(!startsWith && resultTitle.toLowerCase().contains(
|
---|
| 1487 | titleContents.toLowerCase()))
|
---|
| 1488 | accepted = true;
|
---|
| 1489 | }
|
---|
| 1490 | // if the resultTitle fit the criteria, store its pid
|
---|
| 1491 | if(accepted) {
|
---|
| 1492 | v.add(results[i].getPid());
|
---|
| 1493 | //System.out.println(resultTitle);
|
---|
| 1494 | }
|
---|
| 1495 |
|
---|
| 1496 | }
|
---|
| 1497 | pids = new String[v.size()];
|
---|
| 1498 | v.toArray(pids);
|
---|
| 1499 | return pids;
|
---|
| 1500 | }
|
---|
| 1501 |
|
---|
| 1502 |
|
---|
| 1503 | /**
|
---|
| 1504 | * @param collName - the collection of documents we'll be searching in.
|
---|
| 1505 | * @param titleWord - the word we'll be searching the document titles for.
|
---|
| 1506 | * (Fedora's search returns all objects whose title contains that word).
|
---|
| 1507 | *
|
---|
| 1508 | * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
|
---|
| 1509 | * (see link):
|
---|
| 1510 | * <pre>
|
---|
| 1511 | * "There are two search methods: a search on all fields or a search on
|
---|
| 1512 | * specific fields. To search all fields the setTerms function of the
|
---|
| 1513 | * FieldSearchQuery must be used, with the paramater being the desired string.
|
---|
| 1514 | *
|
---|
| 1515 | * To search by specific fields, you must create an array of Condition
|
---|
| 1516 | * objects. Each condition consists of three parts:
|
---|
| 1517 | * the field to be searched (.setProperty()),
|
---|
| 1518 | * the operation to be used (.setOperator(ComparisonOperator. <operator>)),
|
---|
| 1519 | * and the search string (.setValue())"
|
---|
| 1520 | * </pre>
|
---|
| 1521 | * We want to use the second search method above when browsing and searching,
|
---|
| 1522 | * and search for: pid~greenstone:<collName>* title~<letter>*
|
---|
| 1523 | * or pid~greenstone:<collName>* title~<first word of search phrase>
|
---|
| 1524 | * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
|
---|
| 1525 | *
|
---|
| 1526 | * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
|
---|
| 1527 | * web services are defined. (The web.xml defines the "Servlets for REST-based
|
---|
| 1528 | * interfaces to the Fedora Repository Server").
|
---|
| 1529 | * Do a search on the word "search":
|
---|
| 1530 | * fedora.server.access.FieldSearchServlet is the class we need to look at
|
---|
| 1531 | * It accesses a different Condition.java class: fedora.server.search.Condition.java
|
---|
| 1532 | * The above is what is used by the REST-based interface in FieldSearchServlet.java
|
---|
| 1533 | * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
|
---|
| 1534 | * is what's used in the fedora client application that makes use of
|
---|
| 1535 | * the SOAP-based interface.
|
---|
| 1536 | *
|
---|
| 1537 | * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
|
---|
| 1538 | * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
|
---|
| 1539 | * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
|
---|
| 1540 | * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
|
---|
| 1541 | */
|
---|
| 1542 | protected FieldSearchResult findObjectsWithTitlesContaining(
|
---|
| 1543 | String collName, final String titleWord)
|
---|
| 1544 | throws RemoteException, FedoraVersionNotSupportedException
|
---|
| 1545 | {
|
---|
| 1546 | // Searching for pids of the form "greenstone:gs2mgdemo*";
|
---|
| 1547 | final String pid = GREENSTONE_+collName+WILDCARD;
|
---|
| 1548 |
|
---|
| 1549 | Condition[] conditions = new Condition[2];
|
---|
| 1550 | conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
|
---|
| 1551 | conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
|
---|
| 1552 |
|
---|
| 1553 | FieldSearchQuery query = new FieldSearchQuery();
|
---|
| 1554 | query.setConditions(conditions);
|
---|
| 1555 |
|
---|
| 1556 | // We'd like pid and title returned for each object, because we'll make
|
---|
| 1557 | // use of title. We pass maxResults=null to get all objects that match
|
---|
| 1558 | // (i.e. all collections).
|
---|
| 1559 | FieldSearchResult objects = null;
|
---|
| 1560 | try {
|
---|
| 1561 | objects = AutoFinder.findObjects(
|
---|
| 1562 | APIA, new String[]{"pid", "title"}, maxresults, query);
|
---|
| 1563 | // collection = APIA.findObjects(new String[]{"pid", "title"},
|
---|
| 1564 | // new NonNegativeInteger(Integer.toString(maxresults)), query);
|
---|
| 1565 | } catch(RemoteException ex) {
|
---|
| 1566 | if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
|
---|
| 1567 | // fedoraVersion is too low, searching/browsing is not possible
|
---|
| 1568 | // (because class Condition has changed after 2.0, from 2.1.1
|
---|
| 1569 | // onwards)
|
---|
| 1570 | throw new FedoraVersionNotSupportedException(fedoraVersion);
|
---|
| 1571 | } else {
|
---|
| 1572 | LOG.error(
|
---|
| 1573 | "Remote exception when calling web service operation " +
|
---|
| 1574 | "findObject() to execute search:\n" + ex.getMessage());
|
---|
| 1575 | ex.printStackTrace();
|
---|
| 1576 | throw ex;
|
---|
| 1577 | }
|
---|
| 1578 | }
|
---|
| 1579 | return objects; // return the FieldSearchResult objects found
|
---|
| 1580 | }
|
---|
| 1581 |
|
---|
| 1582 | /** @return the <docName> in the parameter docPID (which is of the form:
|
---|
| 1583 | * greenstone:<colname>-<docName>)
|
---|
| 1584 | * @param docPID - pid of a greenstone document in the fedora repository. */
|
---|
| 1585 | public String getDocName(String docPID) {
|
---|
| 1586 | return docPID.substring(docPID.indexOf('-')+1);
|
---|
| 1587 | }
|
---|
| 1588 |
|
---|
| 1589 | /** @return the <name> in the parameter collPID
|
---|
| 1590 | * (greenstone:<name>-collection)
|
---|
| 1591 | * If collPID is a docPID, this method does the same: return the <name>
|
---|
| 1592 | * in the docPID (greenstone:<name>-docID).
|
---|
| 1593 | * @param collPID - pid of a greenstone collection in the fedora repository. */
|
---|
| 1594 | public String getCollectionName(String collPID) {
|
---|
| 1595 | return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
|
---|
| 1596 | }
|
---|
| 1597 |
|
---|
| 1598 | /** Convert the given Element to a String representing the same XML.
|
---|
| 1599 | * @return an element containing a copy element e with either only its child
|
---|
| 1600 | * elements or with all its descendents (depending on whether parameter
|
---|
| 1601 | * descendents is true or false).
|
---|
| 1602 | * @param e - the element to start copying from.
|
---|
| 1603 | * @param descendents - if true, e is copied with all its descendetns into the
|
---|
| 1604 | * element that's returned. If false, only e and its direct children are copied
|
---|
| 1605 | * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
|
---|
| 1606 | */
|
---|
| 1607 | protected Element getSubstructure(Element e, boolean descendents)
|
---|
| 1608 | {
|
---|
| 1609 | Document doc = builder.newDocument();
|
---|
| 1610 | Node n = doc.importNode(e, descendents);
|
---|
| 1611 | // descendents=true: import/copy descendents.
|
---|
| 1612 | // Else, copy just current node e (later copy its direct children)
|
---|
| 1613 | doc.appendChild(n); // need to put the copied node into a document
|
---|
| 1614 | // else it won't have a parent doc (DOMSource can't work with it
|
---|
| 1615 | // without it having a document parent).
|
---|
| 1616 |
|
---|
| 1617 | // if we are not recursively copying all descendents, then copy just
|
---|
| 1618 | // the childnodes:
|
---|
| 1619 | if(!descendents) { // then copy just the children
|
---|
| 1620 | // get e's children and copy them into the new document
|
---|
| 1621 | NodeList children = e.getChildNodes();
|
---|
| 1622 | for(int i = 0; i < children.getLength(); i++) {
|
---|
| 1623 | // create copy
|
---|
| 1624 | n = doc.importNode(children.item(i), false);
|
---|
| 1625 | // attach it to parent
|
---|
| 1626 | doc.getDocumentElement().appendChild(n);
|
---|
| 1627 |
|
---|
| 1628 | // Now we need to indicate whether this new node (child) is a leaf
|
---|
| 1629 | // or not. (This is necessary for getChildrenOfSection(), else
|
---|
| 1630 | // it's hard to know if the children are leaves or have further
|
---|
| 1631 | // subsections.
|
---|
| 1632 | if(n.getNodeName().equals(SECTION_ELEMENT)) {
|
---|
| 1633 | // we're dealing only with section children
|
---|
| 1634 |
|
---|
| 1635 | // Check if the matching original had children:
|
---|
| 1636 | Element originalsChild = (Element)children.item(i);
|
---|
| 1637 | NodeList grandchildren =
|
---|
| 1638 | originalsChild.getElementsByTagName(SECTION_ELEMENT);
|
---|
| 1639 | if(grandchildren.getLength() > 0) {
|
---|
| 1640 | // original's child has children, so indicate this
|
---|
| 1641 | // in the copied child:
|
---|
| 1642 | Element child = (Element)n;
|
---|
| 1643 | child.setAttribute(TYPE, INTERNAL_NODE);
|
---|
| 1644 | }
|
---|
| 1645 | }
|
---|
| 1646 | }
|
---|
| 1647 | }
|
---|
| 1648 | return doc.getDocumentElement();
|
---|
| 1649 | }
|
---|
| 1650 |
|
---|
| 1651 |
|
---|
| 1652 | /**
|
---|
| 1653 | * Return a datastream of a document, given the document's id
|
---|
| 1654 | * and the item id of the datastream which is to be retrieved.
|
---|
| 1655 | * @return the XML (in String form) of the item denoted by itemID
|
---|
| 1656 | * that's part of the fedora data object denoted by docPID.
|
---|
| 1657 | * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
|
---|
| 1658 | * Can't retrieve images denoted by itemID using this method, only items
|
---|
| 1659 | * that are of XML format.
|
---|
| 1660 | * @param docPID - pid of a greenstone document in the fedora repository.
|
---|
| 1661 | * @param itemID - the itemID of a datastream of the fedora object
|
---|
| 1662 | * identified by docPID.
|
---|
| 1663 | */
|
---|
| 1664 | protected String getItem(String docPID, String itemID)
|
---|
| 1665 | throws RemoteException, UnsupportedEncodingException
|
---|
| 1666 | {
|
---|
| 1667 | // MIMETypedStream getDatastreamDissemination(
|
---|
| 1668 | // String pid, String dsID, asOfDateTime)
|
---|
| 1669 | MIMETypedStream datastream
|
---|
| 1670 | = APIA.getDatastreamDissemination(docPID, itemID, null);
|
---|
| 1671 | return new String(datastream.getStream(), UTF8);
|
---|
| 1672 | }
|
---|
| 1673 |
|
---|
| 1674 | /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
|
---|
| 1675 | * returns "1.2.1".
|
---|
| 1676 | * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
|
---|
| 1677 | * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
|
---|
| 1678 | * However, the string str is returned unchanged if the prefix does not occur
|
---|
| 1679 | * at the start of str.
|
---|
| 1680 | * @return the String parameter str without the prefix.
|
---|
| 1681 | * It can be used to return the number of an itemID of a greenstone document
|
---|
| 1682 | * stored in the fedora repository without the given prefix.
|
---|
| 1683 | * @param prefix - the prefix which ought to be removed from the itemID.
|
---|
| 1684 | * @param str - the value of the itemID.
|
---|
| 1685 | */
|
---|
| 1686 | protected String removePrefix(String str, String prefix) {
|
---|
| 1687 | // do nothing in those cases where the prefix is not in param str
|
---|
| 1688 | if(!str.startsWith(prefix))
|
---|
| 1689 | return str;
|
---|
| 1690 | // otherwise:
|
---|
| 1691 | if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
|
---|
| 1692 | return "1" + str.substring(prefix.length());
|
---|
| 1693 | } else {
|
---|
| 1694 | return str.substring(prefix.length());
|
---|
| 1695 | }
|
---|
| 1696 | }
|
---|
| 1697 |
|
---|
| 1698 | /** Given a number of the form x(.y.z), this method returns this number
|
---|
| 1699 | * as is, except when x = 1, in which case, it would return .y.z
|
---|
| 1700 | * That is, given number=3.2.1, this method would return 3.2.1
|
---|
| 1701 | * But, given number=1.2.3, this method would return .2.3.
|
---|
| 1702 | * When number=1, it is NOT a special case: "" is returned as explained.
|
---|
| 1703 | * @param number - a proper (fedora-greenstone document) section number
|
---|
| 1704 | * @return the same number as it ought to be for the associated EX, DC datastreama.
|
---|
| 1705 | */
|
---|
| 1706 | protected String convertToMetaNumber(String number) {
|
---|
| 1707 | if(number.startsWith("1.") || number.equals("1"))
|
---|
| 1708 | return number.substring(1); // remove the first char: the initial '1'
|
---|
| 1709 | else return number;
|
---|
| 1710 | }
|
---|
| 1711 |
|
---|
| 1712 | /** @return fedora's baseURL. It's of the form
|
---|
| 1713 | * "http://localhost:8080/fedora" */
|
---|
| 1714 | public String getBaseURL() { return baseURL; }
|
---|
| 1715 |
|
---|
| 1716 | /** @return the portAddressURL (in use) of the Fedora APIA
|
---|
| 1717 | * web service (should be the endpoint location in the APIA's
|
---|
| 1718 | * WSDL file).
|
---|
| 1719 | * It's usually of the form baseURL+"/services/access" */
|
---|
| 1720 | public String getPortAddressURL() {
|
---|
| 1721 | return this.baseURL + this.portAddressSuffix;
|
---|
| 1722 | }
|
---|
| 1723 |
|
---|
| 1724 | /** @return the baseURL for gsdlAssocFiles */
|
---|
| 1725 | public String getAssocFileBaseURL() { return baseURL + "/get/"; }
|
---|
| 1726 |
|
---|
| 1727 | public static void main(String args[]) {
|
---|
| 1728 | try {
|
---|
| 1729 | FedoraConnection fedoraCon
|
---|
| 1730 | = new FedoraConnection(new File("fedoraGS3.properties"));
|
---|
| 1731 |
|
---|
| 1732 | String[] pids = null;
|
---|
| 1733 | pids = fedoraCon.getCollections();
|
---|
| 1734 | String[] titles = fedoraCon.getCollectionTitles(pids);
|
---|
| 1735 | for(int i = 0; i < pids.length; i++) {
|
---|
| 1736 | System.out.println("extracted title:" + titles[i]);
|
---|
| 1737 | String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
|
---|
| 1738 | String[] docTitles = fedoraCon.getDocTitles(docPIDs);
|
---|
| 1739 | for(int j = 0; j < docPIDs.length; j++) {
|
---|
| 1740 | System.out.println("\tExtr doc title: " + docTitles[j]);
|
---|
| 1741 | }
|
---|
| 1742 | }
|
---|
| 1743 |
|
---|
| 1744 | String PID = "greenstone:gs2mgdemo-collection";
|
---|
| 1745 | String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
|
---|
| 1746 | String dcXML = fedoraCon.getDC(PID);
|
---|
| 1747 | String exXML = fedoraCon.getEX(PID);
|
---|
| 1748 | String tocXML = fedoraCon.getTOC(docPID);
|
---|
| 1749 | System.out.println("Dublin Core Metadata for " + PID
|
---|
| 1750 | + " is:\n" + dcXML);
|
---|
| 1751 | System.out.println("GS3 extracted metadata for " + PID
|
---|
| 1752 | + " is:\n" + exXML);
|
---|
| 1753 | System.out.println("Table of Contents for " + docPID
|
---|
| 1754 | + " is:\n" + tocXML);
|
---|
| 1755 |
|
---|
| 1756 |
|
---|
| 1757 | String[] sectionNames = fedoraCon.getSectionNames(docPID);
|
---|
| 1758 | System.out.println("\nSection names for " + docPID + " are:");
|
---|
| 1759 | for(int i = 0; i < sectionNames.length; i++)
|
---|
| 1760 | System.out.println(sectionNames[i]);
|
---|
| 1761 |
|
---|
| 1762 | String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
|
---|
| 1763 | //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
|
---|
| 1764 | String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
|
---|
| 1765 | System.out.println("\nSection numbers for " + docPID + " are:");
|
---|
| 1766 | for(int i = 0; i < sectionNumbers.length; i++) {
|
---|
| 1767 | //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
|
---|
| 1768 | System.out.println(sectionNames[i] + " " + sectionTitles[i]);
|
---|
| 1769 | }
|
---|
| 1770 |
|
---|
| 1771 | String sectionID = "SECTION1"; //SECTION1.5
|
---|
| 1772 | System.out.println("\n");
|
---|
| 1773 | System.out.println(sectionID+ " - entire subsection:\n"
|
---|
| 1774 | + fedoraCon.getSubsection(docPID, sectionID));
|
---|
| 1775 |
|
---|
| 1776 | System.out.println(sectionID + " and children:\n"
|
---|
| 1777 | + fedoraCon.getChildrenOfSection(docPID, sectionID));
|
---|
| 1778 |
|
---|
| 1779 | System.out.println(
|
---|
| 1780 | "browsing greenstone's gs2mgdemo collection by (first) letter F:");
|
---|
| 1781 | pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
|
---|
| 1782 | for(int i = 0; i < pids.length; i++)
|
---|
| 1783 | System.out.println(pids[i]);
|
---|
| 1784 |
|
---|
| 1785 | System.out.println(
|
---|
| 1786 | "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
|
---|
| 1787 | pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
|
---|
| 1788 | for(int i = 0; i < pids.length; i++)
|
---|
| 1789 | System.out.println(pids[i]);
|
---|
| 1790 |
|
---|
| 1791 | System.out.println("\nDone - exiting.");
|
---|
| 1792 | System.exit(0);
|
---|
| 1793 | } catch(RemoteException re) {
|
---|
| 1794 | System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
|
---|
| 1795 | re.printStackTrace();
|
---|
| 1796 | } catch(Exception e) {
|
---|
| 1797 | System.out.println("Unable to instantiate FedoraConnection\n" + e);
|
---|
| 1798 | e.printStackTrace();
|
---|
| 1799 | //LOG.error("Unable to instantiate FedoraConnection\n" + e);
|
---|
| 1800 | }
|
---|
| 1801 | }
|
---|
| 1802 | } |
---|