root/other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java @ 26270

Revision 26270, 86.2 KB (checked in by ak19, 6 years ago)

1. Now checks request for nodeStructureInfo documentType, as is needed to get it working with GS3 again. 2. Added in reusable constants of gsdl3/util/AbstractBasicDocument.java since these have now been made public constants.

Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
23import org.greenstone.gsdl3.util.GSXML;
24
25import fedora.client.utility.AutoFinder;
26import fedora.server.access.FedoraAPIAServiceLocator;
27// The object for accessing FedoraAPI-A web services:
28import fedora.server.access.FedoraAPIA;
29
30// The definitions for all complex fedora types:
31import fedora.server.types.gen.MIMETypedStream;
32import fedora.server.types.gen.RepositoryInfo;
33import fedora.server.types.gen.FieldSearchResult;
34import fedora.server.types.gen.FieldSearchQuery;
35import fedora.server.types.gen.DatastreamDef;
36import fedora.server.types.gen.ObjectFields;
37import fedora.server.types.gen.Condition;
38import fedora.server.types.gen.ComparisonOperator;
39//import fedora.server.types.gen.*;
40
41import javax.net.ssl.SSLHandshakeException;
42import java.net.ConnectException;
43import org.xml.sax.SAXException;
44import java.io.UnsupportedEncodingException;
45import java.io.IOException;
46import javax.xml.parsers.ParserConfigurationException;
47import java.net.MalformedURLException;
48import java.rmi.RemoteException;
49   
50import java.io.StringReader;
51import java.io.FileInputStream;
52import java.io.File;
53import java.util.TreeSet;
54import java.util.Properties;
55import java.util.Vector;
56
57import java.awt.GridLayout;
58import javax.swing.JLabel;
59import javax.swing.JOptionPane;
60import javax.swing.JPanel;
61import javax.swing.JPasswordField;
62import javax.swing.JTextField;
63
64import org.apache.log4j.Logger;
65import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
66import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
68import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
69import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
70
71import javax.xml.parsers.DocumentBuilderFactory;
72import javax.xml.parsers.DocumentBuilder;
73import javax.xml.transform.*;
74
75import org.xml.sax.InputSource;
76import org.w3c.dom.Document;
77import org.w3c.dom.Element;
78import org.w3c.dom.NodeList;
79import org.w3c.dom.Node;
80
81/** Class that establishes a connection with Fedora's web services (via
82 * Java stub classes for the same) and then provides methods to retrieve
83 * Greenstone-specific data, such as the TOC, EX, DC,and Section
84 * datastreams of the Greenstone documents stored in Fedora's repository.
85 * These datastreams are returned as Strings without any changes being
86 * made to them.
87 * @author ak19
88*/
89public class FedoraConnection implements FedoraGS3DL {
90    /** The logging instance for this class */
91    private static final Logger LOG = Logger.getLogger(
92              FedoraConnection.class.getName());
93   
94    /** The version of fedora that is supported by class FedoraConnection */
95        protected static final String SUPPORTED_VERSION = "3.3"; //"2.2.1";
96                                     // 3.3 works with genericSearch version 2.2
97
98    /* Some fixed strings of known literals */
99    protected static final String GET = "/get/";
100   
101    // The DemoSOAPClient declares and uses the following as a static member 
102    // Probably none of the APIA methods (web service methods) remembers   
103    // state, that might explain why we can use it as a static member then.
104    /** The object used to access the Fedora API-A web service methods */
105    protected FedoraAPIA APIA;
106
107    /** Version of the running fedora server */
108    protected String fedoraVersion;
109    /** The location of the fedora server, usually of the form: protocol://host:port/fedora
110     * e.g. (and default) http://localhost:8080/fedora */
111    protected String baseURL;
112   
113    /** The user-specified portAddressSuffix of the Fedora Access web services
114     * (endpoint URL in the WSDL), usually of the form
115     * http://localhost:8080/fedora/services/access
116     * Users can tell FedoraGS3 to try accessing that first by setting
117     * the "port.address.suffix" property in the properties file.
118     * FedoraGS3 itself will not write the portAddressSuffix currently used in
119     * the file for next time, but leave whatever value was entered in the
120     * properties file. The portAddress--not just suffix--currently in use (once
121     * the FedoraAPIA handle has been instantiated) can be obtained through
122     * getPortAddressURL() method. */
123    protected String portAddressSuffix;
124   
125    /** The part of the portAddress that comes after the baseURL. By default and
126     * usually this is: "/services/access" */
127    protected static final String defaultPortAddressSuffix = "/services/access";
128   
129    /** The preferred language of the display content */
130    protected String lang;
131    /** The maximum number of collections to retrieve */
132    protected int maxresults;
133    /** DocumentBuilder used to create and parse XML documents */
134    protected DocumentBuilder builder;
135   
136    /** Static method that returns the version of Fedora supported by this
137     * class FedoraConnection. */
138    public static String getSupportedVersion() { return SUPPORTED_VERSION; }
139    /** The version of the running Fedora server, which may or may not
140     * match the supported version. */
141    public String getFedoraVersion() { return fedoraVersion; }
142   
143    /** @return the default language used to query for titles (and anything else
144     * where there are multiple language options). Upon initialisation, this
145     * defaults to English. */
146    public String getLanguage() { return lang; }
147   
148    /** Sets the the default language used to query for titles (and anything else
149     * where there are multiple language options). If the default language for any
150     * query is not available, then English ("en") is used. If that's not available
151     * then the first other available language is used.
152     * @param lang - the two-letter language code to set the default language to.
153    */
154    public void setLanguage(String lang) { this.lang = lang; }
155   
156    /** The default maximum number of search results returned for a search. Upon
157     * initialisation, this defaults to Java's Integer.MAX_VALUE. */
158    public int getMaxResults() { return maxresults; }
159   
160    /** Set the default maximum number of search results returned for a search.
161     * @param maxresults - the new default maximum number of search results to
162     * be returned. */
163    public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
164   
165    /** Code for this constructor is from DemoSOAPClient.java.
166     * Instantiates the APIA handle using the protocol, host, port, fedora
167     * server repository username and password.
168     * @param host - the fedora server host (may be prefixed with http:// or 
169     * https:// if parameter protocol is empty). If there's no protocol, and
170     * no protocol prefixed to the host, then the protocol defaults to http. 
171     * @param protocol - either http or https (or empty "")
172     * @param port - the port on which fedora is running.
173     * @param fedoraServerUsername - the administrator username required to
174     * access the fedora server's repository. ("fedoraAdmin" unless changed).
175     * @param fedoraServerPassword - the fedora server repository's
176     * administrator password. If none was set on fedora installation, this
177     * can be empty (""). */
178    public FedoraConnection(String protocol, String host, int port,
179            String fedoraServerUsername, String fedoraServerPassword)
180        throws ParserConfigurationException, MalformedURLException,
181            SSLHandshakeException, RemoteException, AuthenticationFailedException,   
182            NotAFedoraServerException, ConnectException, Exception
183    {
184        try {
185            this.portAddressSuffix = "";
186            init(protocol, host, Integer.toString(port),
187                fedoraServerUsername, fedoraServerPassword);
188        } /*catch(RemoteException re) { //subclass of IOException
189            throw re;
190        } catch(SSLHandshakeException ssle) { //subclass of IOException
191            // this is also of type IOException
192            throw ssle;
193        }*/ catch(IOException ioe) { // connected to the wrong server
194            String exceptMsg = ioe.getMessage().toLowerCase();
195            if(exceptMsg.indexOf("request failed") != -1
196                    || exceptMsg.indexOf("404") != -1)
197                throw new NotAFedoraServerException();
198            else // the IOException is not due the cause we thought it was, so
199                throw ioe; // rethrow whatever other IOException was caught (which
200                    // could have been RemoteException or SSLHandshakeException 
201                    // or some other cause)
202        }
203    }
204   
/** Default constructor which takes input from the user to get host, port,
 * fedora username and password.
 * It hands an empty Properties object to setInitialisationProperties(),
 * which shows the authentication popup (pre-filled with the popup's own
 * default values) and tries to connect with what the user entered. Which
 * failures lead to the popup being shown again and which are rethrown is
 * decided by setInitialisationProperties(); see that method.
 * NOTE (from the original author): if a fedora server at the protocol
 * (https or http) isn't accessible, it takes a long time for the
 * SSLHandshakeException to be thrown.
 * IOException is not in the throws clause; such exceptions reach the
 * caller as the more generic Exception.
 * @throws CancelledException if the user cancels the popup.
 */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // Nothing is pre-set and nothing is kept afterwards: the properties
    // only carry the user's input between popup and connection attempt.
    setInitialisationProperties(new Properties());
}
249   
/** Single argument constructor that takes the properties file defining
 * the values of the initialisation parameters required to instantiate a
 * FedoraConnection. These are fedora server username, password, host and
 * port (and optionally "port.address.suffix").
 * If the file does not exist or cannot be read, a warning is logged and
 * the initialisation dialog is shown without any stored values.
 * After a successful connection the properties are written back to the
 * same file as defaults for next time, with the password and the error
 * message blanked out. Failure to write is logged, not thrown.
 * @param propertyFile is the properties file specifying the values for
 * Fedora server username, password, host and port.
 * @throws CancelledException if the user cancels the popup. */
public FedoraConnection(File propertyFile)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    Properties properties = new Properties();

    // Load the properties from the given file
    try {
        if(propertyFile.exists()) {
            FileInputStream in = new FileInputStream(propertyFile);
            try {
                properties.load(in);
            } finally {
                // Properties.load() leaves the stream open, so close it
                // here whether or not loading succeeded
                in.close();
            }
        }
    } catch(Exception e) {
        // If the file could not be located or read, then we just
        // continue with whatever properties we have (possibly none)
        LOG.warn("Exception loading from propertyFile "
                + propertyFile + ": " + e);
    }

    // Go through the process of showing the initialisation dialog
    setInitialisationProperties(properties);

    // Now let's save whatever values the user may have entered into the
    // input dialog as the default values for next time the dialog shows
    try {
        // Make sure errormessage gets stored as "" and doesn't cause
        // problems next time.
        properties.setProperty("errormessage", "");
        // Don't save passwords
        properties.setProperty("password", "");
        // If the portAddressSuffix is in the properties already, then
        // it's user-specified and we shouldn't change it. Otherwise
        // write it out with an empty string value.
        if(properties.getProperty("port.address.suffix") == null) {
            properties.setProperty("port.address.suffix", "");
        }

        // same file as the one the properties were loaded from
        java.io.FileOutputStream out
            = new java.io.FileOutputStream(propertyFile);
        try {
            properties.store(out, "fedoraGS3 properties");
        } finally {
            // Properties.store() leaves the stream open; close it even
            // if storing failed so the file handle is not leaked
            out.close();
        }
    } catch(Exception e) {
        LOG.warn("Exception writing to propertyFile "
                + propertyFile + ": " + e);
    }
}
307   
308    /** Method that loops to display the dialog that retrieves the
309     * fedora server initialisation properties from the user. If there
310     * is a property file with values set already, it will display
311     * the previously entered values by loading them from that file.
312     * Otherwise, input fields in the dialog are empty.
313     * @param properties the Properties Hashmap storing values for
314     * username, password, host and port (and any errormessage). */
315    protected void setInitialisationProperties(Properties properties)
316        throws ParserConfigurationException, MalformedURLException,
317            CancelledException, ConnectException, RemoteException,
318            SSLHandshakeException, Exception
319    {
320        // keep looping to display authentication popup, until valid values are 
321        // entered (except when a ConnectionRefused Exception is caught - this
322        // needs to be rethrown):
323        boolean authenticated = true;
324        // reset any error messages that may have been stored (should not be
325        // the case, but if there had been any difficulty during storing, it
326        // may not have written out an empty errorMessage)
327        properties.setProperty("errormessage", "");
328        do{
329            // show the Authentication-popup:
330            // By passing the HashMap Properties, user-updated values will
331            // be persistent in the authentication-popup fields (rather than
332            // reset to the default initial values).
333            properties = showAuthenticationPopup(properties);
334            String fedoraServerUsername = properties.getProperty("username", "");
335            String fedoraServerPassword = properties.getProperty("password", "");
336            String host = properties.getProperty("host", "");
337            String port = properties.getProperty("port", "");
338            //String protocol = host.startsWith("http") ? "" : "http://";
339            String protocol = "http://";
340            if(host.startsWith("http") || host.startsWith("https"))
341                protocol = "";
342            // NOTE THAT: if a fedora server at https:// is not accessible,
343            // it takes a long time for the authentication popup to reappear.
344           
345            try{
346                this.portAddressSuffix
347                    = properties.getProperty("port.address.suffix", "");
348                // Use the FedoraClient utility to get the SOAP stub for APIA.
349                // This SOAP stub enables the client to connect to a Fedora
350                // repository via the API-A web service interface.
351                init(protocol, host, port,
352                    fedoraServerUsername, fedoraServerPassword);
353                    // will throw Exception if it can't instantiate APIA
354           
355                // if no exception thrown in the initialisation statement above,
356                // then we have been authenticated:
357                authenticated = true;
358            } catch(AuthenticationFailedException afe) {
359                authenticated = false;
360                properties.setProperty("errormessage", afe.getMessage());
361            } catch(RemoteException e) { // causes could be various
362                String reason = e.getMessage();
363                if(e.getCause() != null) {
364                    // For instance, if a ConnectException indicating
365                    // 'Connection Refused' or a java.net.UnknownHostException
366                    // caused the RemoteException
367                   
368                    // Strip out prefix "Nested exception is..." from the
369                    // encapsulating Exception's message, by using the Cause's
370                    // message. Keep Exception classname to give it some context:
371                    reason = e.getCause().getClass().getName() + ": "
372                        + e.getCause().getMessage();
373                    // Give some more information if the connection was refused.
374                    // (This can also happen when the Fedora server is not running)
375                    if(e.getCause().getClass().equals(ConnectException.class)) {
376                        reason += FedoraGS3Exception.connectionRefusedMessage;
377                    }
378                }
379                // if the message indicates that a server was running there,
380                // then we tell the user it was not a Fedora server
381                if(reason.toLowerCase().contains("404")
382                        || reason.toLowerCase().contains("request failed"))
383                {
384                    reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
385                }
386                authenticated = false;
387                properties.setProperty("errormessage", reason);
388            } catch(ConnectException e) {
389                properties.setProperty("errormessage",
390                    FedoraGS3Exception.connectionRefusedMessage);
391                authenticated = false;
392            } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
393                // be handled before IOException, as it's an IOException subclass.
394                authenticated = false;
395                properties.setProperty("errormessage",
396                    FedoraGS3Exception.sslHandshakeExceptionMessage);
397                // we won't prefix the host with http for the user, as https
398                // might be right after all, and something else might have gone
399                // during the connection attempt instead.
400                    //host = host.replace("https", "http"); //setting it for them
401                    //properties.setProperty("host", host);
402            } catch(IOException ioe) { // occurs when we try to connect to a
403                // host/port where some server other than Fedora's is listening
404                // (e.g. if we end up connecting to GS3's host and port).
405                // In that case, we can get exception messages like a 404:
406                // "Unable to instantiate FedoraConnection
407                // java.io.IOException: Request failed [404 /fedora/describe]"
408                // Test this by trying to connect to localhost at 9090 where GS3 is
409                String exceptMsg = ioe.getMessage().toLowerCase();
410                if(exceptMsg.indexOf("request failed") != -1
411                    || exceptMsg.indexOf("404") != -1)
412                {
413                    properties.setProperty("errormessage",
414                        NotAFedoraServerException.MESSAGE
415                        + "\n(" + ioe.getMessage() + ")");
416                } else if(exceptMsg.indexOf("401") != -1
417                    || exceptMsg.indexOf("500") != -1)
418                {
419                    authenticated = false;
420                    properties.setProperty("errormessage", ioe.getMessage());
421                } else { // the exception occurred for some other reason, rethrow it
422                    throw ioe;
423                }   
424            }
425        } while(!authenticated); // will keep showing popup until auhentication
426                // and connection input values are valid
427    }
428   
429    /**
430     * Static method that displays a popup to allow the user to provide Fedora 
431     * authentication (username, pwd) and connection (protocol+host, port) details.
432     * @param properties is a Properties HashMap where the property Keys which must
433     * have been put in here in advance (even with "" Values if appropriate) are:
434     * <pre>
435     * - username
436     * - password
437     * - host (may - but need not - be prefixed with either of the protocols
438     *   "http://" and "https://"
439     * - port
440     * - errorMessage (displayed near the top of the popup dialog). Can be "".
441     * </pre>
442     * The values stored in the properties HashMap for the above property are 
443     * initially displayed in the fields and the user can overwrite them.
444     * This is useful in such cases where invalid values were entered and this
445     * popup must be redisplayed to allow the user to correct their previous input.
446     * @return the same HashMap Properties which was passed as parameter. */
447    protected static Properties showAuthenticationPopup(Properties properties)
448        throws CancelledException
449    {
450        // Retrieve all the properties -- defaults to "" if any are null
451        JTextField usernameField = new JTextField(
452                properties.getProperty("username", "fedoraAdmin"));
453        JTextField passwordField = new JPasswordField(
454                properties.getProperty("password", ""));
455        JTextField hostField = new JTextField(
456                properties.getProperty("host", "localhost"));
457        JTextField portField = new JTextField(
458                properties.getProperty("port", "8080"));
459       
460        JPanel panel = new JPanel(new GridLayout(4,2));
461        panel.add(new JLabel("User Name"));
462        panel.add(usernameField);
463        panel.add(new JLabel("Password"));
464        panel.add(passwordField);
465        panel.add(new JLabel("Host"));
466        panel.add(hostField);
467        panel.add(new JLabel("Port"));
468        panel.add(portField);
469       
470        String heading = "Fedora Server Admin Authentication:";
471        String errorMessage = properties.getProperty("errormessage", "");
472        if(!errorMessage.equals("")) {
473            heading = "=> " + errorMessage + "\n\n" + heading;
474        }
475        int option = JOptionPane.showConfirmDialog(null, new Object[] {
476            heading, panel},
477            "Enter Network Password",
478            JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
479       
480        if (option == JOptionPane.OK_OPTION) {
481            String fedoraServerUsername = usernameField.getText();
482            String fedoraServerPassword = passwordField.getText();
483            String host = hostField.getText();
484            String port = portField.getText();
485            properties.setProperty("username", fedoraServerUsername);
486            properties.setProperty("password", fedoraServerPassword);
487            properties.setProperty("host", host);
488            properties.setProperty("port", port);
489        } else { // Cancel option
490            throw new CancelledException();
491        }
492        return properties;
493    }
494   
495    /** Init method that is called by the constructor to set some
496     * important member variables including instantiating the APIA object
497     * used to invoke the Fedora APIA web service operations.
498     * @param protocol can be http or https
499     * @param host is the name of the Fedora server host
500     * @param port is the port number (String form) of the Fedora server
501     * @param fedoraServerUsername is the user name to access the Fedora
502     * Server
503     * @param fedoraServerPassword is the password needed to access the
504     * Fedora Server
505    */
506    protected void init(String protocol, String host, String port,
507            String fedoraServerUsername, String fedoraServerPassword)
508        throws ParserConfigurationException, MalformedURLException,
509            AuthenticationFailedException, RemoteException, Exception
510    {
511        // initialise member variables
512        lang = ENGLISH;
513        maxresults = Integer.MAX_VALUE;
514        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
515        builder = factory.newDocumentBuilder();
516       
517        // (protocol is "" if host already contains protocol)
518        if(!protocol.equals("") && !protocol.endsWith("://"))
519            protocol += "://";
520        // now create baseURL = protocol://host:port/fedora
521        this.baseURL = protocol + host + ":" + port + "/fedora";
522       
523        // Get the FedoraAPIA handle to/stub of the Fedora web services
524        // New way of instantiating connection to Fedora is dependent on
525        // fewer files of FedoraClient.jar
526        FedoraAPIAServiceLocator serviceLocator
527            = new FedoraAPIAServiceLocator(fedoraServerUsername,
528                           fedoraServerPassword);
529       
530        APIA = null;
531        boolean isUserSpecifiedPortAddressSuffix = false;
532        // try any portAddressSuffix specified by the user
533        if(!this.portAddressSuffix.equals("")) {
534            isUserSpecifiedPortAddressSuffix = true;
535            this.createAPIA(serviceLocator, this.portAddressSuffix,
536                "user-specified", isUserSpecifiedPortAddressSuffix);
537        }
538       
539        // If the user-specified portAddressSuffix failed or if there was none 
540        // given, then APIA will be null, so we will try with the default
541        // portAddressSuffix. This time all exceptions will be passed on.
542        if(APIA == null) {
543            isUserSpecifiedPortAddressSuffix = false;
544            this.createAPIA(serviceLocator, defaultPortAddressSuffix,
545                "default", isUserSpecifiedPortAddressSuffix);
546        }
547       
548    }
549   
550    /** Tries to create the FedoraAPIA instance using the serviceLocator
551     * and the given portSuffix. The APIA instance is obtained for the
552     * baseURL+portSuffix. Any exceptions are (processed and) rethrown
553     * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
554     * Remote Exception from AXIS that it can't find the target service to
555     * invoke is ignored so that the caller can retry with the default port- 
556     * address suffix first before giving up. */
557    protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
558            String portSuffix, String messageInsert,
559            boolean isUserSpecifiedPortAddressSuffix)
560        throws Exception
561    {
562        //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
563            //  this.portAddressSuffix : defaultPortAddressSuffix;
564           
565        try {
566            LOG.debug( "Trying to connect to Fedora using the given"
567                + " baseURL and the " + messageInsert + " portAddress suffix:\n"
568                + baseURL + portSuffix);
569            APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
570                    new java.net.URL(baseURL+portSuffix));
571            // let's test whether we're authenticated (otherwise a
572            // RemoteException will be thrown to indicate that the 
573            // password was incorrect.)
574            RepositoryInfo repositoryInfo = APIA.describeRepository();
575            // throws RemoteException if pwd wrong or for other reasons
576            // in which case describeRepository() service is unavailable
577            this.fedoraVersion = repositoryInfo.getRepositoryVersion();
578            // If we come all the way here, no exceptions were thrown:
579            this.portAddressSuffix = portSuffix; // store the one currently in use
580        } catch(RemoteException re) {
581            // if we're here, then APIA was unable to call the web service
582            // If this was because the fedora authentication failed, then
583            // let's throw a custom exception
584            String message = re.getMessage().toLowerCase();
585            // Looking for something Unauthorized(401)
586            if(message.indexOf("unauthorized") != -1
587                    || message.indexOf("401") != -1)
588            {
589                throw new AuthenticationFailedException();
590            } else if(isUserSpecifiedPortAddressSuffix
591                && re.getMessage().contains(
592                    FedoraGS3Exception.missingTargetService))
593            {
594                LOG.warn("Failed to connect to Fedora APIA services at given"
595                        + " port address:\n" + portSuffix
596                        + "\nException: " + re.getMessage());
597                // APIA.describeRepository can throw a remote exception
598                // whereby AXIS says the target service is missing and can't
599                // be invoked (FedoraGS3Exception.missingTargetService)
600                // Don't rethrow this, if AXIS can't find the user-specified
601                // portAddressSuffix, we will try with the default suffix next
602                APIA = null;
603            } else { // if trying default portAddressSuffix or if any other
604                // RemoteException was generated (whose cause is something
605                // other than an authentication failure) rethrow it.
606                throw re;
607            }   
608        } catch(Exception e) { // Other Exceptions
609            // Could possibly be a ServiceException when using ServiceLocator
610            if(isUserSpecifiedPortAddressSuffix) {
611                APIA = null; // we won't throw other exceptions yet until
612                // we have tried the default PortAddressSuffix for the baseURL
613            } else {
614                throw new FedoraGS3InitFailureException(e);
615            }
616        }
617    }
618   
619    /** Gets all greenstone collections. Searches for greenstone:*-collection.
620     * Method getCollections() defaults to getting only those objects in fedora's 
621     * repository whose pids are of the format greenstone:*-collection.
622     * The use of AutoFinder and findObjects is shown in
623     * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
624     * The Fedora-APIA's method definition of findObjects is:
625     * <pre>
626     * fedora-types:FieldSearchResult findObjects(
627     *              fedora-types:ArrayOfString resultFields,
628     *              xsd:nonNegativeInteger maxResults,
629     *              fedora-types:FieldSearchQuery query )
630     * </pre>
631     * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
632     * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
633     * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
634     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html">Type definition of 2.2.1 FieldSearchQuery</a>
635     * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
636     * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
637     * 
638     * @return an array of Strings containing the pids of all collections
639     * matching the format greenstone:*-collection.
640    */
641    public String[] getCollections() throws RemoteException
642    {
643        // Available constructors:
644        // FieldSearchQuery(java.util.List conditions)
645        // FieldSearchQuery(java.lang.String terms)
646        final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
647        FieldSearchQuery query = new FieldSearchQuery();
648        query.setTerms(queryStr);
649        query.setConditions(null);
650        // we'd like pid and title returned for each object
651        // we pass maxResults=null to get all objects that match
652        // (i.e. all collections)
653        String[] pids = null;
654       
655        FieldSearchResult collection = AutoFinder.findObjects(
656                APIA, new String[]{"pid", "title"}, maxresults, query);
657        ObjectFields[] results = collection.getResultList();
658        pids = new String[results.length];
659        for(int i = 0; i < results.length; i++) {
660            pids[i] = results[i].getPid();
661        }
662        return pids;
663    }
664   
665    /** All objects (incl "greenstone:*" objects) in fedora - be they collections,   
666     * top-level documents or document sections - have a DC datastream. This
667     * method returns the content (XML) of the DC datastream as it is stored in
668     * fedora's repository.
669     * (The pid/DC call is one of the default fedora-system 3 disseminations.)
670     * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
671     * To obtain the DC/any datastream, we use method getDatastreamDissemination()
672     * of the interface FedoraAPIA. This method returns a MIMETypedStream.
673     * The method signature is:
674     * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
675     * where dsID = itemID (look at datastreams page of running fedora instance)
676     * To access the XML content of the MIMETypedObject returned, we use its method
677     * bytes[] getStream(), but when instantiating a String from this, we have to
678     * use the String() contructor where we can specify the charset encoding (in
679     * this case, it must be UTF-8). Else getStream() returns gobbledygook.
680     * @return a String version of the XML in the DC datastream for the fedora
681     * object denoted by pid.
682     * @param pid - the fedora persistent identifier for an item in the fedora
683     * repository.
684     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
685     * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
686     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
687     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
688    */
689    public String getDC(String pid)
690            throws RemoteException, UnsupportedEncodingException
691    {
692        // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
693        // datastream ID, dsID = itemID, look at a running fedora
694        MIMETypedStream dcStream
695            = APIA.getDatastreamDissemination(pid, DC, null);
696            //asOfDateTime = null to get the current version of the dataStream
697       
698        // need to set the charset encoding to UTF8
699        return new String(dcStream.getStream(), UTF8);
700    }
701   
702    /** All "greenstone:*" objects in fedora (be they collections be they
703     * collections, top-level documents or document sections) have an EX
704     * datastream. This method returns the content (XML) of the EX datastream as
705     * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
706     * @return a String version of the XML in the EX datastream for the fedora
707     * object denoted by pid.
708     * @param pid - the fedora persistent identifier for an item in the fedora
709     * repository. 
710     * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
711     * asOfDateTime).
712     * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
713     * @see String getDC(String pid) throws Exception
714     * */
715    public String getEX(String pid)
716            throws RemoteException, UnsupportedEncodingException
717    {
718        MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
719            //asOfDateTime = null to get the current version of the dataStream
720       
721        // need to set the charset encoding to UTF8
722        return new String(exStream.getStream(), UTF8);
723    }
724   
725    /** Some "greenstone:*" top-level documents in the fedora repository (but not
726     * greenstone collections or document sections) have a DLS metadata datastream.
727     * This method returns the content (XML) of the DLS datastream as is. (It calls
728     * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
729     * @return a String version of the XML in the DLS datastream for the fedora
730     * object denoted by pid, or "" if the document given by pid has no DLS datastream.
731     * @param pid - the fedora persistent identifier for an item in the fedora
732     * repository. 
733     * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
734     * asOfDateTime).
735     * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
736     * @see String getDC(String pid) throws Exception
737     * */
738    public String getDLS(String pid)
739            throws RemoteException, UnsupportedEncodingException
740    {
741        MIMETypedStream dlsStream = null;
742        // If there is no DLS datastream, it throws an exception (whose class
743        // fedora.server.errors.DatastreamNotFoundException can't be imported
744        // here (it's not in the client side fedora.server.* package, but on
745        // the server side package of that name):
746        try{
747            dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
748                //asOfDateTime=null to get the current version of the dataStream
749        } catch(RemoteException e) {
750            //These two don't work:
751            //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
752            //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
753           
754            if(e.getMessage().contains("No datastream could be returned.") || e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException")) 
755            {   // there is no DLS data stream for this document
756                return "";
757            }
758            else { // different problem, exception due to different cause
759                throw(e);
760            }
761        }
762        if(dlsStream == null)
763            return "";
764        // need to set the charset encoding to UTF8
765        return new String(dlsStream.getStream(), UTF8);
766    }
767   
768    /** All "greenstone:*" objects in fedora (be they collections or documents)
769     * have a TOC datastream, unless they have only 1 section (SECTION1).
770     * This method returns the content (XML) of the TOC datastream as is.
771     * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
772     * @return a String version of the XML in the TOC datastream for the fedora
773     * object denoted by pid.
774     * @param pid - the fedora persistent identifier for an item in the fedora
775     * repository.
776     * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
777     * asOfDateTime)
778     * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
779     * @see String getDC(String pid) throws Exception
780     * */
781    public String getTOC(String pid)
782        throws RemoteException, UnsupportedEncodingException
783    {
784        try {
785        MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
786            //asOfDateTime = null to get the current version of the dataStream
787        // need to set the charset encoding to UTF8
788        return new String(tocStream.getStream(), UTF8);
789        } catch(RemoteException re) {
790        // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
791        return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
792        }       
793    }
794   
795    /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
796     * for the collections indicated by collPIDs.
797     * @param collPIDs - an array of Strings denoting the pids for greenstone
798     * collections stored in the fedora repositoryl. These should be of the
799     * format "greenstone:&lt;collectionName&gt;-collection". */
800    public String[] getCollectionNames(String[] collPIDs) {
801        String[] collNames = new String[collPIDs.length];
802        for(int i = 0; i < collPIDs.length; i++)
803            collNames[i] = getCollectionName(collPIDs[i]);
804        return collNames;
805    }
806   
807    /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
808     * in the parameter collNames.
809     * @param collNames - a list of names of greenstone collections
810     * stored in the fedora repository. */
811    public String[] getCollectionPIDs(String[] collNames) {
812        String[] collPIDs = new String[collNames.length];
813        for(int i = 0; i < collNames.length; i++)
814            collPIDs[i] = getCollectionName(collNames[i]);
815        return collPIDs;
816    }
817   
818    /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
819     * denoted by parameter collName.
820     * @param collName - the name of a greenstone collection stored
821     * stored in the fedora repository. */
822    public String getCollectionPID(String collName) {
823        return GREENSTONE_+collName+_COLLECTION;
824    }
825   
826    /**
827     * Gets the title of the collection denoted by the given collection's pid by
828     * retrieving the title metadata for it from the collection's EX datastream.
829     * @return the title (in the default language, else English, else the
830     * first title found) for the particular collection denoted by its PID.
831     * @param collPID is the pid of a greenstone collection in the fedora
832     * repository. */
833    public String getCollectionTitle(String collPID)
834        throws RemoteException, UnsupportedEncodingException,
835            SAXException, IOException
836    {
837        String title = null; // has to be null initially, we do a check on it
838        // Parse the EX datastream (XML), and in its DOM, find the
839        // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
840        // There might be one OR several of those with attribute
841        // name="collectionname". If there's only one, then get that.
842        // If there are several, there would possibly a be qualifier attribute,
843        // in which case get qualifier=lang (where lang is the member variable)
844        // If there is no qualifier with the requested language, then get the
845        // english one which is likely to be there, else return the title for
846        // the first collectionname .
847       
848        MIMETypedStream exdata
849            = APIA.getDatastreamDissemination(collPID, EX, null);
850        String exStream = new String(exdata.getStream(), UTF8);
851       
852        InputSource source = new InputSource(new StringReader(exStream));
853        Document doc = builder.parse(source);
854        Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
855        NodeList children = docEl.getChildNodes();
856       
857        String firstName = "";
858        String englishName = "";
859        for(int i = 0; i < children.getLength(); i++ ) {
860            Node n = children.item(i);
861            if(n.getNodeType() == Node.ELEMENT_NODE) {
862                Element e = (Element)n;
863                if(e.hasAttribute(NAME)
864                        && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
865                    firstName = FedoraCommons.getValue(e);
866                    if(!e.hasAttribute(QUALIFIER)) {
867                        title = FedoraCommons.getValue(e);
868                        break;
869                    }
870                    else if(e.getAttribute(QUALIFIER).equals(lang)) {
871                        title = FedoraCommons.getValue(e);
872                        break;
873                    } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
874                        englishName = FedoraCommons.getValue(e);
875                    }
876                }
877            }
878        }
879       
880        // if the title is still not set to that of the requested language,
881        // then try setting it to the collection name in English. If English
882        // isn't available, then set it to the first collection name provided
883        // (in whichever language).
884        if(title == null) {
885            title = englishName.equals("") ? firstName : englishName;
886        }
887        doc = null;
888        return title;
889    }
890   
891    /** @return the collection titles for all the collections indicated by
892     * collPIDs.
893     * @param collPIDs - a list of pids identifying greenstone collections
894     * stored in the fedora repository. */
895    public String[] getCollectionTitles(String[] collPIDs)
896        throws RemoteException, UnsupportedEncodingException,
897            SAXException, IOException
898    {
899        String[] titles = new String[collPIDs.length];
900       
901        // parse each EX datastream (XML) which contains the gs3-extracted meta.
902        for(int i = 0; i < collPIDs.length; i++) {
903            titles[i] = getCollectionTitle(collPIDs[i]);
904        }
905        return titles;
906    }
907   
908    /** @return the title metadata for the given doc objects of a collection.
909     * These titles are returned in the same order as the given docIDs.
910     * (The docPIDs already contain the collection name anyway.)
911     * @param docPIDs - a list of pids identifying documents stored in the
912     * fedora repository. */
913    public String[] getDocTitles(String[] docPIDs)
914        throws RemoteException, UnsupportedEncodingException,
915            SAXException, IOException
916    {
917        String[] titles = new String[docPIDs.length];
918        for(int i = 0; i < docPIDs.length; i++) {
919            titles[i] = getDocTitle(docPIDs[i]);
920        }
921        return titles;
922    }
923   
924    /** Gets the title metadata for a particular doc object in a collection
925     * denoted by docPID. The docPID already contains the collection name.
926     * @return the title for the fedora document item denoted by docPID
927     * @param docPID is the pid of the document in the fedora repository
928     * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
929    public String getDocTitle(String docPID)
930        throws RemoteException, UnsupportedEncodingException,
931            SAXException, IOException
932    {
933        // We need the extracted metadata file, and find its 
934        // documentElement's child
935        // <ex:metadata name="Title">sometitle</ex:metadata>
936        // where the title we return is sometitle
937       
938        String title = "";
939        MIMETypedStream exdata
940            = APIA.getDatastreamDissemination(docPID, EX, null);
941        String exStream = new String(exdata.getStream(), UTF8);
942        return getTitle(exStream);
943    }
944   
945    /** Given a string representation of a document's or document section's 
946     * EX datastream -- which is a greenstone extracted metadata XML file --
947     * of the form:
948     * &lt;ex&gt;
949     * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
950     * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
951     * ...
952     * &lt;/ex&gt;
953     * This method finds the &lt;ex:metadata&gt; where the name="Title" and
954     * returns the value embedded in that element ('sometitle' in
955     * the example above).
956     * @return the title metadata of the document/document section whose EX
957     * datastream is passed as parameter
958     * @param exStream the EX datastream in String form of the document or
959     * document section. */
960    protected String getTitle(String exStream)
961        throws SAXException, IOException
962    {
963        String title = "";
964        InputSource source = new InputSource(new StringReader(exStream));
965        Document doc = builder.parse(source);
966        Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
967        NodeList children = docEl.getChildNodes();
968       
969        // Cycle through all the *element* children of <ex:ex></ex:ex>
970        // which are all of the form:
971        // <ex:metadata name="somename">somevalue</ex:metadata>
972        // Find the one where name="Title", its value is the title
973        for(int i = 0; i < children.getLength(); i++ ) {
974            Node n = children.item(i);
975            if(n.getNodeType() == Node.ELEMENT_NODE) {
976                Element e = (Element)n;
977                if(e.hasAttribute(NAME)
978                        && e.getAttribute(NAME).equals(TITLE)) {
979                    title = FedoraCommons.getValue(e);
980                        break;
981                }
982            }
983        }
984        return title;
985    }
986   
987
988    /** Used to obtain the dc:title value (hashID) of the DC stream of a digital
989     * object whose fedoraID is of a special sort: greenstone-http:<colname>-id. */
990    protected String getDCTitle(String fedoraPID)
991    throws RemoteException, UnsupportedEncodingException,
992            SAXException, IOException
993    {
994    String title = "";
995    MIMETypedStream dcdata = APIA.getDatastreamDissemination(fedoraPID, DC, null);
996    if(dcdata == null || dcdata.equals("")) {
997        return title;
998    }
999    String dcStream = new String(dcdata.getStream(), UTF8);
1000   
1001    InputSource source = new InputSource(new StringReader(dcStream));
1002    Document doc = builder.parse(source);
1003    Element docEl = doc.getDocumentElement(); // docEl=<oai_dc:dc></oai_dc:dc>
1004    NodeList children = docEl.getElementsByTagName("dc:title");
1005    if(children != null && children.getLength() > 0) {
1006        Node n = children.item(0); // <dc:title>
1007        Element e = (Element)n;
1008        title = FedoraCommons.getValue(e);
1009    }
1010    return title;
1011    }
1012
1013    /** @return the title metadata for the given document sections.
1014     * These titles are returned in the same order as the given docPIDs
1015     * and associated sectionIDs.
1016     * (The docPIDs already contain the collection name anyway.)
1017     * @param docPIDs - a list of pids identifying documents stored in the
1018     * fedora repository.
1019     * @param sectionIDs - a list of sectionIDs identifying individual sections
1020     * of documents stored in the fedora repository whose titles are requested. */
1021    public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
1022        throws RemoteException, UnsupportedEncodingException,
1023            SAXException, IOException
1024    {
1025        String[] titles = new String[docPIDs.length];
1026        for(int i = 0; i < docPIDs.length; i++) {
1027            titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1028        }
1029        return titles;
1030    }
1031   
1032    /** @return the title metadata for the given document section.
1033     * (The docPID already contain the collection name anyway.)
1034     * @param docPID - a pid identifying a document in the fedora repository.
1035     * @param sectionID - the sectionID of the section of the
1036     * document whose title is requested. */
1037    public String getSectionTitle(String docPID, String sectionID)
1038        throws UnsupportedEncodingException, RemoteException,
1039            SAXException, IOException 
1040    {
1041        String ex = this.getSectionEXMetadata(docPID, sectionID);
1042        return getTitle(ex);
1043    }
1044       
1045    /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1046     * returns the PIDs of the data objects found, with the exception of
1047     * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1048     * collection PID.
1049     * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1050     * (but not greenstone:&lt;colName&gt;-collection itself, because that represents 
1051     * the collection and not an object of the same collection) are returned.
1052     * All pids that do not map to a collection are assumed to be documents! 
1053     * @return a list of the pids of all the (doc) objects in a collection.
1054     * @param colPID is the pid of the greenstone collection stored in
1055     * the fedora repository. */
1056    public String[] getCollectionDocs(String colPID)
1057        throws RemoteException
1058    {
1059        String colName = getCollectionName(colPID);
1060        //LOG.debug("colName: " + colName);
1061       
1062        // Search fedora objects for pid=greenstone:<colName>-*
1063        final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1064                // searches for "greenstone:"+colName+"-*";
1065        FieldSearchQuery query = new FieldSearchQuery();
1066        query.setTerms(queryStr);
1067        query.setConditions(null);
1068        String[] pids = null;
1069       
1070        FieldSearchResult objects = AutoFinder.findObjects(
1071                APIA, new String[]{"pid", "title"}, maxresults, query);
1072        ObjectFields[] results = objects.getResultList();
1073       
1074        // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1075        // that's not a document object:
1076        pids = new String[results.length-1]; // not storing collection object
1077        int index = 0; // keeps track of docPid index
1078        for(int i = 0; i < results.length; i++) {
1079            // check it's not a collection object
1080            if(!results[i].getPid().endsWith(_COLLECTION)) {
1081                pids[index] = results[i].getPid();
1082                index++;
1083            }
1084        }
1085       
1086        return pids;
1087    }
1088
1089    /** Given the pid of a document fedora data object, this method will return
1090     * all itemIDs that are part of that data object and are Sections. For further
1091     * information see interface Comparable (implemented by String), SortedSet
1092     * and TreeSet.
1093     * @return an array of itemIDs of the Sections of the document,
1094     * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1095     * @param docPID is a fedora pid identifying a greenstone document object.
1096     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1097     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1098     * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1099    */
1100    public String[] getSectionNames(String docPID) throws RemoteException {
1101        // DatastreamDef[] listDatastreams(
1102            // java.lang.String pid, java.lang.String asOfDateTime)
1103       
1104        // listDatastreams returns information on each item (including itemID=dsID) 
1105        // in the document object indicated by docPID
1106       
1107        // Need to give an object version number, because null for asOfDateTime 
1108        // does not return any datastreams!
1109        String[] times = APIA.getObjectHistory(docPID);
1110       
1111        DatastreamDef[] datastreams = APIA.listDatastreams(
1112                docPID, times[times.length-1]);
1113       
1114        // TreeSet is a SortedSet. We're going to put Strings into it,
1115        // and Strings implement interface Comparable already.
1116        TreeSet orderedList = new TreeSet();  //TreeSet(new RankComparator())
1117        for(int i = 0; i < datastreams.length; i++) {
1118            String itemID = datastreams[i].getID();
1119            if (itemID.startsWith("SECTION"))
1120                orderedList.add(itemID);
1121        }
1122
1123        String[] sectionNames = new String[orderedList.size()];
1124        orderedList.toArray(sectionNames);
1125        orderedList = null;
1126        return sectionNames;
1127    }
1128   
1129    /** Given the pid of a document fedora data object, this method will return all
1130     * itemIDs that are part of that data object and are Sections, but just the
1131     * Section numbers are returned. For further information see interface Comparable
1132     * (implemented by String), SortedSet and TreeSet.
1133     * @return an array of itemIDs of the Section numbers of the document
1134     * indicated by docPID, in ascending order. Return values are of form: "1.*".
1135     * @param docPID is a fedora pid identifying a greenstone document object.
1136     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1137     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1138     * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a> 
1139    */
1140    public String[] getSectionNumbers(String docPID) throws RemoteException {
1141        String[] times = APIA.getObjectHistory(docPID);
1142       
1143        DatastreamDef[] datastreams
1144            = APIA.listDatastreams(docPID, times[times.length-1]);
1145        //Vector v = new Vector(datastreams.length);
1146        TreeSet orderedList = new TreeSet();
1147       
1148        for(int i = 0; i < datastreams.length; i++) {
1149            String itemID = datastreams[i].getID();
1150            if (itemID.startsWith("SECTION")) {
1151                //int index = SECTION.length();
1152                //itemID = itemID.substring(index);
1153                itemID = removePrefix(itemID, SECTION);
1154                orderedList.add(itemID);
1155            }
1156        }
1157
1158        String[] sectionNumbers = new String[orderedList.size()];
1159        orderedList.toArray(sectionNumbers);
1160        orderedList = null;
1161   
1162        return sectionNumbers;
1163    }   
1164   
1165    /** @return the titles for the document sections denoted by the parameters.
1166     * @param docPID is a fedora pid identifying a greenstone document object.
1167     * @param sectionIDs is a list of identifiers identifying sections in the
1168     * document denoted by docPID, whose titles need to be returned. Each
1169     * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1170     * or a section number (eg. 1.5.1). */
1171    public String[] getTitles(String docPID, String[] sectionIDs)
1172        throws RemoteException, UnsupportedEncodingException,
1173            SAXException, IOException
1174    {
1175        String[] titles = new String[sectionIDs.length];
1176        for(int i = 0; i < titles.length; i++)
1177            titles[i] = getTitle(docPID, sectionIDs[i]);
1178        return titles;
1179    }
1180   
1181    /** @return the title for the document section denoted by the parameters.
1182     * @param docPID is a fedora pid identifying a greenstone document object.
1183     * @param sectionID identifies the particular section in the document denoted
1184     * by docPID, whose title needs to be returned. The sectionID may be either a 
1185     * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1186    public String getTitle(String docPID, String sectionID)
1187        throws RemoteException, UnsupportedEncodingException,
1188            SAXException, IOException
1189    {
1190        // Compose the itemID for the EX data stream from the number in the
1191        // sectionID:
1192        String exID = removePrefix(sectionID, SECTION);
1193        exID = EX+convertToMetaNumber(exID);
1194       
1195        // Retrieve the extracted metadata stream (EX, in XML) for the given
1196        // section
1197        String exStream = getItem(docPID, exID);
1198       
1199        // Extract the title from the XML, look for:
1200        // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1201        InputSource source = new InputSource(new StringReader(exStream));
1202        Document doc = builder.parse(source);
1203        Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1204        NodeList children = docEl.getElementsByTagName(
1205                EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1206        for(int i = 0; i < children.getLength(); i++) {
1207            Element e = (Element)children.item(i);
1208            if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1209                return FedoraCommons.getValue(e); // extract and return the title
1210        }
1211        return ""; // if we got here, then we couldn't find a title
1212    }
1213   
1214    /** @return the section's XML (as a String) as it is stored in fedora.
1215     * Works out if sectionID is a sectionName or sectionNumber.
1216     * @param docPID - a fedora pid identifying a greenstone document object.
1217     * @param sectionID - identifies the particular section in the
1218     * document denoted by docPID, may be a section name or number. */
1219    public String getSection(String docPID, String sectionID)
1220        throws RemoteException, UnsupportedEncodingException
1221    {
1222        if(!sectionID.startsWith(SECTION)) // then it has only section number
1223            sectionID = SECTION+sectionID;
1224       
1225        String sectionXML = this.getItem(docPID, sectionID);
1226        return sectionXML;
1227    }
1228   
1229    /** @return the required section's DC metadata XML datastream.
1230     * @param docPID - a fedora pid identifying a greenstone document object.
1231     * @param sectionID - identifies the particular section in the
1232     * document denoted by docPID, may be a section name or number. */
1233    public String getSectionDCMetadata(String docPID, String sectionID)
1234        throws RemoteException, UnsupportedEncodingException
1235    {
1236        String dcID = removePrefix(sectionID, SECTION);
1237                // ensure we have just the section number
1238        dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1239       
1240        // now get the DC datastream for that number
1241        String dcXML = this.getItem(docPID, dcID);
1242        return dcXML;
1243    }
1244   
1245    /** Returns the section EX metadata XML datastream for SectionID which may be
1246     * a section name or number. Currently a few EX files are named awkwardly:
1247     * the EX file for section 1.* is actually associated with datastream EX.*.
1248     * But subsequent EX datastreams are named appropriately: for instance,
1249     * EX2.1.1 matches with section 2.1.1
1250     * @return the required section's EX metadata XML datastream.
1251     * @param docPID - a fedora pid identifying a greenstone document object.
1252     * @param sectionID - identifies the particular section in the
1253     * document denoted by docPID, may be a section name or number. */
1254    public String getSectionEXMetadata(String docPID, String sectionID)
1255        throws RemoteException, UnsupportedEncodingException
1256    {
1257        String exID = removePrefix(sectionID, SECTION);
1258        exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1259               
1260        // now get the EX datastream for that for number
1261        String exXML = this.getItem(docPID, exID);
1262        return exXML;
1263    }
1264
1265    /** Given a documentNode element, adds the nodetype attribute to all of its
1266     * docNode descendants. The nodetype is either Root, Internal or Leaf to indicate
1267     * whether the docnode is a toplevel document Node, or has children or has none.
1268     * @param e - the documentNode element whose descendants' nodetypes will be set
1269     * at method's end. */
1270    protected void addNodeTypeToDescendants(Element e) {
1271    NodeList sections = e.getElementsByTagName(SECTION_ELEMENT);
1272    for(int i = 0; i < sections.getLength(); i++) {
1273        Element section = (Element)sections.item(i);
1274        NodeList descendants = section.getElementsByTagName(SECTION_ELEMENT);
1275        if(descendants.getLength() > 0) {
1276        // if there are any descendants (which includes children) that are SECTIONS
1277        section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
1278        } else {
1279        section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
1280        }
1281    }
1282    }
1283
1284
1285    /** @return the part of the TOC XML file (which outlines doc structure) 
1286     * relating to the given section. This includes the section denoted by
1287     * sectionID as well as all descendent subsections thereof.
1288     * @param docPID - a fedora pid identifying a greenstone document object.
1289     * @param sectionID - identifies the particular section in the
1290     * document denoted by docPID, may be a section name or number.
1291     * @param structure can contain any combination of: ancestors, parent,
1292     * siblings, children, descendants, entire, specifying the portion of
1293     * the structure to retrieve.
1294     * @param info can contain any combination of: siblingPosition, numSiblings,
1295     * numChildren, documentType, requesting additional information about the structure. */
1296    public Element getSectionStructureXML(String docPID, String sectionID, String structure, String info)
1297        throws RemoteException, UnsupportedEncodingException, SAXException, IOException
1298    {
1299    // get the TableOfContents (TOC) XML datastream as a String
1300    String xmlTOC = getTOC(docPID);
1301   
1302    // convert it into a DOM document
1303    InputSource source = new InputSource(new StringReader(xmlTOC));
1304    Document doc = builder.parse(source);
1305    // toplevel element docEl = <Section id="1"></Section>
1306    Element docEl = doc.getDocumentElement();
1307    addNodeTypeToDescendants(docEl);
1308    docEl.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
1309
1310    if(structure.indexOf("entire") != -1) { // don't need to find the specific section, doc root is what's required
1311        docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1312        return docEl;
1313    }
1314
1315    if(sectionID.equals("")) {
1316        sectionID = "1";
1317    }
1318   
1319    // Store just the number
1320    String sectionNumber = removePrefix(sectionID, SECTION);
1321    // Check whether we're requested to return the toplevel element itself
1322    // If sectionNumber=1, then the top-level element/document element
1323    // of the TOC XML is requested, so return the TOC as is.   
1324    if(sectionNumber.equals("1") && structure.indexOf("descendants") != -1) {
1325        docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1326        return docEl;       
1327    }
1328   
1329    // if the root is the section required, return that
1330    if(docEl.getTagName().equals(SECTION_ELEMENT)
1331       && docEl.getAttribute(ID).equals(sectionNumber)) {
1332        Element substructure = getSubstructure(docEl, structure);
1333        return getStructureInfo(substructure.getOwnerDocument(), docEl, info);
1334        //return docEl;
1335    }
1336   
1337
1338    // Else, get all <Section> elements and find the
1339    // <Section id="sectionNumber"></Section> and return that
1340    NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1341    for(int i = 0; i < sections.getLength(); i++) {
1342       
1343        Element e = (Element)sections.item(i);
1344        if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) {
1345        Element substructure = getSubstructure(e, structure);
1346        return getStructureInfo(substructure.getOwnerDocument(), e, info);
1347        }
1348    }
1349
1350    return null; // not found
1351    }
1352
1353   
1354    /** Implements browsing document titles of a greenstone collection stored in
1355     * the fedora repository by letter.
1356     * @return the document pids whose titles start with the given letter.
1357     * @param collName - the name of the collection.
1358     * @param letter - the starting letter to browse by.
1359     */
1360    public String[] browseTitlesByLetter(final String collName, final String letter)
1361        throws RemoteException, FedoraVersionNotSupportedException
1362    {
1363        String[] pids = null;
1364         
1365        // We want to do the following kind of search (assuming letter=f
1366        // and collName=demo):
1367        // pid~greenstone:demo* title~f*
1368       
1369        // We don't need to normalise the letter first (to search titles starting
1370        // with both uppercase and lowercase versions of the letter), because 
1371        // Fedora always searches for both.
1372        // HOWEVER, searching for title~f* returns all documents containing f (or F) 
1373        // ANYWHERE in their titles!
1374        // SOLUTION: search the collection for all titles containing f as given,
1375        // retrieving pid and title fields. Then from the list of results, select
1376        // only those titles that start with the given letter.
1377        // This may seem an unnecessarily cumbersome job (when it looked like it
1378        // should have worked with just title~f*), BUT, at least the resulting
1379        // documents will be reduced to a set of titles containing f; rather than
1380        // having to search *all* documents in the collection.
1381        final String title = letter+WILDCARD;
1382       
1383        FieldSearchResult objects = findObjectsWithTitlesContaining(
1384                collName, title);
1385        ObjectFields[] results = objects.getResultList();
1386        TreeSet v = new TreeSet(); // TreeSet to return the results in
1387                                //alphabetical order
1388        for(int i = 0; i < results.length; i++) {
1389            // from the result list, select those titles that don't
1390            // just *contain* the letter, but actually start with it:
1391            String resultTitle = results[i].getTitle(0);
1392            if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1393                String pid = results[i].getPid();
1394                // skip the collection object itself
1395                if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1396                    v.add(pid);
1397                    //LOG.debug(resultTitle);
1398                }
1399            }
1400        }
1401        pids = new String[v.size()];
1402        v.toArray(pids);
1403        return pids;
1404    }
1405       
1406    /** Implements querying document DC titles of a greenstone collection stored in
1407     * the fedora repository for a term that may occur anywhere in their titles.
1408     * @return the document pids whose DC titles contain the parameter term.
1409     * @param titleContents - the word or phrase to search the collection's
1410     * document titles for. Only one word, and this method finds Greenstone
1411     * DOCUMENT titles CONTAINING that word (if any).
1412     * @param startsWith - if true, searches for titles that start with
1413     * titleContents. Else it searches for titles that contain titleContents. */
1414    public String[] searchDocumentTitles(String collName, String titleContents,
1415            boolean startsWith)
1416        throws RemoteException, FedoraVersionNotSupportedException
1417    {
1418        String[] pids = null;
1419         
1420        // We want to do the following kind of search (when written in Fedora's
1421        // REST format - see http://localhost:8080/fedora/search):
1422        // pid~greenstone:<colname>-* title~<1st word of titleContents>
1423       
1424        // We don't need to normalise the word first (to search titles starting
1425        // with both uppercase and lowercase versions of it), because 
1426        // Fedora always searches for the normalised word.
1427       
1428        // 2 difficulties:
1429        // - We can only search for single words with Fedora's Conditional Search.
1430        // Obtain pids and titles of documents containing the first word and then 
1431        // we filter the titles to those containing the entire phrase of
1432        // titleContents.
1433        // - Searching for title~FirstWord returns all documents containing 
1434        // this word ANYWHERE in their titles. If parameter startsWith is false,
1435        // then this is fine. But if parameter startsWith is true, then go 
1436        // through all the resulting titles found (containing FirstWord), select
1437        // only pids of those titles that contain the entire phrase titleContents
1438       
1439        final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1440       
1441        int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1442        // if titleContents is a phrase (contains space), then it's not 
1443        // a single word, in which case search for just the first word
1444        String title = titleContents; // assume it's a single word
1445        if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1446            title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1447   
1448        FieldSearchResult objects = findObjectsWithTitlesContaining(
1449                collName, title);
1450        if(objects == null) {
1451            final String[] empty = {};
1452            return empty;
1453        }
1454       
1455        // Go through all the titles found and for those that match the criteria*,
1456        // store their pid. *Criteria: titles that start with OR contain the
1457        // word OR phrase of titleContents.
1458        ObjectFields[] results = objects.getResultList();
1459        Vector v = new Vector(); // return pids in the order found
1460        for(int i = 0; i < results.length; i++) {
1461            // from the result list, select those titles that don't
1462            // just *contain* the first word, but the entire phrase of
1463            // words in titleContents:
1464            String resultTitle = results[i].getTitle(0);
1465            boolean accepted = false; // accept the resultTitle found
1466           
1467            String resultPID = results[i].getPid();
1468            // skip the collection object itself, since it's not a document
1469            if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1470                accepted = false;
1471            }
1472            // if titleContents is a single word and we are checking
1473            // whether resultTitle contains titleContents:
1474            else if(indexOfFirstSpace == -1) { // titleContents is a single word
1475                if(!startsWith) // titles that *contain* the word titleContents
1476                    accepted = true; //accept all titles found
1477                // else startWith: accept titles starting with word titleContents
1478                else if (resultTitle.toLowerCase().startsWith(
1479                            titleContents.toLowerCase()))
1480                    accepted = true;
1481               
1482            }
1483            else { // otherwise, titleContents is a phrase of >1 word, need
1484                // to check that the result title contains the entire phrase
1485                if(startsWith && resultTitle.toLowerCase().startsWith(
1486                        titleContents.toLowerCase()))
1487                    accepted = true;
1488                else if(!startsWith && resultTitle.toLowerCase().contains(
1489                        titleContents.toLowerCase()))
1490                    accepted = true;
1491            }
1492           
1493            // if the resultTitle fit the criteria, store its pid
1494            if(accepted) {
1495                v.add(resultPID);
1496                //System.out.println(resultTitle);
1497            }
1498           
1499        }
1500        pids = new String[v.size()];
1501        v.toArray(pids);
1502        return pids;
1503    }
1504
1505
1506    /**
1507     * @param collName - the collection of documents we'll be searching in.
1508     * @param titleWord - the word we'll be searching the document titles for.
1509     * (Fedora's search returns all objects whose title contains that word).
1510     *
1511     * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1512     * (see link):
1513     * <pre>
1514     * "There are two search methods: a search on all fields or a search on
1515     * specific fields. To search all fields the setTerms function of the
1516     * FieldSearchQuery must be used, with the paramater being the desired string.
1517     *
1518     * To search by specific fields, you must create an array of Condition
1519     * objects. Each condition consists of three parts:
1520     * the field to be searched (.setProperty()),
1521     * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1522     * and the search string (.setValue())"
1523     * </pre>
1524     * We want to use the second search method above when browsing and searching,
1525     * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1526     * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1527     * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1528     *
1529     * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1530     * web services are defined. (The web.xml defines the "Servlets for REST-based
1531     * interfaces to the Fedora Repository Server").
1532     * Do a search on the word "search":
1533     * fedora.server.access.FieldSearchServlet is the class we need to look at
1534     * It accesses a different Condition.java class: fedora.server.search.Condition.java
1535     * The above is what is used by the REST-based interface in FieldSearchServlet.java
1536     * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1537     * is what's used in the fedora client application that makes use of
1538     * the SOAP-based interface.
1539     *
1540     * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1541     * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1542     * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1543     * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1544     */
1545    protected FieldSearchResult findObjectsWithTitlesContaining(
1546            String collName, final String titleWord)
1547        throws RemoteException, FedoraVersionNotSupportedException
1548    {
1549        // Searching for pids of the form "greenstone:gs2mgdemo-*";
1550        final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1551       
1552        Condition[] conditions = new Condition[2];
1553        conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1554        conditions[1] = new Condition("title", ComparisonOperator.has, titleWord); 
1555       
1556        FieldSearchQuery query = new FieldSearchQuery();
1557        query.setConditions(conditions);
1558       
1559        // We'd like pid and title returned for each object, because we'll make
1560        // use of title. We pass maxResults=null to get all objects that match
1561        // (i.e. all collections).
1562        FieldSearchResult objects = null;
1563        final String[] retrieveFields = {"pid", "title"};
1564        try {
1565            objects = AutoFinder.findObjects(
1566                    APIA, retrieveFields, maxresults, query);
1567            // collection = APIA.findObjects(new String[]{"pid", "title"},
1568                // new NonNegativeInteger(Integer.toString(maxresults)), query);
1569        } catch(RemoteException ex) {
1570            if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1571                // fedoraVersion is too low, searching/browsing is not possible
1572                // (because class Condition has changed after 2.0, from 2.1.1
1573                // onwards)
1574                throw new FedoraVersionNotSupportedException(fedoraVersion);
1575            } else {
1576                LOG.error(
1577                    "Remote exception when calling web service operation " +
1578                    "findObject() to execute search:\n" + ex.getMessage());
1579                ex.printStackTrace();
1580                throw ex;
1581            }
1582        }
1583        return objects; // return the FieldSearchResult objects found
1584    }
1585   
1586    /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1587     *  greenstone:&lt;colname&gt;-&lt;docName&gt;)
1588     *  @param docPID - pid of a greenstone document in the fedora repository. */
1589    public String getDocName(String docPID) {
1590        return docPID.substring(docPID.indexOf('-')+1);
1591    }
1592   
1593    /** @return the &lt;name&gt; in the parameter collPID
1594     * (greenstone:&lt;name&gt;-collection)
1595     * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1596     * in the docPID (greenstone:&lt;name&gt;-docID).
1597     * @param collPID - pid of a greenstone collection in the fedora repository. */
1598    public String getCollectionName(String collPID) {
1599        return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1600    }
1601   
1602   
1603    /** Return the TOC substructure requested
1604     * @return an element containing a copy of the element original with either only its child
1605     * elements or with all its descendants and/or its ancestors or only its parent
1606     * and/or its siblings (depending on what the parameter structure specifies).
1607     * @param original - the element to start copying from and whose structure is requested.
1608     * @param structure - a string containing any combination of the values:
1609     * ancestors, parent, siblings, children, descendants,
1610     * specifying the portion of the structure to retrieve.
1611     * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1612     */
1613    protected Element getSubstructure(Element original, String structure)
1614    {
1615    Document doc = builder.newDocument();
1616   
1617    boolean descendants = (structure.indexOf("descendants") != -1) ? true : false;
1618    Node current = doc.importNode(original, descendants);
1619
1620    // descendants=true: import/copy descendants.
1621    // Else, copy just current node original (later copy its direct children)   
1622   
1623    Node parentOfCurrent = null;
1624    Node parentOfOriginal = original.getParentNode();
1625    if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot)
1626        parentOfOriginal = null;
1627    }
1628   
1629    if(parentOfOriginal == null) { // no parentNode, so current is the root node.
1630        // can't get ancestors/parent/siblings, since all these need parentNode
1631        doc.appendChild(current);
1632    } else { // siblings, ancestors and parent requests all require parent node to exist
1633        // First check if we need to get ancestors, else for whether parent is required
1634        if(structure.indexOf("ancestors") != -1) {
1635        parentOfCurrent = doc.importNode(parentOfOriginal, false);
1636       
1637        Node child = null;
1638        Node parent = parentOfCurrent;      // the copy
1639        Node n = parentOfOriginal.getParentNode(); // the doc to copy from
1640       
1641        while(n != null && n != original.getOwnerDocument()) {
1642            child = parent;
1643            parent = doc.importNode(n, false); // no descendants
1644            parent.appendChild(child);
1645            n = n.getParentNode();
1646        }
1647       
1648        doc.appendChild(parent); // need to put the copied node into a document
1649        // else it won't have a parent doc (DOMSource can't work with it
1650        // without it having a document parent).
1651       
1652        } else if(structure.indexOf("parent") != -1) {
1653        parentOfCurrent = doc.importNode(parentOfOriginal, false);
1654        //parentOfCurrent.appendChild(current);
1655        doc.appendChild(parentOfCurrent);
1656        }
1657       
1658        // a request for siblings is independently tested for
1659        if(structure.indexOf("siblings") != -1) {
1660        // only import parent if we didn't already import
1661        // it for a request for ancestors or parent
1662        if(parentOfCurrent == null) {
1663            parentOfCurrent = doc.importNode(parentOfOriginal, false);
1664            doc.appendChild(parentOfCurrent);     // this becomes the root
1665        }
1666        // now the siblings of current (children of parentOfCurrent)
1667        NodeList children = parentOfOriginal.getChildNodes();           
1668        for(int i = 0; i < children.getLength(); i++) {
1669            Node n = children.item(i);
1670
1671            if(n.getNodeName().equals(SECTION_ELEMENT)) {
1672            if((Element)n != original) { // skip original which was already imported
1673                Node child = doc.importNode(n, false); // no descendants
1674                parentOfCurrent.appendChild(child);
1675            } else { // already imported Current element, insert at this position               
1676                parentOfCurrent.appendChild(current);
1677            }
1678           
1679            }           
1680        }
1681        } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests
1682        // (sibling request adds the current node into a particular position)
1683        parentOfCurrent.appendChild(current);
1684             // need to put the copied node into a document
1685             // else it won't have a parent doc (DOMSource can't work with it
1686             // without it having a document parent).
1687        } else { // when only children or descendants were requested, current becomes root document
1688        doc.appendChild(current);
1689        }
1690    }
1691
1692    // if we are not recursively copying all descendants, then copy just
1693    // the childnodes of current:
1694    if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children
1695
1696        // get e's children and copy them into the new document
1697        NodeList children = original.getChildNodes();
1698        for(int i = 0; i < children.getLength(); i++) {
1699        // create copy
1700        Node n = doc.importNode(children.item(i), false);
1701        // attach it to parent
1702        current.appendChild(n);
1703       
1704        // Now we need to indicate whether this new node (child) is a leaf
1705        // or not. (This is necessary for getChildrenOfSection(), else
1706        // it's hard to know if the children are leaves or have further
1707        // subsections.
1708        if(n.getNodeName().equals(SECTION_ELEMENT)) {
1709            // we're dealing only with section children
1710           
1711            // Check if the matching original had children:
1712            Element originalsChild = (Element)children.item(i);
1713            NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT);
1714            if(grandchildren.getLength() > 0) {
1715            // original's child has children, so indicate this
1716            // in the copied child:
1717            Element child = (Element)current;           
1718            //  child.setAttribute(TYPE, INTERNAL_NODE);
1719           
1720            }
1721        }
1722        }
1723    }
1724
1725    return doc.getDocumentElement();
1726    }   
1727
1728
1729    /** Return the TOC substructure with the requested structural info.
1730     * @return an element containing a copy of element e with either only its child
1731     * elements or with all its descendants and/or its ancestors or only its parent
1732     * and/or its siblings (depending on what the parameter structure specifies).
1733     * Returns null if the element, e, passed in is null.
1734     * @param doc - the new document into whose root element the structural information
1735     * will be inserted as attributes.
1736     * @param e - the element to start copying from and whose structure is requested.
1737     * @param info - a string containing any combination of the values: numChildren,
1738     * numSiblings, siblingPosition. The requested info gets added as attributes to
1739     * the returned root element.
1740     * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1741     */
1742    protected Element getStructureInfo(Document doc, Element e, String info)
1743    {
1744    if(e == null) {
1745        return null;
1746    }
1747   
1748    Element root = doc.getDocumentElement();
1749
1750    if(!info.equals("")) {   
1751        if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1
1752           || info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) {
1753        //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength();
1754        int numChildren = 0;
1755       
1756        NodeList children = e.getChildNodes();
1757        for(int i = 0; i < children.getLength(); i++) {
1758            Node n = children.item(i);
1759            if(n.getNodeName().equals(SECTION_ELEMENT)) {
1760            numChildren++;
1761            }
1762        }
1763       
1764        if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1) {
1765            root.setAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN, Integer.toString(numChildren));
1766        }
1767        if(info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) {
1768            //String docType = (numChildren > 0) ? "hierarchy" : "simple";
1769            String docType = "hierarchy";
1770            root.setAttribute(AbstractBasicDocument.INFO_DOC_TYPE, docType);
1771        }
1772        }
1773       
1774        if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings
1775        int numSiblings = 0;
1776        int siblingPosition = 0;
1777           
1778        Node parent = e.getParentNode();
1779        if(parent == null) {
1780            numSiblings = 0;
1781            siblingPosition = 1;
1782        } else {
1783            //numSiblings = parent.getChildNodes().getLength();
1784            NodeList siblings = parent.getChildNodes();
1785           
1786            for(int i = 0; i < siblings.getLength(); i++) {
1787            Node n = siblings.item(i);
1788            if(n.getNodeName().equals(SECTION_ELEMENT)) {
1789                if(e == (Element)n) {
1790                siblingPosition = numSiblings+1;
1791                } else { // count every sibling section element, except e itself
1792                numSiblings++;
1793                }               
1794            }
1795            }
1796        }       
1797       
1798        if(info.indexOf(AbstractBasicDocument.INFO_NUM_SIBS) != -1) {
1799            root.setAttribute(AbstractBasicDocument.INFO_NUM_SIBS, Integer.toString(numSiblings));
1800        }
1801       
1802        if(info.indexOf(AbstractBasicDocument.INFO_SIB_POS) != -1) {
1803            root.setAttribute(AbstractBasicDocument.INFO_SIB_POS, Integer.toString(siblingPosition));
1804        }
1805        }
1806    }
1807   
1808    return root;
1809    }
1810
1811
1812    /**
1813     * Return a datastream of a document, given the document's id
1814     * and the item id of the datastream which is to be retrieved.
1815     * @return the XML (in String form) of the item denoted by itemID 
1816     * that's part of the fedora data object denoted by docPID.
1817     * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1818     * Can't retrieve images denoted by itemID using this method, only items
1819     * that are of XML format.
1820     * @param docPID - pid of a greenstone document in the fedora repository.
1821     * @param itemID - the itemID of a datastream of the fedora object
1822     * identified by docPID.
1823    */
1824    protected String getItem(String docPID, String itemID) 
1825        throws RemoteException, UnsupportedEncodingException
1826    {
1827        // MIMETypedStream getDatastreamDissemination(
1828            // String pid, String dsID, asOfDateTime)
1829        MIMETypedStream datastream
1830            = APIA.getDatastreamDissemination(docPID, itemID, null);
1831        return new String(datastream.getStream(), UTF8);
1832    }
1833   
1834    /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1835     * returns "1.2.1".
1836     * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1837     * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1838     * However, the string str is returned unchanged if the prefix does not occur
1839     * at the start of str.
1840     * @return the String parameter str without the prefix.
1841     * It can be used to return the number of an itemID of a greenstone document
1842     * stored in the fedora repository without the given prefix. 
1843     * @param prefix - the prefix which ought to be removed from the itemID.
1844     * @param str - the value of the itemID.
1845    */
1846    protected String removePrefix(String str, String prefix) {
1847        // do nothing in those cases where the prefix is not in param str
1848        if(!str.startsWith(prefix))
1849            return str;
1850        // otherwise:
1851        if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1852            return "1" + str.substring(prefix.length());   
1853        } else {
1854            return str.substring(prefix.length());
1855        }
1856    }
1857       
1858    /** Given a number of the form x(.y.z), this method returns this number
1859     * as is, except when x = 1, in which case, it would return .y.z
1860     * That is, given number=3.2.1, this method would return 3.2.1
1861     * But, given number=1.2.3, this method would return .2.3.
1862     * When number=1, it is NOT a special case: "" is returned as explained.
1863     * @param number - a proper (fedora-greenstone document) section number
1864     * @return the same number as it ought to be for the associated EX, DC datastreama.
1865    */
1866    protected String convertToMetaNumber(String number) {
1867        if(number.startsWith("1.") || number.equals("1"))
1868            return number.substring(1); // remove the first char: the initial '1'
1869        else return number;
1870    }
1871   
1872    /** @return fedora's baseURL. It's of the form
1873     * "http://localhost:8080/fedora" */
1874    public String getBaseURL() { return baseURL; }
1875   
1876    /** @return the portAddressURL (in use) of the Fedora APIA
1877     * web service (should be the endpoint location in the APIA's
1878     * WSDL file).
1879     * It's usually of the form baseURL+"/services/access" */
1880    public String getPortAddressURL() {
1881        return this.baseURL + this.portAddressSuffix;
1882    }
1883   
1884    /** @return the baseURL for gsdlAssocFiles */
1885    public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1886   
1887    public static void main(String args[]) {
1888        try {
1889            FedoraConnection fedoraCon
1890                = new FedoraConnection(new File("fedoraGS3.properties"));
1891           
1892            String[] pids = null;
1893            pids = fedoraCon.getCollections();
1894            String[] titles = fedoraCon.getCollectionTitles(pids);
1895            for(int i = 0; i < pids.length; i++) {
1896                System.out.println("extracted title:" + titles[i]);
1897                String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1898                String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1899                for(int j = 0; j < docPIDs.length; j++) {
1900                    System.out.println("\tExtr doc title: " + docTitles[j]);
1901                }
1902            }   
1903           
1904            String PID = "greenstone:gs2mgdemo-collection";
1905            String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1906            String dcXML = fedoraCon.getDC(PID);
1907            String exXML = fedoraCon.getEX(PID);
1908            String tocXML = fedoraCon.getTOC(docPID);
1909            System.out.println("Dublin Core Metadata for " + PID
1910                    + " is:\n" + dcXML);
1911            System.out.println("GS3 extracted metadata for " + PID
1912                    + " is:\n" + exXML);
1913            System.out.println("Table of Contents for " + docPID
1914                    + " is:\n" + tocXML);
1915           
1916           
1917            String[] sectionNames = fedoraCon.getSectionNames(docPID);
1918            System.out.println("\nSection names for " + docPID + " are:");
1919            for(int i = 0; i < sectionNames.length; i++)
1920                System.out.println(sectionNames[i]);
1921           
1922            String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1923            //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1924            String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1925            System.out.println("\nSection numbers for " + docPID + " are:");
1926            for(int i = 0; i < sectionNumbers.length; i++) {
1927                //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1928                System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1929            }
1930           
1931            String sectionID = "SECTION1"; //SECTION1.5
1932            System.out.println("\n");
1933                   
1934            System.out.println(
1935                "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1936            pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1937            for(int i = 0; i < pids.length; i++)
1938                System.out.println(pids[i]);
1939           
1940            System.out.println(
1941                "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1942            pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1943            for(int i = 0; i < pids.length; i++)
1944                System.out.println(pids[i]);
1945           
1946            System.out.println("\nDone - exiting.");
1947            System.exit(0);
1948        } catch(RemoteException re) {
1949            System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1950            re.printStackTrace();
1951        } catch(Exception e) {
1952            System.out.println("Unable to instantiate FedoraConnection\n" + e);
1953            e.printStackTrace();
1954            //LOG.error("Unable to instantiate FedoraConnection\n" + e, e);
1955        }
1956    }
1957}
Note: See TracBrowser for help on using the browser.