source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 22300

Last change on this file since 22300 was 22300, checked in by ak19, 14 years ago
  1. Changes to get Fedora to work with Greenstone3: to let the Greenstone3 Reader Interface work with a Fedora Repository behind the scenes. 2. No longer returns XML Strings formatted for display, but unformatted, since when it's converted to XML DOM on the Greenstone end, new lines introduced due to whitespace interfere with Greenstone 3's default parsing of the XML.
File size: 84.5 KB
Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22import org.greenstone.gsdl3.util.GSXML;
23
24import fedora.client.utility.AutoFinder;
25import fedora.server.access.FedoraAPIAServiceLocator;
26// The object for accessing FedoraAPI-A web services:
27import fedora.server.access.FedoraAPIA;
28
29// The definitions for all complex fedora types:
30import fedora.server.types.gen.MIMETypedStream;
31import fedora.server.types.gen.RepositoryInfo;
32import fedora.server.types.gen.FieldSearchResult;
33import fedora.server.types.gen.FieldSearchQuery;
34import fedora.server.types.gen.DatastreamDef;
35import fedora.server.types.gen.ObjectFields;
36import fedora.server.types.gen.Condition;
37import fedora.server.types.gen.ComparisonOperator;
38//import fedora.server.types.gen.*;
39
40import javax.net.ssl.SSLHandshakeException;
41import java.net.ConnectException;
42import org.xml.sax.SAXException;
43import java.io.UnsupportedEncodingException;
44import java.io.IOException;
45import javax.xml.parsers.ParserConfigurationException;
46import java.net.MalformedURLException;
47import java.rmi.RemoteException;
48
49import java.io.StringReader;
50import java.io.FileInputStream;
51import java.io.File;
52import java.util.TreeSet;
53import java.util.Properties;
54import java.util.Vector;
55
56import java.awt.GridLayout;
57import javax.swing.JLabel;
58import javax.swing.JOptionPane;
59import javax.swing.JPanel;
60import javax.swing.JPasswordField;
61import javax.swing.JTextField;
62
63import org.apache.log4j.Logger;
64import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
65import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
66import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
68import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
69
70import javax.xml.parsers.DocumentBuilderFactory;
71import javax.xml.parsers.DocumentBuilder;
72import javax.xml.transform.*;
73
74import org.xml.sax.InputSource;
75import org.w3c.dom.Document;
76import org.w3c.dom.Element;
77import org.w3c.dom.NodeList;
78import org.w3c.dom.Node;
79
80/** Class that establishes a connection with Fedora's web services (via
81 * Java stub classes for the same) and then provides methods to retrieve
82 * Greenstone-specific data, such as the TOC, EX, DC, and Section
83 * datastreams of the Greenstone documents stored in Fedora's repository.
84 * These datastreams are returned as Strings without any changes being
85 * made to them.
86 * @author ak19
87*/
88public class FedoraConnection implements FedoraGS3DL {
89 /** The logging instance for this class */
90 private static final Logger LOG = Logger.getLogger(
91 FedoraConnection.class.getName());
92
93 /** The version of fedora that is supported by class FedoraConnection */
94 protected static final String SUPPORTED_VERSION = "2.2.1";
95
96 /* Some fixed strings of known literals */
97 protected static final String GET= "/get/";
98
99 // The DemoSOAPClient declares and uses the following as a static member
100 // Probably none of the APIA methods (web service methods) remembers
101 // state, that might explain why we can use it as a static member then.
102 /** The object used to access the Fedora API-A web service methods */
103 protected static FedoraAPIA APIA;
104
105 /** Version of the running fedora server */
106 protected String fedoraVersion;
107 /** The location of the fedora server, usually of the form
108 * http://localhost:8080/fedora */
109 protected String baseURL;
110
111 /** The user-specified portAddressSuffix of the Fedora Access web services
112 * (endpoint URL in the WSDL), usually of the form
113 * http://localhost:8080/fedora/services/access
114 * Users can tell FedoraGS3 to try accessing that first by setting
115 * the "port.address.suffix" property in the properties file.
116 * FedoraGS3 itself will not write the portAddressSuffix currently used in
117 * the file for next time, but leave whatever value was entered in the
118 * properties file. The portAddress--not just suffix--currently in use (once
119 * the FedoraAPIA handle has been instantiated) can be obtained through
120 * getPortAddressURL() method. */
121 protected String portAddressSuffix;
122
123 /** The part of the portAddress that comes after the baseURL. It is usually:
124 * "/services/access" */
125 protected static final String defaultPortAddressSuffix = "/services/access";
126
127 /** The preferred language of the displayed content */
128 protected String lang;
129 /** The maximum number of collections to retrieve */
130 protected int maxresults;
131 /** DocumentBuilder used to create and parse XML documents */
132 protected DocumentBuilder builder;
133
134 /** Static method that returns the version of Fedora supported by this
135 * class FedoraConnection. */
136 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
137 /** The version of the running Fedora server, which may or may not
138 * match the supported version. */
139 public String getFedoraVersion() { return fedoraVersion; }
140
141 /** @return the default language used to query for titles (and anything else
142 * where there are multiple language options). Upon initialisation, this
143 * defaults to English. */
144 public String getLanguage() { return lang; }
145
146 /** Sets the the default language used to query for titles (and anything else
147 * where there are multiple language options). If the default language for any
148 * query is not available, then English ("en") is used. If that's not available
149 * then the first other available language is used.
150 * @param lang - the two-letter language code to set the default language to.
151 */
152 public void setLanguage(String lang) { this.lang = lang; }
153
154 /** The default maximum number of search results returned for a search. Upon
155 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
156 public int getMaxResults() { return maxresults; }
157
158 /** Set the default maximum number of search results returned for a search.
159 * @param maxresults - the new default maximum number of search results to
160 * be returned. */
161 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
162
163 /** Code for this constructor is from DemoSOAPClient.java.
164 * Instantiates the APIA handle using the protocol, host, port, fedora
165 * server repository username and password.
166 * @param host - the fedora server host (may be prefixed with http:// or
167 * https:// if parameter protocol is empty). If there's no protocol, and
168 * no protocol prefixed to the host, then the protocol defaults to http.
169 * @param protocol - either http or https (or empty "")
170 * @param port - the port on which fedora is running.
171 * @param fedoraServerUsername - the administrator username required to
172 * access the fedora server's repository. ("fedoraAdmin" unless changed).
173 * @param fedoraServerPassword - the fedora server repository's
174 * administrator password. If none was set on fedora installation, this
175 * can be empty (""). */
176 public FedoraConnection(String protocol, String host, int port,
177 String fedoraServerUsername, String fedoraServerPassword)
178 throws ParserConfigurationException, MalformedURLException,
179 SSLHandshakeException, RemoteException, AuthenticationFailedException,
180 NotAFedoraServerException, ConnectException, Exception
181 {
182 try {
183 this.portAddressSuffix = "";
184 init(protocol, host, Integer.toString(port),
185 fedoraServerUsername, fedoraServerPassword);
186 } /*catch(RemoteException re) { //subclass of IOException
187 throw re;
188 } catch(SSLHandshakeException ssle) { //subclass of IOException
189 // this is also of type IOException
190 throw ssle;
191 }*/ catch(IOException ioe) { // connected to the wrong server
192 String exceptMsg = ioe.getMessage().toLowerCase();
193 if(exceptMsg.indexOf("request failed") != -1
194 || exceptMsg.indexOf("404") != -1)
195 throw new NotAFedoraServerException();
196 else // the IOException is not due the cause we thought it was, so
197 throw ioe; // rethrow whatever other IOException was caught (which
198 // could have been RemoteException or SSLHandshakeException
199 // or some other cause)
200 }
201 }
202
/** Default constructor which obtains host, port, fedora username and
 * password from the user.
 * It starts from an empty set of properties and hands them to
 * setInitialisationProperties(), which shows the authentication popup and
 * tries to connect with the values entered. How the individual failure
 * cases (wrong password, refused connection, SSL handshake failure, a
 * server that is not Fedora) are reported back to the user is decided in
 * that method.
 * @throws CancelledException if the user cancels the authentication popup.
 * @throws Exception any exception that setInitialisationProperties() does
 * not handle itself is passed on to the caller. */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // Nothing is preloaded, so the popup falls back on its own
    // built-in field defaults.
    setInitialisationProperties(new Properties());
}
247
/** Single argument constructor that takes the properties file defining
 * the values of the initialisation parameters required to instantiate a
 * FedoraConnection: fedora server username, password, host and port (and
 * optionally "port.address.suffix").
 * The values found in the file are offered in the initialisation input
 * dialog. Afterwards the values the user entered are written back to the
 * same file as the defaults for next time, except that the password and
 * any error message are always stored as "".
 * Problems reading or writing the file are logged as warnings and do not
 * make the constructor fail.
 * @param propertyFile is the properties file specifying the values for
 * Fedora server username, password, host and port.
 * @throws CancelledException if the user cancels the input dialog. */
public FedoraConnection(File propertyFile)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    Properties properties = new Properties();

    // Load the properties from the given file, if there is one
    try {
        if(propertyFile.exists()) {
            FileInputStream in = new FileInputStream(propertyFile);
            try {
                properties.load(in);
            } finally {
                in.close(); // release the file handle even if loading fails
            }
        }
    } catch(Exception e) {
        // If the file could not be read, we just continue with whatever
        // properties we have (possibly none)
        LOG.warn("Exception loading from propertyFile "
                 + propertyFile + ": " + e);
    }

    // Go through the process of showing the initialisation dialog
    setInitialisationProperties(properties);

    // Now save whatever values the user may have entered into the input
    // dialog as the default values for next time the dialog shows
    try {
        // Make sure errormessage gets stored as "" and doesn't cause
        // problems next time.
        properties.setProperty("errormessage", "");
        // Don't save passwords
        properties.setProperty("password", "");
        // If the portAddressSuffix is in the file already, then it's
        // user-specified and we shouldn't change it. If there is no such
        // property, write it out with an empty string value.
        if(properties.getProperty("port.address.suffix") == null) {
            properties.setProperty("port.address.suffix", "");
        }

        java.io.FileOutputStream out = new java.io.FileOutputStream(
                propertyFile); // same file the properties were loaded from
        try {
            properties.store(out, "fedoraGS3 properties");
        } finally {
            // Properties.store() leaves the stream open; close it whether
            // or not storing succeeded.
            out.close();
        }
    } catch(Exception e) {
        LOG.warn("Exception writing to propertyFile "
                 + propertyFile + ": " + e);
    }
}
305
306 /** Method that loops to display the dialog that retrieves the
307 * fedora server initialisation properties from the user. If there
308 * is a property file with values set already, it will display
309 * the previously entered values by loading them from that file.
310 * Otherwise, input fields in the dialog are empty.
311 * @param properties the Properties Hashmap storing values for
312 * username, password, host and port (and any errormessage). */
313 protected void setInitialisationProperties(Properties properties)
314 throws ParserConfigurationException, MalformedURLException,
315 CancelledException, ConnectException, RemoteException,
316 SSLHandshakeException, Exception
317 {
318 // keep looping to display authentication popup, until valid values are
319 // entered (except when a ConnectionRefused Exception is caught - this
320 // needs to be rethrown):
321 boolean authenticated = true;
322 // reset any error messages that may have been stored (should not be
323 // the case, but if there had been any difficulty during storing, it
324 // may not have written out an empty errorMessage)
325 properties.setProperty("errormessage", "");
326 do{
327 // show the Authentication-popup:
328 // By passing the HashMap Properties, user-updated values will
329 // be persistent in the authentication-popup fields (rather than
330 // reset to the default initial values).
331 properties = showAuthenticationPopup(properties);
332 String fedoraServerUsername = properties.getProperty("username", "");
333 String fedoraServerPassword = properties.getProperty("password", "");
334 String host = properties.getProperty("host", "");
335 String port = properties.getProperty("port", "");
336 //String protocol = host.startsWith("http") ? "" : "http://";
337 String protocol = "http://";
338 if(host.startsWith("http") || host.startsWith("https"))
339 protocol = "";
340 // NOTE THAT: if a fedora server at https:// is not accessible,
341 // it takes a long time for the authentication popup to reappear.
342
343 try{
344 this.portAddressSuffix
345 = properties.getProperty("port.address.suffix", "");
346 // Use the FedoraClient utility to get the SOAP stub for APIA.
347 // This SOAP stub enables the client to connect to a Fedora
348 // repository via the API-A web service interface.
349 init(protocol, host, port,
350 fedoraServerUsername, fedoraServerPassword);
351 // will throw Exception if it can't instantiate APIA
352
353 // if no exception thrown in the initialisation statement above,
354 // then we have been authenticated:
355 authenticated = true;
356 } catch(AuthenticationFailedException afe) {
357 authenticated = false;
358 properties.setProperty("errormessage", afe.getMessage());
359 } catch(RemoteException e) { // causes could be various
360 String reason = e.getMessage();
361 if(e.getCause() != null) {
362 // For instance, if a ConnectException indicating
363 // 'Connection Refused' or a java.net.UnknownHostException
364 // caused the RemoteException
365
366 // Strip out prefix "Nested exception is..." from the
367 // encapsulating Exception's message, by using the Cause's
368 // message. Keep Exception classname to give it some context:
369 reason = e.getCause().getClass().getName() + ": "
370 + e.getCause().getMessage();
371 // Give some more information if the connection was refused.
372 // (This can also happen when the Fedora server is not running)
373 if(e.getCause().getClass().equals(ConnectException.class)) {
374 reason += FedoraGS3Exception.connectionRefusedMessage;
375 }
376 }
377 // if the message indicates that a server was running there,
378 // then we tell the user it was not a Fedora server
379 if(reason.toLowerCase().contains("404")
380 || reason.toLowerCase().contains("request failed"))
381 {
382 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
383 }
384 authenticated = false;
385 properties.setProperty("errormessage", reason);
386 } catch(ConnectException e) {
387 properties.setProperty("errormessage",
388 FedoraGS3Exception.connectionRefusedMessage);
389 authenticated = false;
390 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
391 // be handled before IOException, as it's an IOException subclass.
392 authenticated = false;
393 properties.setProperty("errormessage",
394 FedoraGS3Exception.sslHandshakeExceptionMessage);
395 // we won't prefix the host with http for the user, as https
396 // might be right after all, and something else might have gone
397 // during the connection attempt instead.
398 //host = host.replace("https", "http"); //setting it for them
399 //properties.setProperty("host", host);
400 } catch(IOException ioe) { // occurs when we try to connect to a
401 // host/port where some server other than Fedora's is listening
402 // (e.g. if we end up connecting to GS3's host and port).
403 // In that case, we can get exception messages like a 404:
404 // "Unable to instantiate FedoraConnection
405 // java.io.IOException: Request failed [404 /fedora/describe]"
406 // Test this by trying to connect to localhost at 9090 where GS3 is
407 String exceptMsg = ioe.getMessage().toLowerCase();
408 if(exceptMsg.indexOf("request failed") != -1
409 || exceptMsg.indexOf("404") != -1)
410 {
411 properties.setProperty("errormessage",
412 NotAFedoraServerException.MESSAGE
413 + "\n(" + ioe.getMessage() + ")");
414 } else if(exceptMsg.indexOf("401") != -1
415 || exceptMsg.indexOf("500") != -1)
416 {
417 authenticated = false;
418 properties.setProperty("errormessage", ioe.getMessage());
419 } else { // the exception occurred for some other reason, rethrow it
420 throw ioe;
421 }
422 }
423 } while(!authenticated); // will keep showing popup until auhentication
424 // and connection input values are valid
425 }
426
427 /**
428 * Static method that displays a popup to allow the user to provide Fedora
429 * authentication (username, pwd) and connection (protocol+host, port) details.
430 * @param properties is a Properties HashMap where the property Keys which must
431 * have been put in here in advance (even with "" Values if appropriate) are:
432 * <pre>
433 * - username
434 * - password
435 * - host (may - but need not - be prefixed with either of the protocols
436 * "http://" and "https://"
437 * - port
438 * - errorMessage (displayed near the top of the popup dialog). Can be "".
439 * </pre>
440 * The values stored in the properties HashMap for the above property are
441 * initially displayed in the fields and the user can overwrite them.
442 * This is useful in such cases where invalid values were entered and this
443 * popup must be redisplayed to allow the user to correct their previous input.
444 * @return the same HashMap Properties which was passed as parameter. */
445 protected static Properties showAuthenticationPopup(Properties properties)
446 throws CancelledException
447 {
448 // Retrieve all the properties -- defaults to "" if any are null
449 JTextField usernameField = new JTextField(
450 properties.getProperty("username", "fedoraAdmin"));
451 JTextField passwordField = new JPasswordField(
452 properties.getProperty("password", ""));
453 JTextField hostField = new JTextField(
454 properties.getProperty("host", "localhost"));
455 JTextField portField = new JTextField(
456 properties.getProperty("port", "8080"));
457
458 JPanel panel = new JPanel(new GridLayout(4,2));
459 panel.add(new JLabel("User Name"));
460 panel.add(usernameField);
461 panel.add(new JLabel("Password"));
462 panel.add(passwordField);
463 panel.add(new JLabel("Host"));
464 panel.add(hostField);
465 panel.add(new JLabel("Port"));
466 panel.add(portField);
467
468 String heading = "Fedora Server Admin Authentication:";
469 String errorMessage = properties.getProperty("errormessage", "");
470 if(!errorMessage.equals("")) {
471 heading = "=> " + errorMessage + "\n\n" + heading;
472 }
473 int option = JOptionPane.showConfirmDialog(null, new Object[] {
474 heading, panel},
475 "Enter Network Password",
476 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
477
478 if (option == JOptionPane.OK_OPTION) {
479 String fedoraServerUsername = usernameField.getText();
480 String fedoraServerPassword = passwordField.getText();
481 String host = hostField.getText();
482 String port = portField.getText();
483 properties.setProperty("username", fedoraServerUsername);
484 properties.setProperty("password", fedoraServerPassword);
485 properties.setProperty("host", host);
486 properties.setProperty("port", port);
487 } else { // Cancel option
488 throw new CancelledException();
489 }
490 return properties;
491 }
492
493 /** Init method that is called by the constructor to set some
494 * important member variables including instantiating the APIA object
495 * used to invoke the Fedora APIA web service operations.
496 * @param protocol can be http or https
497 * @param host is the name of the Fedora server host
498 * @param port is the port number (String form) of the Fedora server
499 * @param fedoraServerUsername is the user name to access the Fedora
500 * Server
501 * @param fedoraServerPassword is the password needed to access the
502 * Fedora Server
503 */
504 protected void init(String protocol, String host, String port,
505 String fedoraServerUsername, String fedoraServerPassword)
506 throws ParserConfigurationException, MalformedURLException,
507 AuthenticationFailedException, RemoteException, Exception
508 {
509 // initialise member variables
510 lang = ENGLISH;
511 maxresults = Integer.MAX_VALUE;
512 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
513 builder = factory.newDocumentBuilder();
514
515 // (protocol is "" if host already contains protocol)
516 if(!protocol.equals("") && !protocol.endsWith("://"))
517 protocol += "://";
518 // now create baseURL = protocol://host:port/fedora
519 this.baseURL = protocol + host + ":" + port + "/fedora";
520
521 // Get the FedoraAPIA handle to/stub of the Fedora web services
522 // New way of instantiating connection to Fedora is dependent on
523 // fewer files of FedoraClient.jar
524 FedoraAPIAServiceLocator serviceLocator
525 = new FedoraAPIAServiceLocator(fedoraServerUsername,
526 fedoraServerPassword);
527
528 APIA = null;
529 boolean isUserSpecifiedPortAddressSuffix = false;
530 // try any portAddressSuffix specified by the user
531 if(!this.portAddressSuffix.equals("")) {
532 isUserSpecifiedPortAddressSuffix = true;
533 this.createAPIA(serviceLocator, this.portAddressSuffix,
534 "user-specified", isUserSpecifiedPortAddressSuffix);
535 }
536
537 // If the user-specified portAddressSuffix failed or if there was none
538 // given, then APIA will be null, so we will try with the default
539 // portAddressSuffix. This time all exceptions will be passed on.
540 if(APIA == null) {
541 isUserSpecifiedPortAddressSuffix = false;
542 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
543 "default", isUserSpecifiedPortAddressSuffix);
544 }
545
546 }
547
548 /** Tries to create the FedoraAPIA instance using the serviceLocator
549 * and the given portSuffix. The APIA instance is obtained for the
550 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
551 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
552 * Remote Exception from AXIS that it can't find the target service to
553 * invoke is ignored so that the caller can retry with the default port-
554 * address suffix first before giving up. */
555 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
556 String portSuffix, String messageInsert,
557 boolean isUserSpecifiedPortAddressSuffix)
558 throws Exception
559 {
560 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
561 // this.portAddressSuffix : defaultPortAddressSuffix;
562
563 try {
564 LOG.debug( "Trying to connect to Fedora using the given"
565 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
566 + baseURL + portSuffix);
567 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
568 new java.net.URL(baseURL+portSuffix));
569 // let's test whether we're authenticated (otherwise a
570 // RemoteException will be thrown to indicate that the
571 // password was incorrect.)
572 RepositoryInfo repositoryInfo = APIA.describeRepository();
573 // throws RemoteException if pwd wrong or for other reasons
574 // in which case describeRepository() service is unavailable
575 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
576 // If we come all the way here, no exceptions were thrown:
577 this.portAddressSuffix = portSuffix; // store the one currently in use
578 } catch(RemoteException re) {
579 // if we're here, then APIA was unable to call the web service
580 // If this was because the fedora authentication failed, then
581 // let's throw a custom exception
582 String message = re.getMessage().toLowerCase();
583 // Looking for something Unauthorized(401)
584 if(message.indexOf("unauthorized") != -1
585 || message.indexOf("401") != -1)
586 {
587 throw new AuthenticationFailedException();
588 } else if(isUserSpecifiedPortAddressSuffix
589 && re.getMessage().contains(
590 FedoraGS3Exception.missingTargetService))
591 {
592 LOG.warn("Failed to connect to Fedora APIA services at given"
593 + " port address:\n" + portSuffix
594 + "\nException: " + re.getMessage());
595 // APIA.describeRepository can throw a remote exception
596 // whereby AXIS says the target service is missing and can't
597 // be invoked (FedoraGS3Exception.missingTargetService)
598 // Don't rethrow this, if AXIS can't find the user-specified
599 // portAddressSuffix, we will try with the default suffix next
600 APIA = null;
601 } else { // if trying default portAddressSuffix or if any other
602 // RemoteException was generated (whose cause is something
603 // other than an authentication failure) rethrow it.
604 throw re;
605 }
606 } catch(Exception e) { // Other Exceptions
607 // Could possibly be a ServiceException when using ServiceLocator
608 if(isUserSpecifiedPortAddressSuffix) {
609 APIA = null; // we won't throw other exceptions yet until
610 // we have tried the default PortAddressSuffix for the baseURL
611 } else {
612 throw new FedoraGS3InitFailureException(e);
613 }
614 }
615 }
616
617 /** Gets all greenstone collections. Searches for greenstone:*-collection.
618 * Method getCollections() defaults to getting only those objects in fedora's
619 * repository whose pids are of the format greenstone:*-collection.
620 * The use of AutoFinder and findObjects is shown in
621 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
622 * The Fedora-APIA's method definition of findObjects is:
623 * <pre>
624 * fedora-types:FieldSearchResult findObjects(
625 * fedora-types:ArrayOfString resultFields,
626 * xsd:nonNegativeInteger maxResults,
627 * fedora-types:FieldSearchQuery query )
628 * </pre>
629 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
630 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
631 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
632 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html">Type definition of 2.2.1 FieldSearchQuery</a>
633 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
634 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
635 *
636 * @return an array of Strings containing the pids of all collections
637 * matching the format greenstone:*-collection.
638 */
639 public String[] getCollections() throws RemoteException
640 {
641 // Available constructors:
642 // FieldSearchQuery(java.util.List conditions)
643 // FieldSearchQuery(java.lang.String terms)
644 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
645 FieldSearchQuery query = new FieldSearchQuery();
646 query.setTerms(queryStr);
647 query.setConditions(null);
648 // we'd like pid and title returned for each object
649 // we pass maxResults=null to get all objects that match
650 // (i.e. all collections)
651 String[] pids = null;
652
653 FieldSearchResult collection = AutoFinder.findObjects(
654 APIA, new String[]{"pid", "title"}, maxresults, query);
655 ObjectFields[] results = collection.getResultList();
656 pids = new String[results.length];
657 for(int i = 0; i < results.length; i++) {
658 pids[i] = results[i].getPid();
659 }
660 return pids;
661 }
662
663 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
664 * top-level documents or document sections - have a DC datastream. This
665 * method returns the content (XML) of the DC datastream as it is stored in
666 * fedora's repository.
667 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
668 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
669 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
670 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
671 * The method signature is:
672 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
673 * where dsID = itemID (look at datastreams page of running fedora instance)
674 * To access the XML content of the MIMETypedObject returned, we use its method
675 * bytes[] getStream(), but when instantiating a String from this, we have to
676 * use the String() contructor where we can specify the charset encoding (in
677 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
678 * @return a String version of the XML in the DC datastream for the fedora
679 * object denoted by pid.
680 * @param pid - the fedora persistent identifier for an item in the fedora
681 * repository.
682 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
683 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
684 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
685 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
686 */
687 public String getDC(String pid)
688 throws RemoteException, UnsupportedEncodingException
689 {
690 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
691 // datastream ID, dsID = itemID, look at a running fedora
692 MIMETypedStream dcStream
693 = APIA.getDatastreamDissemination(pid, DC, null);
694 //asOfDateTime = null to get the current version of the dataStream
695
696 // need to set the charset encoding to UTF8
697 return new String(dcStream.getStream(), UTF8);
698 }
699
700 /** All "greenstone:*" objects in fedora (be they collections be they
701 * collections, top-level documents or document sections) have an EX
702 * datastream. This method returns the content (XML) of the EX datastream as
703 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
704 * @return a String version of the XML in the EX datastream for the fedora
705 * object denoted by pid.
706 * @param pid - the fedora persistent identifier for an item in the fedora
707 * repository.
708 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
709 * asOfDateTime).
710 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
711 * @see String getDC(String pid) throws Exception
712 * */
713 public String getEX(String pid)
714 throws RemoteException, UnsupportedEncodingException
715 {
716 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
717 //asOfDateTime = null to get the current version of the dataStream
718
719 // need to set the charset encoding to UTF8
720 return new String(exStream.getStream(), UTF8);
721 }
722
723 /** Some "greenstone:*" top-level documents in the fedora repository (but not
724 * greenstone collections or document sections) have a DLS metadata datastream.
725 * This method returns the content (XML) of the DLS datastream as is. (It calls
726 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
727 * @return a String version of the XML in the DLS datastream for the fedora
728 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
729 * @param pid - the fedora persistent identifier for an item in the fedora
730 * repository.
731 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
732 * asOfDateTime).
733 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
734 * @see String getDC(String pid) throws Exception
735 * */
736 public String getDLS(String pid)
737 throws RemoteException, UnsupportedEncodingException
738 {
739 MIMETypedStream dlsStream = null;
740 // If there is no DLS datastream, it throws an exception (whose class
741 // fedora.server.errors.DatastreamNotFoundException can't be imported
742 // here (it's not in the client side fedora.server.* package, but on
743 // the server side package of that name):
744 try{
745 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
746 //asOfDateTime=null to get the current version of the dataStream
747 } catch(RemoteException e) {
748 //These two don't work:
749 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
750 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
751
752 if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
753 { // there is no DLS data stream for this document
754 return "";
755 }
756 else { // different problem, exception due to different cause
757 throw(e);
758 }
759 }
760 if(dlsStream == null)
761 return "";
762 // need to set the charset encoding to UTF8
763 return new String(dlsStream.getStream(), UTF8);
764 }
765
766 /** All "greenstone:*" objects in fedora (be they collections or documents)
767 * have a TOC datastream, unless they have only 1 section (SECTION1).
768 * This method returns the content (XML) of the TOC datastream as is.
769 * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
770 * @return a String version of the XML in the TOC datastream for the fedora
771 * object denoted by pid.
772 * @param pid - the fedora persistent identifier for an item in the fedora
773 * repository.
774 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
775 * asOfDateTime)
776 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
777 * @see String getDC(String pid) throws Exception
778 * */
779 public String getTOC(String pid)
780 throws RemoteException, UnsupportedEncodingException
781 {
782 try {
783 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
784 //asOfDateTime = null to get the current version of the dataStream
785 // need to set the charset encoding to UTF8
786 return new String(tocStream.getStream(), UTF8);
787 } catch(RemoteException re) {
788 // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
789 return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
790 }
791 }
792
793 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
794 * for the collections indicated by collPIDs.
795 * @param collPIDs - an array of Strings denoting the pids for greenstone
796 * collections stored in the fedora repositoryl. These should be of the
797 * format "greenstone:&lt;collectionName&gt;-collection". */
798 public String[] getCollectionNames(String[] collPIDs) {
799 String[] collNames = new String[collPIDs.length];
800 for(int i = 0; i < collPIDs.length; i++)
801 collNames[i] = getCollectionName(collPIDs[i]);
802 return collNames;
803 }
804
805 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
806 * in the parameter collNames.
807 * @param collNames - a list of names of greenstone collections
808 * stored in the fedora repository. */
809 public String[] getCollectionPIDs(String[] collNames) {
810 String[] collPIDs = new String[collNames.length];
811 for(int i = 0; i < collNames.length; i++)
812 collPIDs[i] = getCollectionName(collNames[i]);
813 return collPIDs;
814 }
815
816 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
817 * denoted by parameter collName.
818 * @param collName - the name of a greenstone collection stored
819 * stored in the fedora repository. */
820 public String getCollectionPID(String collName) {
821 return GREENSTONE_+collName+_COLLECTION;
822 }
823
824 /**
825 * Gets the title of the collection denoted by the given collection's pid by
826 * retrieving the title metadata for it from the collection's EX datastream.
827 * @return the title (in the default language, else English, else the
828 * first title found) for the particular collection denoted by its PID.
829 * @param collPID is the pid of a greenstone collection in the fedora
830 * repository. */
831 public String getCollectionTitle(String collPID)
832 throws RemoteException, UnsupportedEncodingException,
833 SAXException, IOException
834 {
835 String title = null; // has to be null initially, we do a check on it
836 // Parse the EX datastream (XML), and in its DOM, find the
837 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
838 // There might be one OR several of those with attribute
839 // name="collectionname". If there's only one, then get that.
840 // If there are several, there would possibly a be qualifier attribute,
841 // in which case get qualifier=lang (where lang is the member variable)
842 // If there is no qualifier with the requested language, then get the
843 // english one which is likely to be there, else return the title for
844 // the first collectionname .
845
846 MIMETypedStream exdata
847 = APIA.getDatastreamDissemination(collPID, EX, null);
848 String exStream = new String(exdata.getStream(), UTF8);
849
850 InputSource source = new InputSource(new StringReader(exStream));
851 Document doc = builder.parse(source);
852 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
853 NodeList children = docEl.getChildNodes();
854
855 String firstName = "";
856 String englishName = "";
857 for(int i = 0; i < children.getLength(); i++ ) {
858 Node n = children.item(i);
859 if(n.getNodeType() == Node.ELEMENT_NODE) {
860 Element e = (Element)n;
861 if(e.hasAttribute(NAME)
862 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
863 firstName = FedoraCommons.getValue(e);
864 if(!e.hasAttribute(QUALIFIER)) {
865 title = FedoraCommons.getValue(e);
866 break;
867 }
868 else if(e.getAttribute(QUALIFIER).equals(lang)) {
869 title = FedoraCommons.getValue(e);
870 break;
871 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
872 englishName = FedoraCommons.getValue(e);
873 }
874 }
875 }
876 }
877
878 // if the title is still not set to that of the requested language,
879 // then try setting it to the collection name in English. If English
880 // isn't available, then set it to the first collection name provided
881 // (in whichever language).
882 if(title == null) {
883 title = englishName.equals("") ? firstName : englishName;
884 }
885 doc = null;
886 return title;
887 }
888
889 /** @return the collection titles for all the collections indicated by
890 * collPIDs.
891 * @param collPIDs - a list of pids identifying greenstone collections
892 * stored in the fedora repository. */
893 public String[] getCollectionTitles(String[] collPIDs)
894 throws RemoteException, UnsupportedEncodingException,
895 SAXException, IOException
896 {
897 String[] titles = new String[collPIDs.length];
898
899 // parse each EX datastream (XML) which contains the gs3-extracted meta.
900 for(int i = 0; i < collPIDs.length; i++) {
901 titles[i] = getCollectionTitle(collPIDs[i]);
902 }
903 return titles;
904 }
905
906 /** @return the title metadata for the given doc objects of a collection.
907 * These titles are returned in the same order as the given docIDs.
908 * (The docPIDs already contain the collection name anyway.)
909 * @param docPIDs - a list of pids identifying documents stored in the
910 * fedora repository. */
911 public String[] getDocTitles(String[] docPIDs)
912 throws RemoteException, UnsupportedEncodingException,
913 SAXException, IOException
914 {
915 String[] titles = new String[docPIDs.length];
916 for(int i = 0; i < docPIDs.length; i++) {
917 titles[i] = getDocTitle(docPIDs[i]);
918 }
919 return titles;
920 }
921
922 /** Gets the title metadata for a particular doc object in a collection
923 * denoted by docPID. The docPID already contains the collection name.
924 * @return the title for the fedora document item denoted by docPID
925 * @param docPID is the pid of the document in the fedora repository
926 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
927 public String getDocTitle(String docPID)
928 throws RemoteException, UnsupportedEncodingException,
929 SAXException, IOException
930 {
931 // We need the extracted metadata file, and find its
932 // documentElement's child
933 // <ex:metadata name="Title">sometitle</ex:metadata>
934 // where the title we return is sometitle
935
936 String title = "";
937 MIMETypedStream exdata
938 = APIA.getDatastreamDissemination(docPID, EX, null);
939 String exStream = new String(exdata.getStream(), UTF8);
940 return getTitle(exStream);
941 }
942
943 /** Given a string representation of a document's or document section's
944 * EX datastream -- which is a greenstone extracted metadata XML file --
945 * of the form:
946 * &lt;ex&gt;
947 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
948 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
949 * ...
950 * &lt;/ex&gt;
951 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
952 * returns the value embedded in that element ('sometitle' in
953 * the example above).
954 * @return the title metadata of the document/document section whose EX
955 * datastream is passed as parameter
956 * @param exStream the EX datastream in String form of the document or
957 * document section. */
958 protected String getTitle(String exStream)
959 throws SAXException, IOException
960 {
961 String title = "";
962 InputSource source = new InputSource(new StringReader(exStream));
963 Document doc = builder.parse(source);
964 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
965 NodeList children = docEl.getChildNodes();
966
967 // Cycle through all the *element* children of <ex:ex></ex:ex>
968 // which are all of the form:
969 // <ex:metadata name="somename">somevalue</ex:metadata>
970 // Find the one where name="Title", its value is the title
971 for(int i = 0; i < children.getLength(); i++ ) {
972 Node n = children.item(i);
973 if(n.getNodeType() == Node.ELEMENT_NODE) {
974 Element e = (Element)n;
975 if(e.hasAttribute(NAME)
976 && e.getAttribute(NAME).equals(TITLE)) {
977 title = FedoraCommons.getValue(e);
978 break;
979 }
980 }
981 }
982 return title;
983 }
984
985 /** @return the title metadata for the given document sections.
986 * These titles are returned in the same order as the given docPIDs
987 * and associated sectionIDs.
988 * (The docPIDs already contain the collection name anyway.)
989 * @param docPIDs - a list of pids identifying documents stored in the
990 * fedora repository.
991 * @param sectionIDs - a list of sectionIDs identifying individual sections
992 * of documents stored in the fedora repository whose titles are requested. */
993 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
994 throws RemoteException, UnsupportedEncodingException,
995 SAXException, IOException
996 {
997 String[] titles = new String[docPIDs.length];
998 for(int i = 0; i < docPIDs.length; i++) {
999 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1000 }
1001 return titles;
1002 }
1003
1004 /** @return the title metadata for the given document section.
1005 * (The docPID already contain the collection name anyway.)
1006 * @param docPID - a pid identifying a document in the fedora repository.
1007 * @param sectionID - the sectionID of the section of the
1008 * document whose title is requested. */
1009 public String getSectionTitle(String docPID, String sectionID)
1010 throws UnsupportedEncodingException, RemoteException,
1011 SAXException, IOException
1012 {
1013 String ex = this.getSectionEXMetadata(docPID, sectionID);
1014 return getTitle(ex);
1015 }
1016
1017 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1018 * returns the PIDs of the data objects found, with the exception of
1019 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1020 * collection PID.
1021 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1022 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1023 * the collection and not an object of the same collection) are returned.
1024 * All pids that do not map to a collection are assumed to be documents!
1025 * @return a list of the pids of all the (doc) objects in a collection.
1026 * @param colPID is the pid of the greenstone collection stored in
1027 * the fedora repository. */
1028 public String[] getCollectionDocs(String colPID)
1029 throws RemoteException
1030 {
1031 String colName = getCollectionName(colPID);
1032 //LOG.debug("colName: " + colName);
1033
1034 // Search fedora objects for pid=greenstone:<colName>-*
1035 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1036 // searches for "greenstone:"+colName+"-*";
1037 FieldSearchQuery query = new FieldSearchQuery();
1038 query.setTerms(queryStr);
1039 query.setConditions(null);
1040 String[] pids = null;
1041
1042 FieldSearchResult objects = AutoFinder.findObjects(
1043 APIA, new String[]{"pid", "title"}, maxresults, query);
1044 ObjectFields[] results = objects.getResultList();
1045
1046 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1047 // that's not a document object:
1048 pids = new String[results.length-1]; // not storing collection object
1049 int index = 0; // keeps track of docPid index
1050 for(int i = 0; i < results.length; i++) {
1051 // check it's not a collection object
1052 if(!results[i].getPid().endsWith(_COLLECTION)) {
1053 pids[index] = results[i].getPid();
1054 index++;
1055 }
1056 }
1057
1058 return pids;
1059 }
1060
1061 /** Given the pid of a document fedora data object, this method will return
1062 * all itemIDs that are part of that data object and are Sections. For further
1063 * information see interface Comparable (implemented by String), SortedSet
1064 * and TreeSet.
1065 * @return an array of itemIDs of the Sections of the document,
1066 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1067 * @param docPID is a fedora pid identifying a greenstone document object.
1068 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1069 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1070 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1071 */
1072 public String[] getSectionNames(String docPID) throws RemoteException {
1073 // DatastreamDef[] listDatastreams(
1074 // java.lang.String pid, java.lang.String asOfDateTime)
1075
1076 // listDatastreams returns information on each item (including itemID=dsID)
1077 // in the document object indicated by docPID
1078
1079 // Need to give an object version number, because null for asOfDateTime
1080 // does not return any datastreams!
1081 String[] times = APIA.getObjectHistory(docPID);
1082
1083 DatastreamDef[] datastreams = APIA.listDatastreams(
1084 docPID, times[times.length-1]);
1085
1086 // TreeSet is a SortedSet. We're going to put Strings into it,
1087 // and Strings implement interface Comparable already.
1088 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1089 for(int i = 0; i < datastreams.length; i++) {
1090 String itemID = datastreams[i].getID();
1091 if (itemID.startsWith("SECTION"))
1092 orderedList.add(itemID);
1093 }
1094
1095 String[] sectionNames = new String[orderedList.size()];
1096 orderedList.toArray(sectionNames);
1097 orderedList = null;
1098 return sectionNames;
1099 }
1100
1101 /** Given the pid of a document fedora data object, this method will return all
1102 * itemIDs that are part of that data object and are Sections, but just the
1103 * Section numbers are returned. For further information see interface Comparable
1104 * (implemented by String), SortedSet and TreeSet.
1105 * @return an array of itemIDs of the Section numbers of the document
1106 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1107 * @param docPID is a fedora pid identifying a greenstone document object.
1108 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1109 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1110 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1111 */
1112 public String[] getSectionNumbers(String docPID) throws RemoteException {
1113 String[] times = APIA.getObjectHistory(docPID);
1114
1115 DatastreamDef[] datastreams
1116 = APIA.listDatastreams(docPID, times[times.length-1]);
1117 //Vector v = new Vector(datastreams.length);
1118 TreeSet orderedList = new TreeSet();
1119
1120 for(int i = 0; i < datastreams.length; i++) {
1121 String itemID = datastreams[i].getID();
1122 if (itemID.startsWith("SECTION")) {
1123 //int index = SECTION.length();
1124 //itemID = itemID.substring(index);
1125 itemID = removePrefix(itemID, SECTION);
1126 orderedList.add(itemID);
1127 }
1128 }
1129
1130 String[] sectionNumbers = new String[orderedList.size()];
1131 orderedList.toArray(sectionNumbers);
1132 orderedList = null;
1133
1134 return sectionNumbers;
1135 }
1136
1137 /** @return the titles for the document sections denoted by the parameters.
1138 * @param docPID is a fedora pid identifying a greenstone document object.
1139 * @param sectionIDs is a list of identifiers identifying sections in the
1140 * document denoted by docPID, whose titles need to be returned. Each
1141 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1142 * or a section number (eg. 1.5.1). */
1143 public String[] getTitles(String docPID, String[] sectionIDs)
1144 throws RemoteException, UnsupportedEncodingException,
1145 SAXException, IOException
1146 {
1147 String[] titles = new String[sectionIDs.length];
1148 for(int i = 0; i < titles.length; i++)
1149 titles[i] = getTitle(docPID, sectionIDs[i]);
1150 return titles;
1151 }
1152
1153 /** @return the title for the document section denoted by the parameters.
1154 * @param docPID is a fedora pid identifying a greenstone document object.
1155 * @param sectionID identifies the particular section in the document denoted
1156 * by docPID, whose title needs to be returned. The sectionID may be either a
1157 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1158 public String getTitle(String docPID, String sectionID)
1159 throws RemoteException, UnsupportedEncodingException,
1160 SAXException, IOException
1161 {
1162 // Compose the itemID for the EX data stream from the number in the
1163 // sectionID:
1164 String exID = removePrefix(sectionID, SECTION);
1165 exID = EX+convertToMetaNumber(exID);
1166
1167 // Retrieve the extracted metadata stream (EX, in XML) for the given
1168 // section
1169 String exStream = getItem(docPID, exID);
1170
1171 // Extract the title from the XML, look for:
1172 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1173 InputSource source = new InputSource(new StringReader(exStream));
1174 Document doc = builder.parse(source);
1175 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1176 NodeList children = docEl.getElementsByTagName(
1177 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1178 for(int i = 0; i < children.getLength(); i++) {
1179 Element e = (Element)children.item(i);
1180 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1181 return FedoraCommons.getValue(e); // extract and return the title
1182 }
1183 return ""; // if we got here, then we couldn't find a title
1184 }
1185
1186 /** @return the section's XML (as a String) as it is stored in fedora.
1187 * Works out if sectionID is a sectionName or sectionNumber.
1188 * @param docPID - a fedora pid identifying a greenstone document object.
1189 * @param sectionID - identifyies the particular section in the
1190 * document denoted by docPID, may be a section name or number. */
1191 public String getSection(String docPID, String sectionID)
1192 throws RemoteException, UnsupportedEncodingException
1193 {
1194 if(!sectionID.startsWith(SECTION)) // then it has only section number
1195 sectionID = SECTION+sectionID;
1196
1197 String sectionXML = this.getItem(docPID, sectionID);
1198 return sectionXML;
1199 }
1200
1201 /** @return the required section's DC metadata XML datastream.
1202 * @param docPID - a fedora pid identifying a greenstone document object.
1203 * @param sectionID - identifyies the particular section in the
1204 * document denoted by docPID, may be a section name or number. */
1205 public String getSectionDCMetadata(String docPID, String sectionID)
1206 throws RemoteException, UnsupportedEncodingException
1207 {
1208 String dcID = removePrefix(sectionID, SECTION);
1209 // ensure we have just the section number
1210 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1211
1212 // now get the DC datastream for that number
1213 String dcXML = this.getItem(docPID, dcID);
1214 return dcXML;
1215 }
1216
1217 /** Returns the section EX metadata XML datastream for SectionID which may be
1218 * a section name or number. Currently a few EX files are named awkwardly:
1219 * the EX file for section 1.* is actually associated with datastream EX.*.
1220 * But subsequent EX datastreams are named appropriately: for instance,
1221 * EX2.1.1 matches with section 2.1.1
1222 * @return the required section's EX metadata XML datastream.
1223 * @param docPID - a fedora pid identifying a greenstone document object.
1224 * @param sectionID - identifyies the particular section in the
1225 * document denoted by docPID, may be a section name or number. */
1226 public String getSectionEXMetadata(String docPID, String sectionID)
1227 throws RemoteException, UnsupportedEncodingException
1228 {
1229 String exID = removePrefix(sectionID, SECTION);
1230 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1231
1232 // now get the EX datastream for that for number
1233 String exXML = this.getItem(docPID, exID);
1234 return exXML;
1235 }
1236
1237 /** Given a documentNode element, adds the nodetype attribute to all of its
1238 * docNode descendants. The nodetype is either Root, Internal or Leaf to indicate
1239 * whether the docnode is a toplevel document Node, or has children or has none.
1240 * @param e - the documentNode element whose descendants' nodetypes will be set
1241 * at method's end. */
1242 protected void addNodeTypeToDescendants(Element e) {
1243 NodeList sections = e.getElementsByTagName(SECTION_ELEMENT);
1244 for(int i = 0; i < sections.getLength(); i++) {
1245 Element section = (Element)sections.item(i);
1246 NodeList descendants = section.getElementsByTagName(SECTION_ELEMENT);
1247 if(descendants.getLength() > 0) {
1248 // if there are any descendants (which includes children) that are SECTIONS
1249 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
1250 } else {
1251 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
1252 }
1253 }
1254 }
1255
1256
1257 /** @return the part of the TOC XML file (which outlines doc structure)
1258 * relating to the given section. This includes the section denoted by
1259 * sectionID as well as all descendent subsections thereof.
1260 * @param docPID - a fedora pid identifying a greenstone document object.
1261 * @param sectionID - identifyies the particular section in the
1262 * document denoted by docPID, may be a section name or number.
1263 * @param structure can contain any combination of: ancestors, parent,
1264 * siblings, children, descendants, entire, specifying the portion of
1265 * the structure to retrieve.
1266 * @param info can contain any combination of: siblingPosition, numSiblings,
1267 * numChildren, requesting additional information about the structure. */
1268 public Element getSectionStructureXML(String docPID, String sectionID, String structure, String info)
1269 throws RemoteException, UnsupportedEncodingException, SAXException, IOException
1270 {
1271 // get the TableOfContents (TOC) XML datastream as a String
1272 String xmlTOC = getTOC(docPID);
1273
1274 // convert it into a DOM document
1275 InputSource source = new InputSource(new StringReader(xmlTOC));
1276 Document doc = builder.parse(source);
1277 // toplevel element docEl = <Section id="1"></Section>
1278 Element docEl = doc.getDocumentElement();
1279 addNodeTypeToDescendants(docEl);
1280 docEl.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
1281
1282 if(structure.indexOf("entire") != -1) { // don't need to find the specific section, doc root is what's required
1283 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1284 return docEl;
1285 }
1286
1287 if(sectionID.equals("")) {
1288 sectionID = "1";
1289 }
1290
1291 // Store just the number
1292 String sectionNumber = removePrefix(sectionID, SECTION);
1293 // Check whether we're requested to return the toplevel element itself
1294 // If sectionNumber=1, then the top-level element/document element
1295 // of the TOC XML is requested, so return the TOC as is.
1296 if(sectionNumber.equals("1") && structure.indexOf("descendants") != -1) {
1297 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1298 return docEl;
1299 }
1300
1301 // if the root is the section required, return that
1302 if(docEl.getTagName().equals(SECTION_ELEMENT)
1303 && docEl.getAttribute(ID).equals(sectionNumber)) {
1304 Element substructure = getSubstructure(docEl, structure);
1305 return getStructureInfo(substructure.getOwnerDocument(), docEl, info);
1306 //return docEl;
1307 }
1308
1309
1310 // Else, get all <Section> elements and find the
1311 // <Section id="sectionNumber"></Section> and return that
1312 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1313 for(int i = 0; i < sections.getLength(); i++) {
1314
1315 Element e = (Element)sections.item(i);
1316 if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) {
1317 Element substructure = getSubstructure(e, structure);
1318 return getStructureInfo(substructure.getOwnerDocument(), e, info);
1319 }
1320 }
1321
1322 return null; // not found
1323 }
1324
1325
1326 /** Implements browsing document titles of a greenstone collection stored in
1327 * the fedora repository by letter.
1328 * @return the document pids whose titles start with the given letter.
1329 * @param collName - the name of the collection.
1330 * @param letter - the starting letter to browse by.
1331 */
1332 public String[] browseTitlesByLetter(final String collName, final String letter)
1333 throws RemoteException, FedoraVersionNotSupportedException
1334 {
1335 String[] pids = null;
1336
1337 // We want to do the following kind of search (assuming letter=f
1338 // and collName=demo):
1339 // pid~greenstone:demo* title~f*
1340
1341 // We don't need to normalise the letter first (to search titles starting
1342 // with both uppercase and lowercase versions of the letter), because
1343 // Fedora always searches for both.
1344 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1345 // ANYWHERE in their titles!
1346 // SOLUTION: search the collection for all titles containing f as given,
1347 // retrieving pid and title fields. Then from the list of results, select
1348 // only those titles that start with the given letter.
1349 // This may seem an unnecessarily cumbersome job (when it looked like it
1350 // should have worked with just title~f*), BUT, at least the resulting
1351 // documents will be reduced to a set of titles containing f; rather than
1352 // having to search *all* documents in the collection.
1353 final String title = letter+WILDCARD;
1354
1355 FieldSearchResult objects = findObjectsWithTitlesContaining(
1356 collName, title);
1357 ObjectFields[] results = objects.getResultList();
1358 TreeSet v = new TreeSet(); // TreeSet to return the results in
1359 //alphabetical order
1360 for(int i = 0; i < results.length; i++) {
1361 // from the result list, select those titles that don't
1362 // just *contain* the letter, but actually start with it:
1363 String resultTitle = results[i].getTitle(0);
1364 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1365 String pid = results[i].getPid();
1366 // skip the collection object itself
1367 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1368 v.add(pid);
1369 //LOG.debug(resultTitle);
1370 }
1371 }
1372 }
1373 pids = new String[v.size()];
1374 v.toArray(pids);
1375 return pids;
1376 }
1377
1378 /** Implements querying document DC titles of a greenstone collection stored in
1379 * the fedora repository for a term that may occur anywhere in their titles.
1380 * @return the document pids whose DC titles contain the parameter term.
1381 * @param titleContents - the word or phrase to search the collection's
1382 * document titles for. Only one word, and this method finds Greenstone
1383 * DOCUMENT titles CONTAINING that word (if any).
1384 * @param startsWith - if true, searches for titles that start with
1385 * titleContents. Else it searches for titles that contain titleContents. */
1386 public String[] searchDocumentTitles(String collName, String titleContents,
1387 boolean startsWith)
1388 throws RemoteException, FedoraVersionNotSupportedException
1389 {
1390 String[] pids = null;
1391
1392 // We want to do the following kind of search (when written in Fedora's
1393 // REST format - see http://localhost:8080/fedora/search):
1394 // pid~greenstone:<colname>-* title~<1st word of titleContents>
1395
1396 // We don't need to normalise the word first (to search titles starting
1397 // with both uppercase and lowercase versions of it), because
1398 // Fedora always searches for the normalised word.
1399
1400 // 2 difficulties:
1401 // - We can only search for single words with Fedora's Conditional Search.
1402 // Obtain pids and titles of documents containing the first word and then
1403 // we filter the titles to those containing the entire phrase of
1404 // titleContents.
1405 // - Searching for title~FirstWord returns all documents containing
1406 // this word ANYWHERE in their titles. If parameter startsWith is false,
1407 // then this is fine. But if parameter startsWith is true, then go
1408 // through all the resulting titles found (containing FirstWord), select
1409 // only pids of those titles that contain the entire phrase titleContents
1410
1411 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1412
1413 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1414 // if titleContents is a phrase (contains space), then it's not
1415 // a single word, in which case search for just the first word
1416 String title = titleContents; // assume it's a single word
1417 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1418 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1419
1420 FieldSearchResult objects = findObjectsWithTitlesContaining(
1421 collName, title);
1422 if(objects == null) {
1423 final String[] empty = {};
1424 return empty;
1425 }
1426
1427 // Go through all the titles found and for those that match the criteria*,
1428 // store their pid. *Criteria: titles that start with OR contain the
1429 // word OR phrase of titleContents.
1430 ObjectFields[] results = objects.getResultList();
1431 Vector v = new Vector(); // return pids in the order found
1432 for(int i = 0; i < results.length; i++) {
1433 // from the result list, select those titles that don't
1434 // just *contain* the first word, but the entire phrase of
1435 // words in titleContents:
1436 String resultTitle = results[i].getTitle(0);
1437 boolean accepted = false; // accept the resultTitle found
1438
1439 String resultPID = results[i].getPid();
1440 // skip the collection object itself, since it's not a document
1441 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1442 accepted = false;
1443 }
1444 // if titleContents is a single word and we are checking
1445 // whether resultTitle contains titleContents:
1446 else if(indexOfFirstSpace == -1) { // titleContents is a single word
1447 if(!startsWith) // titles that *contain* the word titleContents
1448 accepted = true; //accept all titles found
1449 // else startWith: accept titles starting with word titleContents
1450 else if (resultTitle.toLowerCase().startsWith(
1451 titleContents.toLowerCase()))
1452 accepted = true;
1453
1454 }
1455 else { // otherwise, titleContents is a phrase of >1 word, need
1456 // to check that the result title contains the entire phrase
1457 if(startsWith && resultTitle.toLowerCase().startsWith(
1458 titleContents.toLowerCase()))
1459 accepted = true;
1460 else if(!startsWith && resultTitle.toLowerCase().contains(
1461 titleContents.toLowerCase()))
1462 accepted = true;
1463 }
1464
1465 // if the resultTitle fit the criteria, store its pid
1466 if(accepted) {
1467 v.add(resultPID);
1468 //System.out.println(resultTitle);
1469 }
1470
1471 }
1472 pids = new String[v.size()];
1473 v.toArray(pids);
1474 return pids;
1475 }
1476
1477
1478 /**
1479 * @param collName - the collection of documents we'll be searching in.
1480 * @param titleWord - the word we'll be searching the document titles for.
1481 * (Fedora's search returns all objects whose title contains that word).
1482 *
1483 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1484 * (see link):
1485 * <pre>
1486 * "There are two search methods: a search on all fields or a search on
1487 * specific fields. To search all fields the setTerms function of the
1488 * FieldSearchQuery must be used, with the paramater being the desired string.
1489 *
1490 * To search by specific fields, you must create an array of Condition
1491 * objects. Each condition consists of three parts:
1492 * the field to be searched (.setProperty()),
1493 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1494 * and the search string (.setValue())"
1495 * </pre>
1496 * We want to use the second search method above when browsing and searching,
1497 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1498 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1499 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1500 *
1501 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1502 * web services are defined. (The web.xml defines the "Servlets for REST-based
1503 * interfaces to the Fedora Repository Server").
1504 * Do a search on the word "search":
1505 * fedora.server.access.FieldSearchServlet is the class we need to look at
1506 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1507 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1508 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1509 * is what's used in the fedora client application that makes use of
1510 * the SOAP-based interface.
1511 *
1512 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1513 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1514 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1515 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1516 */
1517 protected FieldSearchResult findObjectsWithTitlesContaining(
1518 String collName, final String titleWord)
1519 throws RemoteException, FedoraVersionNotSupportedException
1520 {
1521 // Searching for pids of the form "greenstone:gs2mgdemo-*";
1522 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1523
1524 Condition[] conditions = new Condition[2];
1525 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1526 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1527
1528 FieldSearchQuery query = new FieldSearchQuery();
1529 query.setConditions(conditions);
1530
1531 // We'd like pid and title returned for each object, because we'll make
1532 // use of title. We pass maxResults=null to get all objects that match
1533 // (i.e. all collections).
1534 FieldSearchResult objects = null;
1535 final String[] retrieveFields = {"pid", "title"};
1536 try {
1537 objects = AutoFinder.findObjects(
1538 APIA, retrieveFields, maxresults, query);
1539 // collection = APIA.findObjects(new String[]{"pid", "title"},
1540 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1541 } catch(RemoteException ex) {
1542 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1543 // fedoraVersion is too low, searching/browsing is not possible
1544 // (because class Condition has changed after 2.0, from 2.1.1
1545 // onwards)
1546 throw new FedoraVersionNotSupportedException(fedoraVersion);
1547 } else {
1548 LOG.error(
1549 "Remote exception when calling web service operation " +
1550 "findObject() to execute search:\n" + ex.getMessage());
1551 ex.printStackTrace();
1552 throw ex;
1553 }
1554 }
1555 return objects; // return the FieldSearchResult objects found
1556 }
1557
1558 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1559 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1560 * @param docPID - pid of a greenstone document in the fedora repository. */
1561 public String getDocName(String docPID) {
1562 return docPID.substring(docPID.indexOf('-')+1);
1563 }
1564
1565 /** @return the &lt;name&gt; in the parameter collPID
1566 * (greenstone:&lt;name&gt;-collection)
1567 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1568 * in the docPID (greenstone:&lt;name&gt;-docID).
1569 * @param collPID - pid of a greenstone collection in the fedora repository. */
1570 public String getCollectionName(String collPID) {
1571 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1572 }
1573
1574
1575 /** Return the TOC substructure requested
1576 * @return an element containing a copy if element e with either only its child
1577 * elements or with all its descendants and/or its ancestors or only its parent
1578 * and/or its siblings (depending on what the parameter structure specifies).
1579 * @param e - the element to start copying from and whose structure is requested.
1580 * @param structure - a string containing any combination of the values:
1581 * ancestors, parent, siblings, children, descendants,
1582 * specifying the portion of the structure to retrieve.
1583 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1584 */
1585 protected Element getSubstructure(Element original, String structure)
1586 {
1587 Document doc = builder.newDocument();
1588
1589 boolean descendants = (structure.indexOf("descendants") != -1) ? true : false;
1590 Node current = doc.importNode(original, descendants);
1591
1592 // descendants=true: import/copy descendants.
1593 // Else, copy just current node original (later copy its direct children)
1594
1595 Node parentOfCurrent = null;
1596 Node parentOfOriginal = original.getParentNode();
1597 if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot)
1598 parentOfOriginal = null;
1599 }
1600
1601 if(parentOfOriginal == null) { // no parentNode, so current is the root node.
1602 // can't get ancestors/parent/siblings, since all these need parentNode
1603 doc.appendChild(current);
1604 } else { // siblings, ancestors and parent requests all require parent node to exist
1605 // First check if we need to get ancestors, else for whether parent is required
1606 if(structure.indexOf("ancestors") != -1) {
1607 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1608
1609 Node child = null;
1610 Node parent = parentOfCurrent; // the copy
1611 Node n = parentOfOriginal.getParentNode(); // the doc to copy from
1612
1613 while(n != null && n != original.getOwnerDocument()) {
1614 child = parent;
1615 parent = doc.importNode(n, false); // no descendants
1616 parent.appendChild(child);
1617 n = n.getParentNode();
1618 }
1619
1620 doc.appendChild(parent); // need to put the copied node into a document
1621 // else it won't have a parent doc (DOMSource can't work with it
1622 // without it having a document parent).
1623
1624 } else if(structure.indexOf("parent") != -1) {
1625 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1626 //parentOfCurrent.appendChild(current);
1627 doc.appendChild(parentOfCurrent);
1628 }
1629
1630 // a request for siblings is independently tested for
1631 if(structure.indexOf("siblings") != -1) {
1632 // only import parent if we didn't already import
1633 // it for a request for ancestors or parent
1634 if(parentOfCurrent == null) {
1635 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1636 doc.appendChild(parentOfCurrent); // this becomes the root
1637 }
1638 // now the siblings of current (children of parentOfCurrent)
1639 NodeList children = parentOfOriginal.getChildNodes();
1640 for(int i = 0; i < children.getLength(); i++) {
1641 Node n = children.item(i);
1642
1643 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1644 if((Element)n != original) { // skip original which was already imported
1645 Node child = doc.importNode(n, false); // no descendants
1646 parentOfCurrent.appendChild(child);
1647 } else { // already imported Current element, insert at this position
1648 parentOfCurrent.appendChild(current);
1649 }
1650
1651 }
1652 }
1653 } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests
1654 // (sibling request adds the current node into a particular position)
1655 parentOfCurrent.appendChild(current);
1656 // need to put the copied node into a document
1657 // else it won't have a parent doc (DOMSource can't work with it
1658 // without it having a document parent).
1659 } else { // when only children or descendants were requested, current becomes root document
1660 doc.appendChild(current);
1661 }
1662 }
1663
1664 // if we are not recursively copying all descendants, then copy just
1665 // the childnodes of current:
1666 if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children
1667
1668 // get e's children and copy them into the new document
1669 NodeList children = original.getChildNodes();
1670 for(int i = 0; i < children.getLength(); i++) {
1671 // create copy
1672 Node n = doc.importNode(children.item(i), false);
1673 // attach it to parent
1674 current.appendChild(n);
1675
1676 // Now we need to indicate whether this new node (child) is a leaf
1677 // or not. (This is necessary for getChildrenOfSection(), else
1678 // it's hard to know if the children are leaves or have further
1679 // subsections.
1680 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1681 // we're dealing only with section children
1682
1683 // Check if the matching original had children:
1684 Element originalsChild = (Element)children.item(i);
1685 NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT);
1686 if(grandchildren.getLength() > 0) {
1687 // original's child has children, so indicate this
1688 // in the copied child:
1689 Element child = (Element)current;
1690 // child.setAttribute(TYPE, INTERNAL_NODE);
1691
1692 }
1693 }
1694 }
1695 }
1696
1697 return doc.getDocumentElement();
1698 }
1699
1700
1701 /** Return the TOC substructure with the requested structural info.
1702 * @return an element containing a copy if element e with either only its child
1703 * elements or with all its descendants and/or its ancestors or only its parent
1704 * and/or its siblings (depending on what the parameter structure specifies).
1705 * Returns null if the element, e, passed in is null.
1706 * @param doc - the new document into whose root element the structural information
1707 * will be inserted as attributes.
1708 * @param e - the element to start copying from and whose structure is requested.
1709 * @param info - a string containing any combination of the values: numChildren,
1710 * numSiblings, siblingPosition. The requested info gets added as attributes to
1711 * the returned root element.
1712 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1713 */
1714 protected Element getStructureInfo(Document doc, Element e, String info)
1715 {
1716 if(e == null) {
1717 return null;
1718 }
1719
1720 Element root = doc.getDocumentElement();
1721
1722 if(!info.equals("")) {
1723 if(info.indexOf("numChildren") != -1) {
1724 //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength();
1725 int numChildren = 0;
1726
1727 NodeList children = e.getChildNodes();
1728 for(int i = 0; i < children.getLength(); i++) {
1729 Node n = children.item(i);
1730 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1731 numChildren++;
1732 }
1733 }
1734
1735 root.setAttribute("numChildren", Integer.toString(numChildren));
1736 }
1737
1738 if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings
1739 int numSiblings = 0;
1740 int siblingPosition = 0;
1741
1742 Node parent = e.getParentNode();
1743 if(parent == null) {
1744 numSiblings = 0;
1745 siblingPosition = 1;
1746 } else {
1747 //numSiblings = parent.getChildNodes().getLength();
1748 NodeList siblings = parent.getChildNodes();
1749
1750 for(int i = 0; i < siblings.getLength(); i++) {
1751 Node n = siblings.item(i);
1752 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1753 if(e == (Element)n) {
1754 siblingPosition = numSiblings+1;
1755 } else { // count every sibling section element, except e itself
1756 numSiblings++;
1757 }
1758 }
1759 }
1760 }
1761
1762 if(info.indexOf("numSiblings") != -1) {
1763 root.setAttribute("numSiblings", Integer.toString(numSiblings));
1764 }
1765
1766 if(info.indexOf("siblingPosition") != -1) {
1767 root.setAttribute("siblingPosition", Integer.toString(siblingPosition));
1768 }
1769 }
1770 }
1771
1772 return root;
1773 }
1774
1775
1776 /**
1777 * Return a datastream of a document, given the document's id
1778 * and the item id of the datastream which is to be retrieved.
1779 * @return the XML (in String form) of the item denoted by itemID
1780 * that's part of the fedora data object denoted by docPID.
1781 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1782 * Can't retrieve images denoted by itemID using this method, only items
1783 * that are of XML format.
1784 * @param docPID - pid of a greenstone document in the fedora repository.
1785 * @param itemID - the itemID of a datastream of the fedora object
1786 * identified by docPID.
1787 */
1788 protected String getItem(String docPID, String itemID)
1789 throws RemoteException, UnsupportedEncodingException
1790 {
1791 // MIMETypedStream getDatastreamDissemination(
1792 // String pid, String dsID, asOfDateTime)
1793 MIMETypedStream datastream
1794 = APIA.getDatastreamDissemination(docPID, itemID, null);
1795 return new String(datastream.getStream(), UTF8);
1796 }
1797
1798 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1799 * returns "1.2.1".
1800 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1801 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1802 * However, the string str is returned unchanged if the prefix does not occur
1803 * at the start of str.
1804 * @return the String parameter str without the prefix.
1805 * It can be used to return the number of an itemID of a greenstone document
1806 * stored in the fedora repository without the given prefix.
1807 * @param prefix - the prefix which ought to be removed from the itemID.
1808 * @param str - the value of the itemID.
1809 */
1810 protected String removePrefix(String str, String prefix) {
1811 // do nothing in those cases where the prefix is not in param str
1812 if(!str.startsWith(prefix))
1813 return str;
1814 // otherwise:
1815 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1816 return "1" + str.substring(prefix.length());
1817 } else {
1818 return str.substring(prefix.length());
1819 }
1820 }
1821
1822 /** Given a number of the form x(.y.z), this method returns this number
1823 * as is, except when x = 1, in which case, it would return .y.z
1824 * That is, given number=3.2.1, this method would return 3.2.1
1825 * But, given number=1.2.3, this method would return .2.3.
1826 * When number=1, it is NOT a special case: "" is returned as explained.
1827 * @param number - a proper (fedora-greenstone document) section number
1828 * @return the same number as it ought to be for the associated EX, DC datastreama.
1829 */
1830 protected String convertToMetaNumber(String number) {
1831 if(number.startsWith("1.") || number.equals("1"))
1832 return number.substring(1); // remove the first char: the initial '1'
1833 else return number;
1834 }
1835
1836 /** @return fedora's baseURL. It's of the form
1837 * "http://localhost:8080/fedora" */
1838 public String getBaseURL() { return baseURL; }
1839
1840 /** @return the portAddressURL (in use) of the Fedora APIA
1841 * web service (should be the endpoint location in the APIA's
1842 * WSDL file).
1843 * It's usually of the form baseURL+"/services/access" */
1844 public String getPortAddressURL() {
1845 return this.baseURL + this.portAddressSuffix;
1846 }
1847
1848 /** @return the baseURL for gsdlAssocFiles */
1849 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1850
1851 public static void main(String args[]) {
1852 try {
1853 FedoraConnection fedoraCon
1854 = new FedoraConnection(new File("fedoraGS3.properties"));
1855
1856 String[] pids = null;
1857 pids = fedoraCon.getCollections();
1858 String[] titles = fedoraCon.getCollectionTitles(pids);
1859 for(int i = 0; i < pids.length; i++) {
1860 System.out.println("extracted title:" + titles[i]);
1861 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1862 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1863 for(int j = 0; j < docPIDs.length; j++) {
1864 System.out.println("\tExtr doc title: " + docTitles[j]);
1865 }
1866 }
1867
1868 String PID = "greenstone:gs2mgdemo-collection";
1869 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1870 String dcXML = fedoraCon.getDC(PID);
1871 String exXML = fedoraCon.getEX(PID);
1872 String tocXML = fedoraCon.getTOC(docPID);
1873 System.out.println("Dublin Core Metadata for " + PID
1874 + " is:\n" + dcXML);
1875 System.out.println("GS3 extracted metadata for " + PID
1876 + " is:\n" + exXML);
1877 System.out.println("Table of Contents for " + docPID
1878 + " is:\n" + tocXML);
1879
1880
1881 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1882 System.out.println("\nSection names for " + docPID + " are:");
1883 for(int i = 0; i < sectionNames.length; i++)
1884 System.out.println(sectionNames[i]);
1885
1886 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1887 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1888 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1889 System.out.println("\nSection numbers for " + docPID + " are:");
1890 for(int i = 0; i < sectionNumbers.length; i++) {
1891 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1892 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1893 }
1894
1895 String sectionID = "SECTION1"; //SECTION1.5
1896 System.out.println("\n");
1897
1898 System.out.println(
1899 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1900 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1901 for(int i = 0; i < pids.length; i++)
1902 System.out.println(pids[i]);
1903
1904 System.out.println(
1905 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1906 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1907 for(int i = 0; i < pids.length; i++)
1908 System.out.println(pids[i]);
1909
1910 System.out.println("\nDone - exiting.");
1911 System.exit(0);
1912 } catch(RemoteException re) {
1913 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1914 re.printStackTrace();
1915 } catch(Exception e) {
1916 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1917 e.printStackTrace();
1918 //LOG.error("Unable to instantiate FedoraConnection\n" + e, e);
1919 }
1920 }
1921}
Note: See TracBrowser for help on using the repository browser.