source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 26171

Last change on this file since 26171 was 26171, checked in by ak19, 12 years ago

Uncommitted changes from ages back to fedoraGS3 classes to get greenstone to work as an interface to fedora repository backend.

File size: 85.5 KB
Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22import org.greenstone.gsdl3.util.GSXML;
23
24import fedora.client.utility.AutoFinder;
25import fedora.server.access.FedoraAPIAServiceLocator;
26// The object for accessing FedoraAPI-A web services:
27import fedora.server.access.FedoraAPIA;
28
29// The definitions for all complex fedora types:
30import fedora.server.types.gen.MIMETypedStream;
31import fedora.server.types.gen.RepositoryInfo;
32import fedora.server.types.gen.FieldSearchResult;
33import fedora.server.types.gen.FieldSearchQuery;
34import fedora.server.types.gen.DatastreamDef;
35import fedora.server.types.gen.ObjectFields;
36import fedora.server.types.gen.Condition;
37import fedora.server.types.gen.ComparisonOperator;
38//import fedora.server.types.gen.*;
39
40import javax.net.ssl.SSLHandshakeException;
41import java.net.ConnectException;
42import org.xml.sax.SAXException;
43import java.io.UnsupportedEncodingException;
44import java.io.IOException;
45import javax.xml.parsers.ParserConfigurationException;
46import java.net.MalformedURLException;
47import java.rmi.RemoteException;
48
49import java.io.StringReader;
50import java.io.FileInputStream;
51import java.io.File;
52import java.util.TreeSet;
53import java.util.Properties;
54import java.util.Vector;
55
56import java.awt.GridLayout;
57import javax.swing.JLabel;
58import javax.swing.JOptionPane;
59import javax.swing.JPanel;
60import javax.swing.JPasswordField;
61import javax.swing.JTextField;
62
63import org.apache.log4j.Logger;
64import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
65import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
66import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
68import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
69
70import javax.xml.parsers.DocumentBuilderFactory;
71import javax.xml.parsers.DocumentBuilder;
72import javax.xml.transform.*;
73
74import org.xml.sax.InputSource;
75import org.w3c.dom.Document;
76import org.w3c.dom.Element;
77import org.w3c.dom.NodeList;
78import org.w3c.dom.Node;
79
80/** Class that establishes a connection with Fedora's web services (via
81 * Java stub classes for the same) and then provides methods to retrieve
82 * Greenstone-specific data, such as the TOC, EX, DC, and Section
83 * datastreams of the Greenstone documents stored in Fedora's repository.
84 * These datastreams are returned as Strings without any changes being
85 * made to them.
86 * @author ak19
87*/
88public class FedoraConnection implements FedoraGS3DL {
89 /** The logging instance for this class */
90 private static final Logger LOG = Logger.getLogger(
91 FedoraConnection.class.getName());
92
93 /** The version of fedora that is supported by class FedoraConnection */
94 protected static final String SUPPORTED_VERSION = "3.3"; //"2.2.1";
95 // 3.3 works with genericSearch version 2.2
96
97 /* Some fixed strings of known literals */
98 protected static final String GET = "/get/";
99
100 // The DemoSOAPClient declares and uses the following as a static member
101 // Probably none of the APIA methods (web service methods) remembers
102 // state, that might explain why we can use it as a static member then.
103 /** The object used to access the Fedora API-A web service methods */
104 protected FedoraAPIA APIA;
105
106 /** Version of the running fedora server */
107 protected String fedoraVersion;
108 /** The location of the fedora server, usually of the form: protocol://host:port/fedora
109 * e.g. (and default) http://localhost:8080/fedora */
110 protected String baseURL;
111
112 /** The user-specified portAddressSuffix of the Fedora Access web services
113 * (endpoint URL in the WSDL), usually of the form
114 * http://localhost:8080/fedora/services/access
115 * Users can tell FedoraGS3 to try accessing that first by setting
116 * the "port.address.suffix" property in the properties file.
117 * FedoraGS3 itself will not write the portAddressSuffix currently used in
118 * the file for next time, but leave whatever value was entered in the
119 * properties file. The portAddress--not just suffix--currently in use (once
120 * the FedoraAPIA handle has been instantiated) can be obtained through
121 * getPortAddressURL() method. */
122 protected String portAddressSuffix;
123
124 /** The part of the portAddress that comes after the baseURL. By default and
125 * usually this is: "/services/access" */
126 protected static final String defaultPortAddressSuffix = "/services/access";
127
128 /** The preferred language of the display content */
129 protected String lang;
130 /** The maximum number of collections to retrieve */
131 protected int maxresults;
132 /** DocumentBuilder used to create and parse XML documents */
133 protected DocumentBuilder builder;
134
135 /** Static method that returns the version of Fedora supported by this
136 * class FedoraConnection. */
137 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
138 /** The version of the running Fedora server, which may or may not
139 * match the supported version. */
140 public String getFedoraVersion() { return fedoraVersion; }
141
142 /** @return the default language used to query for titles (and anything else
143 * where there are multiple language options). Upon initialisation, this
144 * defaults to English. */
145 public String getLanguage() { return lang; }
146
147 /** Sets the the default language used to query for titles (and anything else
148 * where there are multiple language options). If the default language for any
149 * query is not available, then English ("en") is used. If that's not available
150 * then the first other available language is used.
151 * @param lang - the two-letter language code to set the default language to.
152 */
153 public void setLanguage(String lang) { this.lang = lang; }
154
155 /** The default maximum number of search results returned for a search. Upon
156 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
157 public int getMaxResults() { return maxresults; }
158
159 /** Set the default maximum number of search results returned for a search.
160 * @param maxresults - the new default maximum number of search results to
161 * be returned. */
162 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
163
164 /** Code for this constructor is from DemoSOAPClient.java.
165 * Instantiates the APIA handle using the protocol, host, port, fedora
166 * server repository username and password.
167 * @param host - the fedora server host (may be prefixed with http:// or
168 * https:// if parameter protocol is empty). If there's no protocol, and
169 * no protocol prefixed to the host, then the protocol defaults to http.
170 * @param protocol - either http or https (or empty "")
171 * @param port - the port on which fedora is running.
172 * @param fedoraServerUsername - the administrator username required to
173 * access the fedora server's repository. ("fedoraAdmin" unless changed).
174 * @param fedoraServerPassword - the fedora server repository's
175 * administrator password. If none was set on fedora installation, this
176 * can be empty (""). */
177 public FedoraConnection(String protocol, String host, int port,
178 String fedoraServerUsername, String fedoraServerPassword)
179 throws ParserConfigurationException, MalformedURLException,
180 SSLHandshakeException, RemoteException, AuthenticationFailedException,
181 NotAFedoraServerException, ConnectException, Exception
182 {
183 try {
184 this.portAddressSuffix = "";
185 init(protocol, host, Integer.toString(port),
186 fedoraServerUsername, fedoraServerPassword);
187 } /*catch(RemoteException re) { //subclass of IOException
188 throw re;
189 } catch(SSLHandshakeException ssle) { //subclass of IOException
190 // this is also of type IOException
191 throw ssle;
192 }*/ catch(IOException ioe) { // connected to the wrong server
193 String exceptMsg = ioe.getMessage().toLowerCase();
194 if(exceptMsg.indexOf("request failed") != -1
195 || exceptMsg.indexOf("404") != -1)
196 throw new NotAFedoraServerException();
197 else // the IOException is not due the cause we thought it was, so
198 throw ioe; // rethrow whatever other IOException was caught (which
199 // could have been RemoteException or SSLHandshakeException
200 // or some other cause)
201 }
202 }
203
204 /** Default constructor which takes input from the user to get host, port,
205 * fedora username and password.
206 * It keeps looping to display authentication popup, until valid values are
207 * entered:
208 * (a) if password is wrong, a RemoteException is thrown and popup reappears;
209 * This popup keeps appearing until the password and username are correct (as
210 * long as there's indeed a fedora server listening at the given host and port).
211 * (b) SSLHandshakeException occurs: this happens EITHER when the user prefixed
212 * the 'https' protocol to the host string when it should have been 'http';
213 * OR the ssl connection failed for some other reason.
214 * Allowing for the 1st case, the authentication popup is displayed just once
215 * more. On the second (consec) attempt, the SSLHandshakeException is rethrown.
216 * NOTE: if a fedora server at the protocol (https or http) isn't accessible,
217 * it takes a long time for the SSLHandshakeException to be thrown.
218 * (c) if the connection is refused, then a ConnectException is thrown.
219 * In that case, it's
220 * EITHER because the host and port values that were entered are wrong (and
221 * the authentication popup dialog is redisplayed just once more allowing
222 * the user to correct host/port values)
223 * OR the entered host and part were right but the fedora server at this
224 * host and port is not running.
225 * On the second consecutive attempt where a ConnectionException is thrown,
226 * it's no longer processed but rethrown, as there's no use in redisplaying
227 * the authentication popup when the problem is not an authentication issue.
228 * (d) Another IOException (other than the SSLHandshakeException of (b))
229 * occurs when there is indeed a server listening at the host and port
230 * entered, but it's not a Fedora server, because it is unable to process
231 * Fedora requests. If the expected message is found in the exception, than
232 * the authentication popup is displayed. However, other causes for an
233 * IOException are not handled. In such cases, the IOException is rethrown.
234 * (Note that IOException is not in the throws clause - other causes for
235 * it being unknown, it can be be considered as the more generic Exception.
236 */
237 public FedoraConnection()
238 throws ParserConfigurationException, MalformedURLException,
239 CancelledException, ConnectException, RemoteException,
240 SSLHandshakeException, Exception
241 {
242 Properties properties = new Properties();
243 // loop to display fedora server authentication popup to
244 // get user input
245 setInitialisationProperties(properties);
246 properties = null; // finished
247 }
248
249 /** Single argument constructor that takes the name of the properties file
250 * defining the values of the initialisation parameters required to
251 * instantiate a FedoraConnection. These are fedora server username, password,
252 * host and port. If these values are not present in the file, they are set
253 * to "" before showing the initialisation input dialog.
254 * @param propertyFile is the name of the properties file specifying the
255 * values for Fedora server username, password, host and port. */
256 public FedoraConnection(File propertyFile)
257 throws ParserConfigurationException, MalformedURLException,
258 CancelledException, ConnectException, RemoteException,
259 SSLHandshakeException, Exception
260 {
261 Properties properties = new Properties();
262 // Load the properties from the given file
263 try{
264 if(propertyFile.exists()) {
265 properties.load(new FileInputStream(propertyFile));
266 }
267 } catch(Exception e) {
268 // If the file didn't exist or could not be located,
269 // then we just continue by creating empty properties
270 LOG.warn("Exception loading from propertyFile "
271 + propertyFile + ": " + e);
272 }
273
274 // Go through the process of showing the initialisation dialog
275 setInitialisationProperties(properties);
276
277 // Now let's save whatever values the user may have entered into the
278 // input dialog as the default values for next time the dialog shows
279 try {
280 java.io.FileOutputStream out = new java.io.FileOutputStream(
281 propertyFile); // same file as properties loading file
282 // First make sure errormessage gets stored as "" and doesn't
283 // cause problems next time.
284 properties.setProperty("errormessage", "");
285 // Don't save passwords
286 properties.setProperty("password", "");
287 // If the portAddressSuffix is in the file already, then it's
288 // user-specified and we shouldn't change it. But if there is no
289 // such property in the file, then create it and write it to the file
290 // with an empty string value:
291 String portSuffix = properties.getProperty("port.address.suffix");
292 if(portSuffix == null) {
293 properties.setProperty("port.address.suffix", "");
294 }
295
296 properties.store(out, "fedoraGS3 properties"); // write properties
297 // Javadoc states that "The output stream remains open after this
298 // method (Properties.store) returns." So we close it here
299 out.close();
300 } catch(Exception e) {
301 LOG.warn("Exception writing to propertyFile "
302 + propertyFile + ": " + e);
303 }
304 properties = null; // finished
305 }
306
307 /** Method that loops to display the dialog that retrieves the
308 * fedora server initialisation properties from the user. If there
309 * is a property file with values set already, it will display
310 * the previously entered values by loading them from that file.
311 * Otherwise, input fields in the dialog are empty.
312 * @param properties the Properties Hashmap storing values for
313 * username, password, host and port (and any errormessage). */
314 protected void setInitialisationProperties(Properties properties)
315 throws ParserConfigurationException, MalformedURLException,
316 CancelledException, ConnectException, RemoteException,
317 SSLHandshakeException, Exception
318 {
319 // keep looping to display authentication popup, until valid values are
320 // entered (except when a ConnectionRefused Exception is caught - this
321 // needs to be rethrown):
322 boolean authenticated = true;
323 // reset any error messages that may have been stored (should not be
324 // the case, but if there had been any difficulty during storing, it
325 // may not have written out an empty errorMessage)
326 properties.setProperty("errormessage", "");
327 do{
328 // show the Authentication-popup:
329 // By passing the HashMap Properties, user-updated values will
330 // be persistent in the authentication-popup fields (rather than
331 // reset to the default initial values).
332 properties = showAuthenticationPopup(properties);
333 String fedoraServerUsername = properties.getProperty("username", "");
334 String fedoraServerPassword = properties.getProperty("password", "");
335 String host = properties.getProperty("host", "");
336 String port = properties.getProperty("port", "");
337 //String protocol = host.startsWith("http") ? "" : "http://";
338 String protocol = "http://";
339 if(host.startsWith("http") || host.startsWith("https"))
340 protocol = "";
341 // NOTE THAT: if a fedora server at https:// is not accessible,
342 // it takes a long time for the authentication popup to reappear.
343
344 try{
345 this.portAddressSuffix
346 = properties.getProperty("port.address.suffix", "");
347 // Use the FedoraClient utility to get the SOAP stub for APIA.
348 // This SOAP stub enables the client to connect to a Fedora
349 // repository via the API-A web service interface.
350 init(protocol, host, port,
351 fedoraServerUsername, fedoraServerPassword);
352 // will throw Exception if it can't instantiate APIA
353
354 // if no exception thrown in the initialisation statement above,
355 // then we have been authenticated:
356 authenticated = true;
357 } catch(AuthenticationFailedException afe) {
358 authenticated = false;
359 properties.setProperty("errormessage", afe.getMessage());
360 } catch(RemoteException e) { // causes could be various
361 String reason = e.getMessage();
362 if(e.getCause() != null) {
363 // For instance, if a ConnectException indicating
364 // 'Connection Refused' or a java.net.UnknownHostException
365 // caused the RemoteException
366
367 // Strip out prefix "Nested exception is..." from the
368 // encapsulating Exception's message, by using the Cause's
369 // message. Keep Exception classname to give it some context:
370 reason = e.getCause().getClass().getName() + ": "
371 + e.getCause().getMessage();
372 // Give some more information if the connection was refused.
373 // (This can also happen when the Fedora server is not running)
374 if(e.getCause().getClass().equals(ConnectException.class)) {
375 reason += FedoraGS3Exception.connectionRefusedMessage;
376 }
377 }
378 // if the message indicates that a server was running there,
379 // then we tell the user it was not a Fedora server
380 if(reason.toLowerCase().contains("404")
381 || reason.toLowerCase().contains("request failed"))
382 {
383 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
384 }
385 authenticated = false;
386 properties.setProperty("errormessage", reason);
387 } catch(ConnectException e) {
388 properties.setProperty("errormessage",
389 FedoraGS3Exception.connectionRefusedMessage);
390 authenticated = false;
391 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
392 // be handled before IOException, as it's an IOException subclass.
393 authenticated = false;
394 properties.setProperty("errormessage",
395 FedoraGS3Exception.sslHandshakeExceptionMessage);
396 // we won't prefix the host with http for the user, as https
397 // might be right after all, and something else might have gone
398 // during the connection attempt instead.
399 //host = host.replace("https", "http"); //setting it for them
400 //properties.setProperty("host", host);
401 } catch(IOException ioe) { // occurs when we try to connect to a
402 // host/port where some server other than Fedora's is listening
403 // (e.g. if we end up connecting to GS3's host and port).
404 // In that case, we can get exception messages like a 404:
405 // "Unable to instantiate FedoraConnection
406 // java.io.IOException: Request failed [404 /fedora/describe]"
407 // Test this by trying to connect to localhost at 9090 where GS3 is
408 String exceptMsg = ioe.getMessage().toLowerCase();
409 if(exceptMsg.indexOf("request failed") != -1
410 || exceptMsg.indexOf("404") != -1)
411 {
412 properties.setProperty("errormessage",
413 NotAFedoraServerException.MESSAGE
414 + "\n(" + ioe.getMessage() + ")");
415 } else if(exceptMsg.indexOf("401") != -1
416 || exceptMsg.indexOf("500") != -1)
417 {
418 authenticated = false;
419 properties.setProperty("errormessage", ioe.getMessage());
420 } else { // the exception occurred for some other reason, rethrow it
421 throw ioe;
422 }
423 }
424 } while(!authenticated); // will keep showing popup until auhentication
425 // and connection input values are valid
426 }
427
428 /**
429 * Static method that displays a popup to allow the user to provide Fedora
430 * authentication (username, pwd) and connection (protocol+host, port) details.
431 * @param properties is a Properties HashMap where the property Keys which must
432 * have been put in here in advance (even with "" Values if appropriate) are:
433 * <pre>
434 * - username
435 * - password
436 * - host (may - but need not - be prefixed with either of the protocols
437 * "http://" and "https://"
438 * - port
439 * - errorMessage (displayed near the top of the popup dialog). Can be "".
440 * </pre>
441 * The values stored in the properties HashMap for the above property are
442 * initially displayed in the fields and the user can overwrite them.
443 * This is useful in such cases where invalid values were entered and this
444 * popup must be redisplayed to allow the user to correct their previous input.
445 * @return the same HashMap Properties which was passed as parameter. */
446 protected static Properties showAuthenticationPopup(Properties properties)
447 throws CancelledException
448 {
449 // Retrieve all the properties -- defaults to "" if any are null
450 JTextField usernameField = new JTextField(
451 properties.getProperty("username", "fedoraAdmin"));
452 JTextField passwordField = new JPasswordField(
453 properties.getProperty("password", ""));
454 JTextField hostField = new JTextField(
455 properties.getProperty("host", "localhost"));
456 JTextField portField = new JTextField(
457 properties.getProperty("port", "8080"));
458
459 JPanel panel = new JPanel(new GridLayout(4,2));
460 panel.add(new JLabel("User Name"));
461 panel.add(usernameField);
462 panel.add(new JLabel("Password"));
463 panel.add(passwordField);
464 panel.add(new JLabel("Host"));
465 panel.add(hostField);
466 panel.add(new JLabel("Port"));
467 panel.add(portField);
468
469 String heading = "Fedora Server Admin Authentication:";
470 String errorMessage = properties.getProperty("errormessage", "");
471 if(!errorMessage.equals("")) {
472 heading = "=> " + errorMessage + "\n\n" + heading;
473 }
474 int option = JOptionPane.showConfirmDialog(null, new Object[] {
475 heading, panel},
476 "Enter Network Password",
477 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
478
479 if (option == JOptionPane.OK_OPTION) {
480 String fedoraServerUsername = usernameField.getText();
481 String fedoraServerPassword = passwordField.getText();
482 String host = hostField.getText();
483 String port = portField.getText();
484 properties.setProperty("username", fedoraServerUsername);
485 properties.setProperty("password", fedoraServerPassword);
486 properties.setProperty("host", host);
487 properties.setProperty("port", port);
488 } else { // Cancel option
489 throw new CancelledException();
490 }
491 return properties;
492 }
493
494 /** Init method that is called by the constructor to set some
495 * important member variables including instantiating the APIA object
496 * used to invoke the Fedora APIA web service operations.
497 * @param protocol can be http or https
498 * @param host is the name of the Fedora server host
499 * @param port is the port number (String form) of the Fedora server
500 * @param fedoraServerUsername is the user name to access the Fedora
501 * Server
502 * @param fedoraServerPassword is the password needed to access the
503 * Fedora Server
504 */
505 protected void init(String protocol, String host, String port,
506 String fedoraServerUsername, String fedoraServerPassword)
507 throws ParserConfigurationException, MalformedURLException,
508 AuthenticationFailedException, RemoteException, Exception
509 {
510 // initialise member variables
511 lang = ENGLISH;
512 maxresults = Integer.MAX_VALUE;
513 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
514 builder = factory.newDocumentBuilder();
515
516 // (protocol is "" if host already contains protocol)
517 if(!protocol.equals("") && !protocol.endsWith("://"))
518 protocol += "://";
519 // now create baseURL = protocol://host:port/fedora
520 this.baseURL = protocol + host + ":" + port + "/fedora";
521
522 // Get the FedoraAPIA handle to/stub of the Fedora web services
523 // New way of instantiating connection to Fedora is dependent on
524 // fewer files of FedoraClient.jar
525 FedoraAPIAServiceLocator serviceLocator
526 = new FedoraAPIAServiceLocator(fedoraServerUsername,
527 fedoraServerPassword);
528
529 APIA = null;
530 boolean isUserSpecifiedPortAddressSuffix = false;
531 // try any portAddressSuffix specified by the user
532 if(!this.portAddressSuffix.equals("")) {
533 isUserSpecifiedPortAddressSuffix = true;
534 this.createAPIA(serviceLocator, this.portAddressSuffix,
535 "user-specified", isUserSpecifiedPortAddressSuffix);
536 }
537
538 // If the user-specified portAddressSuffix failed or if there was none
539 // given, then APIA will be null, so we will try with the default
540 // portAddressSuffix. This time all exceptions will be passed on.
541 if(APIA == null) {
542 isUserSpecifiedPortAddressSuffix = false;
543 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
544 "default", isUserSpecifiedPortAddressSuffix);
545 }
546
547 }
548
549 /** Tries to create the FedoraAPIA instance using the serviceLocator
550 * and the given portSuffix. The APIA instance is obtained for the
551 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
552 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
553 * Remote Exception from AXIS that it can't find the target service to
554 * invoke is ignored so that the caller can retry with the default port-
555 * address suffix first before giving up. */
556 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
557 String portSuffix, String messageInsert,
558 boolean isUserSpecifiedPortAddressSuffix)
559 throws Exception
560 {
561 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
562 // this.portAddressSuffix : defaultPortAddressSuffix;
563
564 try {
565 LOG.debug( "Trying to connect to Fedora using the given"
566 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
567 + baseURL + portSuffix);
568 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
569 new java.net.URL(baseURL+portSuffix));
570 // let's test whether we're authenticated (otherwise a
571 // RemoteException will be thrown to indicate that the
572 // password was incorrect.)
573 RepositoryInfo repositoryInfo = APIA.describeRepository();
574 // throws RemoteException if pwd wrong or for other reasons
575 // in which case describeRepository() service is unavailable
576 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
577 // If we come all the way here, no exceptions were thrown:
578 this.portAddressSuffix = portSuffix; // store the one currently in use
579 } catch(RemoteException re) {
580 // if we're here, then APIA was unable to call the web service
581 // If this was because the fedora authentication failed, then
582 // let's throw a custom exception
583 String message = re.getMessage().toLowerCase();
584 // Looking for something Unauthorized(401)
585 if(message.indexOf("unauthorized") != -1
586 || message.indexOf("401") != -1)
587 {
588 throw new AuthenticationFailedException();
589 } else if(isUserSpecifiedPortAddressSuffix
590 && re.getMessage().contains(
591 FedoraGS3Exception.missingTargetService))
592 {
593 LOG.warn("Failed to connect to Fedora APIA services at given"
594 + " port address:\n" + portSuffix
595 + "\nException: " + re.getMessage());
596 // APIA.describeRepository can throw a remote exception
597 // whereby AXIS says the target service is missing and can't
598 // be invoked (FedoraGS3Exception.missingTargetService)
599 // Don't rethrow this, if AXIS can't find the user-specified
600 // portAddressSuffix, we will try with the default suffix next
601 APIA = null;
602 } else { // if trying default portAddressSuffix or if any other
603 // RemoteException was generated (whose cause is something
604 // other than an authentication failure) rethrow it.
605 throw re;
606 }
607 } catch(Exception e) { // Other Exceptions
608 // Could possibly be a ServiceException when using ServiceLocator
609 if(isUserSpecifiedPortAddressSuffix) {
610 APIA = null; // we won't throw other exceptions yet until
611 // we have tried the default PortAddressSuffix for the baseURL
612 } else {
613 throw new FedoraGS3InitFailureException(e);
614 }
615 }
616 }
617
618 /** Gets all greenstone collections. Searches for greenstone:*-collection.
619 * Method getCollections() defaults to getting only those objects in fedora's
620 * repository whose pids are of the format greenstone:*-collection.
621 * The use of AutoFinder and findObjects is shown in
622 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
623 * The Fedora-APIA's method definition of findObjects is:
624 * <pre>
625 * fedora-types:FieldSearchResult findObjects(
626 * fedora-types:ArrayOfString resultFields,
627 * xsd:nonNegativeInteger maxResults,
628 * fedora-types:FieldSearchQuery query )
629 * </pre>
630 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
631 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
632 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
633 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html">Type definition of 2.2.1 FieldSearchQuery</a>
634 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
635 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
636 *
637 * @return an array of Strings containing the pids of all collections
638 * matching the format greenstone:*-collection.
639 */
640 public String[] getCollections() throws RemoteException
641 {
642 // Available constructors:
643 // FieldSearchQuery(java.util.List conditions)
644 // FieldSearchQuery(java.lang.String terms)
645 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
646 FieldSearchQuery query = new FieldSearchQuery();
647 query.setTerms(queryStr);
648 query.setConditions(null);
649 // we'd like pid and title returned for each object
650 // we pass maxResults=null to get all objects that match
651 // (i.e. all collections)
652 String[] pids = null;
653
654 FieldSearchResult collection = AutoFinder.findObjects(
655 APIA, new String[]{"pid", "title"}, maxresults, query);
656 ObjectFields[] results = collection.getResultList();
657 pids = new String[results.length];
658 for(int i = 0; i < results.length; i++) {
659 pids[i] = results[i].getPid();
660 }
661 return pids;
662 }
663
664 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
665 * top-level documents or document sections - have a DC datastream. This
666 * method returns the content (XML) of the DC datastream as it is stored in
667 * fedora's repository.
668 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
669 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
670 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
671 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
672 * The method signature is:
673 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
674 * where dsID = itemID (look at datastreams page of running fedora instance)
675 * To access the XML content of the MIMETypedObject returned, we use its method
676 * bytes[] getStream(), but when instantiating a String from this, we have to
677 * use the String() contructor where we can specify the charset encoding (in
678 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
679 * @return a String version of the XML in the DC datastream for the fedora
680 * object denoted by pid.
681 * @param pid - the fedora persistent identifier for an item in the fedora
682 * repository.
683 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
684 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
685 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
686 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
687 */
688 public String getDC(String pid)
689 throws RemoteException, UnsupportedEncodingException
690 {
691 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
692 // datastream ID, dsID = itemID, look at a running fedora
693 MIMETypedStream dcStream
694 = APIA.getDatastreamDissemination(pid, DC, null);
695 //asOfDateTime = null to get the current version of the dataStream
696
697 // need to set the charset encoding to UTF8
698 return new String(dcStream.getStream(), UTF8);
699 }
700
701 /** All "greenstone:*" objects in fedora (be they collections be they
702 * collections, top-level documents or document sections) have an EX
703 * datastream. This method returns the content (XML) of the EX datastream as
704 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
705 * @return a String version of the XML in the EX datastream for the fedora
706 * object denoted by pid.
707 * @param pid - the fedora persistent identifier for an item in the fedora
708 * repository.
709 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
710 * asOfDateTime).
711 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
712 * @see String getDC(String pid) throws Exception
713 * */
714 public String getEX(String pid)
715 throws RemoteException, UnsupportedEncodingException
716 {
717 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
718 //asOfDateTime = null to get the current version of the dataStream
719
720 // need to set the charset encoding to UTF8
721 return new String(exStream.getStream(), UTF8);
722 }
723
724 /** Some "greenstone:*" top-level documents in the fedora repository (but not
725 * greenstone collections or document sections) have a DLS metadata datastream.
726 * This method returns the content (XML) of the DLS datastream as is. (It calls
727 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
728 * @return a String version of the XML in the DLS datastream for the fedora
729 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
730 * @param pid - the fedora persistent identifier for an item in the fedora
731 * repository.
732 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
733 * asOfDateTime).
734 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
735 * @see String getDC(String pid) throws Exception
736 * */
737 public String getDLS(String pid)
738 throws RemoteException, UnsupportedEncodingException
739 {
740 MIMETypedStream dlsStream = null;
741 // If there is no DLS datastream, it throws an exception (whose class
742 // fedora.server.errors.DatastreamNotFoundException can't be imported
743 // here (it's not in the client side fedora.server.* package, but on
744 // the server side package of that name):
745 try{
746 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
747 //asOfDateTime=null to get the current version of the dataStream
748 } catch(RemoteException e) {
749 //These two don't work:
750 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
751 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
752
753 if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
754 { // there is no DLS data stream for this document
755 return "";
756 }
757 else { // different problem, exception due to different cause
758 throw(e);
759 }
760 }
761 if(dlsStream == null)
762 return "";
763 // need to set the charset encoding to UTF8
764 return new String(dlsStream.getStream(), UTF8);
765 }
766
767 /** All "greenstone:*" objects in fedora (be they collections or documents)
768 * have a TOC datastream, unless they have only 1 section (SECTION1).
769 * This method returns the content (XML) of the TOC datastream as is.
770 * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
771 * @return a String version of the XML in the TOC datastream for the fedora
772 * object denoted by pid.
773 * @param pid - the fedora persistent identifier for an item in the fedora
774 * repository.
775 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
776 * asOfDateTime)
777 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
778 * @see String getDC(String pid) throws Exception
779 * */
780 public String getTOC(String pid)
781 throws RemoteException, UnsupportedEncodingException
782 {
783 try {
784 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
785 //asOfDateTime = null to get the current version of the dataStream
786 // need to set the charset encoding to UTF8
787 return new String(tocStream.getStream(), UTF8);
788 } catch(RemoteException re) {
789 // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
790 return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
791 }
792 }
793
794 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
795 * for the collections indicated by collPIDs.
796 * @param collPIDs - an array of Strings denoting the pids for greenstone
797 * collections stored in the fedora repositoryl. These should be of the
798 * format "greenstone:&lt;collectionName&gt;-collection". */
799 public String[] getCollectionNames(String[] collPIDs) {
800 String[] collNames = new String[collPIDs.length];
801 for(int i = 0; i < collPIDs.length; i++)
802 collNames[i] = getCollectionName(collPIDs[i]);
803 return collNames;
804 }
805
806 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
807 * in the parameter collNames.
808 * @param collNames - a list of names of greenstone collections
809 * stored in the fedora repository. */
810 public String[] getCollectionPIDs(String[] collNames) {
811 String[] collPIDs = new String[collNames.length];
812 for(int i = 0; i < collNames.length; i++)
813 collPIDs[i] = getCollectionName(collNames[i]);
814 return collPIDs;
815 }
816
817 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
818 * denoted by parameter collName.
819 * @param collName - the name of a greenstone collection stored
820 * stored in the fedora repository. */
821 public String getCollectionPID(String collName) {
822 return GREENSTONE_+collName+_COLLECTION;
823 }
824
825 /**
826 * Gets the title of the collection denoted by the given collection's pid by
827 * retrieving the title metadata for it from the collection's EX datastream.
828 * @return the title (in the default language, else English, else the
829 * first title found) for the particular collection denoted by its PID.
830 * @param collPID is the pid of a greenstone collection in the fedora
831 * repository. */
832 public String getCollectionTitle(String collPID)
833 throws RemoteException, UnsupportedEncodingException,
834 SAXException, IOException
835 {
836 String title = null; // has to be null initially, we do a check on it
837 // Parse the EX datastream (XML), and in its DOM, find the
838 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
839 // There might be one OR several of those with attribute
840 // name="collectionname". If there's only one, then get that.
841 // If there are several, there would possibly a be qualifier attribute,
842 // in which case get qualifier=lang (where lang is the member variable)
843 // If there is no qualifier with the requested language, then get the
844 // english one which is likely to be there, else return the title for
845 // the first collectionname .
846
847 MIMETypedStream exdata
848 = APIA.getDatastreamDissemination(collPID, EX, null);
849 String exStream = new String(exdata.getStream(), UTF8);
850
851 InputSource source = new InputSource(new StringReader(exStream));
852 Document doc = builder.parse(source);
853 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
854 NodeList children = docEl.getChildNodes();
855
856 String firstName = "";
857 String englishName = "";
858 for(int i = 0; i < children.getLength(); i++ ) {
859 Node n = children.item(i);
860 if(n.getNodeType() == Node.ELEMENT_NODE) {
861 Element e = (Element)n;
862 if(e.hasAttribute(NAME)
863 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
864 firstName = FedoraCommons.getValue(e);
865 if(!e.hasAttribute(QUALIFIER)) {
866 title = FedoraCommons.getValue(e);
867 break;
868 }
869 else if(e.getAttribute(QUALIFIER).equals(lang)) {
870 title = FedoraCommons.getValue(e);
871 break;
872 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
873 englishName = FedoraCommons.getValue(e);
874 }
875 }
876 }
877 }
878
879 // if the title is still not set to that of the requested language,
880 // then try setting it to the collection name in English. If English
881 // isn't available, then set it to the first collection name provided
882 // (in whichever language).
883 if(title == null) {
884 title = englishName.equals("") ? firstName : englishName;
885 }
886 doc = null;
887 return title;
888 }
889
890 /** @return the collection titles for all the collections indicated by
891 * collPIDs.
892 * @param collPIDs - a list of pids identifying greenstone collections
893 * stored in the fedora repository. */
894 public String[] getCollectionTitles(String[] collPIDs)
895 throws RemoteException, UnsupportedEncodingException,
896 SAXException, IOException
897 {
898 String[] titles = new String[collPIDs.length];
899
900 // parse each EX datastream (XML) which contains the gs3-extracted meta.
901 for(int i = 0; i < collPIDs.length; i++) {
902 titles[i] = getCollectionTitle(collPIDs[i]);
903 }
904 return titles;
905 }
906
907 /** @return the title metadata for the given doc objects of a collection.
908 * These titles are returned in the same order as the given docIDs.
909 * (The docPIDs already contain the collection name anyway.)
910 * @param docPIDs - a list of pids identifying documents stored in the
911 * fedora repository. */
912 public String[] getDocTitles(String[] docPIDs)
913 throws RemoteException, UnsupportedEncodingException,
914 SAXException, IOException
915 {
916 String[] titles = new String[docPIDs.length];
917 for(int i = 0; i < docPIDs.length; i++) {
918 titles[i] = getDocTitle(docPIDs[i]);
919 }
920 return titles;
921 }
922
923 /** Gets the title metadata for a particular doc object in a collection
924 * denoted by docPID. The docPID already contains the collection name.
925 * @return the title for the fedora document item denoted by docPID
926 * @param docPID is the pid of the document in the fedora repository
927 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
928 public String getDocTitle(String docPID)
929 throws RemoteException, UnsupportedEncodingException,
930 SAXException, IOException
931 {
932 // We need the extracted metadata file, and find its
933 // documentElement's child
934 // <ex:metadata name="Title">sometitle</ex:metadata>
935 // where the title we return is sometitle
936
937 String title = "";
938 MIMETypedStream exdata
939 = APIA.getDatastreamDissemination(docPID, EX, null);
940 String exStream = new String(exdata.getStream(), UTF8);
941 return getTitle(exStream);
942 }
943
944 /** Given a string representation of a document's or document section's
945 * EX datastream -- which is a greenstone extracted metadata XML file --
946 * of the form:
947 * &lt;ex&gt;
948 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
949 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
950 * ...
951 * &lt;/ex&gt;
952 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
953 * returns the value embedded in that element ('sometitle' in
954 * the example above).
955 * @return the title metadata of the document/document section whose EX
956 * datastream is passed as parameter
957 * @param exStream the EX datastream in String form of the document or
958 * document section. */
959 protected String getTitle(String exStream)
960 throws SAXException, IOException
961 {
962 String title = "";
963 InputSource source = new InputSource(new StringReader(exStream));
964 Document doc = builder.parse(source);
965 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
966 NodeList children = docEl.getChildNodes();
967
968 // Cycle through all the *element* children of <ex:ex></ex:ex>
969 // which are all of the form:
970 // <ex:metadata name="somename">somevalue</ex:metadata>
971 // Find the one where name="Title", its value is the title
972 for(int i = 0; i < children.getLength(); i++ ) {
973 Node n = children.item(i);
974 if(n.getNodeType() == Node.ELEMENT_NODE) {
975 Element e = (Element)n;
976 if(e.hasAttribute(NAME)
977 && e.getAttribute(NAME).equals(TITLE)) {
978 title = FedoraCommons.getValue(e);
979 break;
980 }
981 }
982 }
983 return title;
984 }
985
986
987 /** Used to obtain the dc:title value (hashID) of the DC stream of a digital
988 * object whose fedoraID is of a special sort: greenstone-http:<colname>-id. */
989 protected String getDCTitle(String fedoraPID)
990 throws RemoteException, UnsupportedEncodingException,
991 SAXException, IOException
992 {
993 String title = "";
994 MIMETypedStream dcdata = APIA.getDatastreamDissemination(fedoraPID, DC, null);
995 if(dcdata == null || dcdata.equals("")) {
996 return title;
997 }
998 String dcStream = new String(dcdata.getStream(), UTF8);
999
1000 InputSource source = new InputSource(new StringReader(dcStream));
1001 Document doc = builder.parse(source);
1002 Element docEl = doc.getDocumentElement(); // docEl=<oai_dc:dc></oai_dc:dc>
1003 NodeList children = docEl.getElementsByTagName("dc:title");
1004 if(children != null && children.getLength() > 0) {
1005 Node n = children.item(0); // <dc:title>
1006 Element e = (Element)n;
1007 title = FedoraCommons.getValue(e);
1008 }
1009 return title;
1010 }
1011
1012 /** @return the title metadata for the given document sections.
1013 * These titles are returned in the same order as the given docPIDs
1014 * and associated sectionIDs.
1015 * (The docPIDs already contain the collection name anyway.)
1016 * @param docPIDs - a list of pids identifying documents stored in the
1017 * fedora repository.
1018 * @param sectionIDs - a list of sectionIDs identifying individual sections
1019 * of documents stored in the fedora repository whose titles are requested. */
1020 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
1021 throws RemoteException, UnsupportedEncodingException,
1022 SAXException, IOException
1023 {
1024 String[] titles = new String[docPIDs.length];
1025 for(int i = 0; i < docPIDs.length; i++) {
1026 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1027 }
1028 return titles;
1029 }
1030
1031 /** @return the title metadata for the given document section.
1032 * (The docPID already contain the collection name anyway.)
1033 * @param docPID - a pid identifying a document in the fedora repository.
1034 * @param sectionID - the sectionID of the section of the
1035 * document whose title is requested. */
1036 public String getSectionTitle(String docPID, String sectionID)
1037 throws UnsupportedEncodingException, RemoteException,
1038 SAXException, IOException
1039 {
1040 String ex = this.getSectionEXMetadata(docPID, sectionID);
1041 return getTitle(ex);
1042 }
1043
1044 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1045 * returns the PIDs of the data objects found, with the exception of
1046 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1047 * collection PID.
1048 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1049 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1050 * the collection and not an object of the same collection) are returned.
1051 * All pids that do not map to a collection are assumed to be documents!
1052 * @return a list of the pids of all the (doc) objects in a collection.
1053 * @param colPID is the pid of the greenstone collection stored in
1054 * the fedora repository. */
1055 public String[] getCollectionDocs(String colPID)
1056 throws RemoteException
1057 {
1058 String colName = getCollectionName(colPID);
1059 //LOG.debug("colName: " + colName);
1060
1061 // Search fedora objects for pid=greenstone:<colName>-*
1062 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1063 // searches for "greenstone:"+colName+"-*";
1064 FieldSearchQuery query = new FieldSearchQuery();
1065 query.setTerms(queryStr);
1066 query.setConditions(null);
1067 String[] pids = null;
1068
1069 FieldSearchResult objects = AutoFinder.findObjects(
1070 APIA, new String[]{"pid", "title"}, maxresults, query);
1071 ObjectFields[] results = objects.getResultList();
1072
1073 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1074 // that's not a document object:
1075 pids = new String[results.length-1]; // not storing collection object
1076 int index = 0; // keeps track of docPid index
1077 for(int i = 0; i < results.length; i++) {
1078 // check it's not a collection object
1079 if(!results[i].getPid().endsWith(_COLLECTION)) {
1080 pids[index] = results[i].getPid();
1081 index++;
1082 }
1083 }
1084
1085 return pids;
1086 }
1087
1088 /** Given the pid of a document fedora data object, this method will return
1089 * all itemIDs that are part of that data object and are Sections. For further
1090 * information see interface Comparable (implemented by String), SortedSet
1091 * and TreeSet.
1092 * @return an array of itemIDs of the Sections of the document,
1093 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1094 * @param docPID is a fedora pid identifying a greenstone document object.
1095 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1096 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1097 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1098 */
1099 public String[] getSectionNames(String docPID) throws RemoteException {
1100 // DatastreamDef[] listDatastreams(
1101 // java.lang.String pid, java.lang.String asOfDateTime)
1102
1103 // listDatastreams returns information on each item (including itemID=dsID)
1104 // in the document object indicated by docPID
1105
1106 // Need to give an object version number, because null for asOfDateTime
1107 // does not return any datastreams!
1108 String[] times = APIA.getObjectHistory(docPID);
1109
1110 DatastreamDef[] datastreams = APIA.listDatastreams(
1111 docPID, times[times.length-1]);
1112
1113 // TreeSet is a SortedSet. We're going to put Strings into it,
1114 // and Strings implement interface Comparable already.
1115 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1116 for(int i = 0; i < datastreams.length; i++) {
1117 String itemID = datastreams[i].getID();
1118 if (itemID.startsWith("SECTION"))
1119 orderedList.add(itemID);
1120 }
1121
1122 String[] sectionNames = new String[orderedList.size()];
1123 orderedList.toArray(sectionNames);
1124 orderedList = null;
1125 return sectionNames;
1126 }
1127
1128 /** Given the pid of a document fedora data object, this method will return all
1129 * itemIDs that are part of that data object and are Sections, but just the
1130 * Section numbers are returned. For further information see interface Comparable
1131 * (implemented by String), SortedSet and TreeSet.
1132 * @return an array of itemIDs of the Section numbers of the document
1133 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1134 * @param docPID is a fedora pid identifying a greenstone document object.
1135 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1136 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1137 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1138 */
1139 public String[] getSectionNumbers(String docPID) throws RemoteException {
1140 String[] times = APIA.getObjectHistory(docPID);
1141
1142 DatastreamDef[] datastreams
1143 = APIA.listDatastreams(docPID, times[times.length-1]);
1144 //Vector v = new Vector(datastreams.length);
1145 TreeSet orderedList = new TreeSet();
1146
1147 for(int i = 0; i < datastreams.length; i++) {
1148 String itemID = datastreams[i].getID();
1149 if (itemID.startsWith("SECTION")) {
1150 //int index = SECTION.length();
1151 //itemID = itemID.substring(index);
1152 itemID = removePrefix(itemID, SECTION);
1153 orderedList.add(itemID);
1154 }
1155 }
1156
1157 String[] sectionNumbers = new String[orderedList.size()];
1158 orderedList.toArray(sectionNumbers);
1159 orderedList = null;
1160
1161 return sectionNumbers;
1162 }
1163
1164 /** @return the titles for the document sections denoted by the parameters.
1165 * @param docPID is a fedora pid identifying a greenstone document object.
1166 * @param sectionIDs is a list of identifiers identifying sections in the
1167 * document denoted by docPID, whose titles need to be returned. Each
1168 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1169 * or a section number (eg. 1.5.1). */
1170 public String[] getTitles(String docPID, String[] sectionIDs)
1171 throws RemoteException, UnsupportedEncodingException,
1172 SAXException, IOException
1173 {
1174 String[] titles = new String[sectionIDs.length];
1175 for(int i = 0; i < titles.length; i++)
1176 titles[i] = getTitle(docPID, sectionIDs[i]);
1177 return titles;
1178 }
1179
1180 /** @return the title for the document section denoted by the parameters.
1181 * @param docPID is a fedora pid identifying a greenstone document object.
1182 * @param sectionID identifies the particular section in the document denoted
1183 * by docPID, whose title needs to be returned. The sectionID may be either a
1184 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1185 public String getTitle(String docPID, String sectionID)
1186 throws RemoteException, UnsupportedEncodingException,
1187 SAXException, IOException
1188 {
1189 // Compose the itemID for the EX data stream from the number in the
1190 // sectionID:
1191 String exID = removePrefix(sectionID, SECTION);
1192 exID = EX+convertToMetaNumber(exID);
1193
1194 // Retrieve the extracted metadata stream (EX, in XML) for the given
1195 // section
1196 String exStream = getItem(docPID, exID);
1197
1198 // Extract the title from the XML, look for:
1199 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1200 InputSource source = new InputSource(new StringReader(exStream));
1201 Document doc = builder.parse(source);
1202 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1203 NodeList children = docEl.getElementsByTagName(
1204 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1205 for(int i = 0; i < children.getLength(); i++) {
1206 Element e = (Element)children.item(i);
1207 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1208 return FedoraCommons.getValue(e); // extract and return the title
1209 }
1210 return ""; // if we got here, then we couldn't find a title
1211 }
1212
1213 /** @return the section's XML (as a String) as it is stored in fedora.
1214 * Works out if sectionID is a sectionName or sectionNumber.
1215 * @param docPID - a fedora pid identifying a greenstone document object.
1216 * @param sectionID - identifyies the particular section in the
1217 * document denoted by docPID, may be a section name or number. */
1218 public String getSection(String docPID, String sectionID)
1219 throws RemoteException, UnsupportedEncodingException
1220 {
1221 if(!sectionID.startsWith(SECTION)) // then it has only section number
1222 sectionID = SECTION+sectionID;
1223
1224 String sectionXML = this.getItem(docPID, sectionID);
1225 return sectionXML;
1226 }
1227
1228 /** @return the required section's DC metadata XML datastream.
1229 * @param docPID - a fedora pid identifying a greenstone document object.
1230 * @param sectionID - identifyies the particular section in the
1231 * document denoted by docPID, may be a section name or number. */
1232 public String getSectionDCMetadata(String docPID, String sectionID)
1233 throws RemoteException, UnsupportedEncodingException
1234 {
1235 String dcID = removePrefix(sectionID, SECTION);
1236 // ensure we have just the section number
1237 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1238
1239 // now get the DC datastream for that number
1240 String dcXML = this.getItem(docPID, dcID);
1241 return dcXML;
1242 }
1243
1244 /** Returns the section EX metadata XML datastream for SectionID which may be
1245 * a section name or number. Currently a few EX files are named awkwardly:
1246 * the EX file for section 1.* is actually associated with datastream EX.*.
1247 * But subsequent EX datastreams are named appropriately: for instance,
1248 * EX2.1.1 matches with section 2.1.1
1249 * @return the required section's EX metadata XML datastream.
1250 * @param docPID - a fedora pid identifying a greenstone document object.
1251 * @param sectionID - identifyies the particular section in the
1252 * document denoted by docPID, may be a section name or number. */
1253 public String getSectionEXMetadata(String docPID, String sectionID)
1254 throws RemoteException, UnsupportedEncodingException
1255 {
1256 String exID = removePrefix(sectionID, SECTION);
1257 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1258
1259 // now get the EX datastream for that for number
1260 String exXML = this.getItem(docPID, exID);
1261 return exXML;
1262 }
1263
1264 /** Given a documentNode element, adds the nodetype attribute to all of its
1265 * docNode descendants. The nodetype is either Root, Internal or Leaf to indicate
1266 * whether the docnode is a toplevel document Node, or has children or has none.
1267 * @param e - the documentNode element whose descendants' nodetypes will be set
1268 * at method's end. */
1269 protected void addNodeTypeToDescendants(Element e) {
1270 NodeList sections = e.getElementsByTagName(SECTION_ELEMENT);
1271 for(int i = 0; i < sections.getLength(); i++) {
1272 Element section = (Element)sections.item(i);
1273 NodeList descendants = section.getElementsByTagName(SECTION_ELEMENT);
1274 if(descendants.getLength() > 0) {
1275 // if there are any descendants (which includes children) that are SECTIONS
1276 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
1277 } else {
1278 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
1279 }
1280 }
1281 }
1282
1283
1284 /** @return the part of the TOC XML file (which outlines doc structure)
1285 * relating to the given section. This includes the section denoted by
1286 * sectionID as well as all descendent subsections thereof.
1287 * @param docPID - a fedora pid identifying a greenstone document object.
1288 * @param sectionID - identifyies the particular section in the
1289 * document denoted by docPID, may be a section name or number.
1290 * @param structure can contain any combination of: ancestors, parent,
1291 * siblings, children, descendants, entire, specifying the portion of
1292 * the structure to retrieve.
1293 * @param info can contain any combination of: siblingPosition, numSiblings,
1294 * numChildren, requesting additional information about the structure. */
1295 public Element getSectionStructureXML(String docPID, String sectionID, String structure, String info)
1296 throws RemoteException, UnsupportedEncodingException, SAXException, IOException
1297 {
1298 // get the TableOfContents (TOC) XML datastream as a String
1299 String xmlTOC = getTOC(docPID);
1300
1301 // convert it into a DOM document
1302 InputSource source = new InputSource(new StringReader(xmlTOC));
1303 Document doc = builder.parse(source);
1304 // toplevel element docEl = <Section id="1"></Section>
1305 Element docEl = doc.getDocumentElement();
1306 addNodeTypeToDescendants(docEl);
1307 docEl.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
1308
1309 if(structure.indexOf("entire") != -1) { // don't need to find the specific section, doc root is what's required
1310 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1311 return docEl;
1312 }
1313
1314 if(sectionID.equals("")) {
1315 sectionID = "1";
1316 }
1317
1318 // Store just the number
1319 String sectionNumber = removePrefix(sectionID, SECTION);
1320 // Check whether we're requested to return the toplevel element itself
1321 // If sectionNumber=1, then the top-level element/document element
1322 // of the TOC XML is requested, so return the TOC as is.
1323 if(sectionNumber.equals("1") && structure.indexOf("descendants") != -1) {
1324 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1325 return docEl;
1326 }
1327
1328 // if the root is the section required, return that
1329 if(docEl.getTagName().equals(SECTION_ELEMENT)
1330 && docEl.getAttribute(ID).equals(sectionNumber)) {
1331 Element substructure = getSubstructure(docEl, structure);
1332 return getStructureInfo(substructure.getOwnerDocument(), docEl, info);
1333 //return docEl;
1334 }
1335
1336
1337 // Else, get all <Section> elements and find the
1338 // <Section id="sectionNumber"></Section> and return that
1339 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1340 for(int i = 0; i < sections.getLength(); i++) {
1341
1342 Element e = (Element)sections.item(i);
1343 if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) {
1344 Element substructure = getSubstructure(e, structure);
1345 return getStructureInfo(substructure.getOwnerDocument(), e, info);
1346 }
1347 }
1348
1349 return null; // not found
1350 }
1351
1352
1353 /** Implements browsing document titles of a greenstone collection stored in
1354 * the fedora repository by letter.
1355 * @return the document pids whose titles start with the given letter.
1356 * @param collName - the name of the collection.
1357 * @param letter - the starting letter to browse by.
1358 */
1359 public String[] browseTitlesByLetter(final String collName, final String letter)
1360 throws RemoteException, FedoraVersionNotSupportedException
1361 {
1362 String[] pids = null;
1363
1364 // We want to do the following kind of search (assuming letter=f
1365 // and collName=demo):
1366 // pid~greenstone:demo* title~f*
1367
1368 // We don't need to normalise the letter first (to search titles starting
1369 // with both uppercase and lowercase versions of the letter), because
1370 // Fedora always searches for both.
1371 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1372 // ANYWHERE in their titles!
1373 // SOLUTION: search the collection for all titles containing f as given,
1374 // retrieving pid and title fields. Then from the list of results, select
1375 // only those titles that start with the given letter.
1376 // This may seem an unnecessarily cumbersome job (when it looked like it
1377 // should have worked with just title~f*), BUT, at least the resulting
1378 // documents will be reduced to a set of titles containing f; rather than
1379 // having to search *all* documents in the collection.
1380 final String title = letter+WILDCARD;
1381
1382 FieldSearchResult objects = findObjectsWithTitlesContaining(
1383 collName, title);
1384 ObjectFields[] results = objects.getResultList();
1385 TreeSet v = new TreeSet(); // TreeSet to return the results in
1386 //alphabetical order
1387 for(int i = 0; i < results.length; i++) {
1388 // from the result list, select those titles that don't
1389 // just *contain* the letter, but actually start with it:
1390 String resultTitle = results[i].getTitle(0);
1391 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1392 String pid = results[i].getPid();
1393 // skip the collection object itself
1394 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1395 v.add(pid);
1396 //LOG.debug(resultTitle);
1397 }
1398 }
1399 }
1400 pids = new String[v.size()];
1401 v.toArray(pids);
1402 return pids;
1403 }
1404
1405 /** Implements querying document DC titles of a greenstone collection stored in
1406 * the fedora repository for a term that may occur anywhere in their titles.
1407 * @return the document pids whose DC titles contain the parameter term.
1408 * @param titleContents - the word or phrase to search the collection's
1409 * document titles for. Only one word, and this method finds Greenstone
1410 * DOCUMENT titles CONTAINING that word (if any).
1411 * @param startsWith - if true, searches for titles that start with
1412 * titleContents. Else it searches for titles that contain titleContents. */
1413 public String[] searchDocumentTitles(String collName, String titleContents,
1414 boolean startsWith)
1415 throws RemoteException, FedoraVersionNotSupportedException
1416 {
1417 String[] pids = null;
1418
1419 // We want to do the following kind of search (when written in Fedora's
1420 // REST format - see http://localhost:8080/fedora/search):
1421 // pid~greenstone:<colname>-* title~<1st word of titleContents>
1422
1423 // We don't need to normalise the word first (to search titles starting
1424 // with both uppercase and lowercase versions of it), because
1425 // Fedora always searches for the normalised word.
1426
1427 // 2 difficulties:
1428 // - We can only search for single words with Fedora's Conditional Search.
1429 // Obtain pids and titles of documents containing the first word and then
1430 // we filter the titles to those containing the entire phrase of
1431 // titleContents.
1432 // - Searching for title~FirstWord returns all documents containing
1433 // this word ANYWHERE in their titles. If parameter startsWith is false,
1434 // then this is fine. But if parameter startsWith is true, then go
1435 // through all the resulting titles found (containing FirstWord), select
1436 // only pids of those titles that contain the entire phrase titleContents
1437
1438 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1439
1440 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1441 // if titleContents is a phrase (contains space), then it's not
1442 // a single word, in which case search for just the first word
1443 String title = titleContents; // assume it's a single word
1444 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1445 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1446
1447 FieldSearchResult objects = findObjectsWithTitlesContaining(
1448 collName, title);
1449 if(objects == null) {
1450 final String[] empty = {};
1451 return empty;
1452 }
1453
1454 // Go through all the titles found and for those that match the criteria*,
1455 // store their pid. *Criteria: titles that start with OR contain the
1456 // word OR phrase of titleContents.
1457 ObjectFields[] results = objects.getResultList();
1458 Vector v = new Vector(); // return pids in the order found
1459 for(int i = 0; i < results.length; i++) {
1460 // from the result list, select those titles that don't
1461 // just *contain* the first word, but the entire phrase of
1462 // words in titleContents:
1463 String resultTitle = results[i].getTitle(0);
1464 boolean accepted = false; // accept the resultTitle found
1465
1466 String resultPID = results[i].getPid();
1467 // skip the collection object itself, since it's not a document
1468 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1469 accepted = false;
1470 }
1471 // if titleContents is a single word and we are checking
1472 // whether resultTitle contains titleContents:
1473 else if(indexOfFirstSpace == -1) { // titleContents is a single word
1474 if(!startsWith) // titles that *contain* the word titleContents
1475 accepted = true; //accept all titles found
1476 // else startWith: accept titles starting with word titleContents
1477 else if (resultTitle.toLowerCase().startsWith(
1478 titleContents.toLowerCase()))
1479 accepted = true;
1480
1481 }
1482 else { // otherwise, titleContents is a phrase of >1 word, need
1483 // to check that the result title contains the entire phrase
1484 if(startsWith && resultTitle.toLowerCase().startsWith(
1485 titleContents.toLowerCase()))
1486 accepted = true;
1487 else if(!startsWith && resultTitle.toLowerCase().contains(
1488 titleContents.toLowerCase()))
1489 accepted = true;
1490 }
1491
1492 // if the resultTitle fit the criteria, store its pid
1493 if(accepted) {
1494 v.add(resultPID);
1495 //System.out.println(resultTitle);
1496 }
1497
1498 }
1499 pids = new String[v.size()];
1500 v.toArray(pids);
1501 return pids;
1502 }
1503
1504
1505 /**
1506 * @param collName - the collection of documents we'll be searching in.
1507 * @param titleWord - the word we'll be searching the document titles for.
1508 * (Fedora's search returns all objects whose title contains that word).
1509 *
1510 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1511 * (see link):
1512 * <pre>
1513 * "There are two search methods: a search on all fields or a search on
1514 * specific fields. To search all fields the setTerms function of the
1515 * FieldSearchQuery must be used, with the paramater being the desired string.
1516 *
1517 * To search by specific fields, you must create an array of Condition
1518 * objects. Each condition consists of three parts:
1519 * the field to be searched (.setProperty()),
1520 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1521 * and the search string (.setValue())"
1522 * </pre>
1523 * We want to use the second search method above when browsing and searching,
1524 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1525 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1526 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1527 *
1528 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1529 * web services are defined. (The web.xml defines the "Servlets for REST-based
1530 * interfaces to the Fedora Repository Server").
1531 * Do a search on the word "search":
1532 * fedora.server.access.FieldSearchServlet is the class we need to look at
1533 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1534 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1535 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1536 * is what's used in the fedora client application that makes use of
1537 * the SOAP-based interface.
1538 *
1539 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1540 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1541 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1542 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1543 */
1544 protected FieldSearchResult findObjectsWithTitlesContaining(
1545 String collName, final String titleWord)
1546 throws RemoteException, FedoraVersionNotSupportedException
1547 {
1548 // Searching for pids of the form "greenstone:gs2mgdemo-*";
1549 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
1550
1551 Condition[] conditions = new Condition[2];
1552 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1553 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1554
1555 FieldSearchQuery query = new FieldSearchQuery();
1556 query.setConditions(conditions);
1557
1558 // We'd like pid and title returned for each object, because we'll make
1559 // use of title. We pass maxResults=null to get all objects that match
1560 // (i.e. all collections).
1561 FieldSearchResult objects = null;
1562 final String[] retrieveFields = {"pid", "title"};
1563 try {
1564 objects = AutoFinder.findObjects(
1565 APIA, retrieveFields, maxresults, query);
1566 // collection = APIA.findObjects(new String[]{"pid", "title"},
1567 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1568 } catch(RemoteException ex) {
1569 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1570 // fedoraVersion is too low, searching/browsing is not possible
1571 // (because class Condition has changed after 2.0, from 2.1.1
1572 // onwards)
1573 throw new FedoraVersionNotSupportedException(fedoraVersion);
1574 } else {
1575 LOG.error(
1576 "Remote exception when calling web service operation " +
1577 "findObject() to execute search:\n" + ex.getMessage());
1578 ex.printStackTrace();
1579 throw ex;
1580 }
1581 }
1582 return objects; // return the FieldSearchResult objects found
1583 }
1584
1585 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1586 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1587 * @param docPID - pid of a greenstone document in the fedora repository. */
1588 public String getDocName(String docPID) {
1589 return docPID.substring(docPID.indexOf('-')+1);
1590 }
1591
1592 /** @return the &lt;name&gt; in the parameter collPID
1593 * (greenstone:&lt;name&gt;-collection)
1594 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1595 * in the docPID (greenstone:&lt;name&gt;-docID).
1596 * @param collPID - pid of a greenstone collection in the fedora repository. */
1597 public String getCollectionName(String collPID) {
1598 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1599 }
1600
1601
1602 /** Return the TOC substructure requested
1603 * @return an element containing a copy if element e with either only its child
1604 * elements or with all its descendants and/or its ancestors or only its parent
1605 * and/or its siblings (depending on what the parameter structure specifies).
1606 * @param e - the element to start copying from and whose structure is requested.
1607 * @param structure - a string containing any combination of the values:
1608 * ancestors, parent, siblings, children, descendants,
1609 * specifying the portion of the structure to retrieve.
1610 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1611 */
1612 protected Element getSubstructure(Element original, String structure)
1613 {
1614 Document doc = builder.newDocument();
1615
1616 boolean descendants = (structure.indexOf("descendants") != -1) ? true : false;
1617 Node current = doc.importNode(original, descendants);
1618
1619 // descendants=true: import/copy descendants.
1620 // Else, copy just current node original (later copy its direct children)
1621
1622 Node parentOfCurrent = null;
1623 Node parentOfOriginal = original.getParentNode();
1624 if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot)
1625 parentOfOriginal = null;
1626 }
1627
1628 if(parentOfOriginal == null) { // no parentNode, so current is the root node.
1629 // can't get ancestors/parent/siblings, since all these need parentNode
1630 doc.appendChild(current);
1631 } else { // siblings, ancestors and parent requests all require parent node to exist
1632 // First check if we need to get ancestors, else for whether parent is required
1633 if(structure.indexOf("ancestors") != -1) {
1634 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1635
1636 Node child = null;
1637 Node parent = parentOfCurrent; // the copy
1638 Node n = parentOfOriginal.getParentNode(); // the doc to copy from
1639
1640 while(n != null && n != original.getOwnerDocument()) {
1641 child = parent;
1642 parent = doc.importNode(n, false); // no descendants
1643 parent.appendChild(child);
1644 n = n.getParentNode();
1645 }
1646
1647 doc.appendChild(parent); // need to put the copied node into a document
1648 // else it won't have a parent doc (DOMSource can't work with it
1649 // without it having a document parent).
1650
1651 } else if(structure.indexOf("parent") != -1) {
1652 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1653 //parentOfCurrent.appendChild(current);
1654 doc.appendChild(parentOfCurrent);
1655 }
1656
1657 // a request for siblings is independently tested for
1658 if(structure.indexOf("siblings") != -1) {
1659 // only import parent if we didn't already import
1660 // it for a request for ancestors or parent
1661 if(parentOfCurrent == null) {
1662 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1663 doc.appendChild(parentOfCurrent); // this becomes the root
1664 }
1665 // now the siblings of current (children of parentOfCurrent)
1666 NodeList children = parentOfOriginal.getChildNodes();
1667 for(int i = 0; i < children.getLength(); i++) {
1668 Node n = children.item(i);
1669
1670 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1671 if((Element)n != original) { // skip original which was already imported
1672 Node child = doc.importNode(n, false); // no descendants
1673 parentOfCurrent.appendChild(child);
1674 } else { // already imported Current element, insert at this position
1675 parentOfCurrent.appendChild(current);
1676 }
1677
1678 }
1679 }
1680 } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests
1681 // (sibling request adds the current node into a particular position)
1682 parentOfCurrent.appendChild(current);
1683 // need to put the copied node into a document
1684 // else it won't have a parent doc (DOMSource can't work with it
1685 // without it having a document parent).
1686 } else { // when only children or descendants were requested, current becomes root document
1687 doc.appendChild(current);
1688 }
1689 }
1690
1691 // if we are not recursively copying all descendants, then copy just
1692 // the childnodes of current:
1693 if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children
1694
1695 // get e's children and copy them into the new document
1696 NodeList children = original.getChildNodes();
1697 for(int i = 0; i < children.getLength(); i++) {
1698 // create copy
1699 Node n = doc.importNode(children.item(i), false);
1700 // attach it to parent
1701 current.appendChild(n);
1702
1703 // Now we need to indicate whether this new node (child) is a leaf
1704 // or not. (This is necessary for getChildrenOfSection(), else
1705 // it's hard to know if the children are leaves or have further
1706 // subsections.
1707 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1708 // we're dealing only with section children
1709
1710 // Check if the matching original had children:
1711 Element originalsChild = (Element)children.item(i);
1712 NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT);
1713 if(grandchildren.getLength() > 0) {
1714 // original's child has children, so indicate this
1715 // in the copied child:
1716 Element child = (Element)current;
1717 // child.setAttribute(TYPE, INTERNAL_NODE);
1718
1719 }
1720 }
1721 }
1722 }
1723
1724 return doc.getDocumentElement();
1725 }
1726
1727
1728 /** Return the TOC substructure with the requested structural info.
1729 * @return an element containing a copy if element e with either only its child
1730 * elements or with all its descendants and/or its ancestors or only its parent
1731 * and/or its siblings (depending on what the parameter structure specifies).
1732 * Returns null if the element, e, passed in is null.
1733 * @param doc - the new document into whose root element the structural information
1734 * will be inserted as attributes.
1735 * @param e - the element to start copying from and whose structure is requested.
1736 * @param info - a string containing any combination of the values: numChildren,
1737 * numSiblings, siblingPosition. The requested info gets added as attributes to
1738 * the returned root element.
1739 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1740 */
1741 protected Element getStructureInfo(Document doc, Element e, String info)
1742 {
1743 if(e == null) {
1744 return null;
1745 }
1746
1747 Element root = doc.getDocumentElement();
1748
1749 if(!info.equals("")) {
1750 if(info.indexOf("numChildren") != -1) {
1751 //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength();
1752 int numChildren = 0;
1753
1754 NodeList children = e.getChildNodes();
1755 for(int i = 0; i < children.getLength(); i++) {
1756 Node n = children.item(i);
1757 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1758 numChildren++;
1759 }
1760 }
1761
1762 root.setAttribute("numChildren", Integer.toString(numChildren));
1763 }
1764
1765 if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings
1766 int numSiblings = 0;
1767 int siblingPosition = 0;
1768
1769 Node parent = e.getParentNode();
1770 if(parent == null) {
1771 numSiblings = 0;
1772 siblingPosition = 1;
1773 } else {
1774 //numSiblings = parent.getChildNodes().getLength();
1775 NodeList siblings = parent.getChildNodes();
1776
1777 for(int i = 0; i < siblings.getLength(); i++) {
1778 Node n = siblings.item(i);
1779 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1780 if(e == (Element)n) {
1781 siblingPosition = numSiblings+1;
1782 } else { // count every sibling section element, except e itself
1783 numSiblings++;
1784 }
1785 }
1786 }
1787 }
1788
1789 if(info.indexOf("numSiblings") != -1) {
1790 root.setAttribute("numSiblings", Integer.toString(numSiblings));
1791 }
1792
1793 if(info.indexOf("siblingPosition") != -1) {
1794 root.setAttribute("siblingPosition", Integer.toString(siblingPosition));
1795 }
1796 }
1797 }
1798
1799 return root;
1800 }
1801
1802
1803 /**
1804 * Return a datastream of a document, given the document's id
1805 * and the item id of the datastream which is to be retrieved.
1806 * @return the XML (in String form) of the item denoted by itemID
1807 * that's part of the fedora data object denoted by docPID.
1808 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1809 * Can't retrieve images denoted by itemID using this method, only items
1810 * that are of XML format.
1811 * @param docPID - pid of a greenstone document in the fedora repository.
1812 * @param itemID - the itemID of a datastream of the fedora object
1813 * identified by docPID.
1814 */
1815 protected String getItem(String docPID, String itemID)
1816 throws RemoteException, UnsupportedEncodingException
1817 {
1818 // MIMETypedStream getDatastreamDissemination(
1819 // String pid, String dsID, asOfDateTime)
1820 MIMETypedStream datastream
1821 = APIA.getDatastreamDissemination(docPID, itemID, null);
1822 return new String(datastream.getStream(), UTF8);
1823 }
1824
1825 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1826 * returns "1.2.1".
1827 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1828 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1829 * However, the string str is returned unchanged if the prefix does not occur
1830 * at the start of str.
1831 * @return the String parameter str without the prefix.
1832 * It can be used to return the number of an itemID of a greenstone document
1833 * stored in the fedora repository without the given prefix.
1834 * @param prefix - the prefix which ought to be removed from the itemID.
1835 * @param str - the value of the itemID.
1836 */
1837 protected String removePrefix(String str, String prefix) {
1838 // do nothing in those cases where the prefix is not in param str
1839 if(!str.startsWith(prefix))
1840 return str;
1841 // otherwise:
1842 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1843 return "1" + str.substring(prefix.length());
1844 } else {
1845 return str.substring(prefix.length());
1846 }
1847 }
1848
1849 /** Given a number of the form x(.y.z), this method returns this number
1850 * as is, except when x = 1, in which case, it would return .y.z
1851 * That is, given number=3.2.1, this method would return 3.2.1
1852 * But, given number=1.2.3, this method would return .2.3.
1853 * When number=1, it is NOT a special case: "" is returned as explained.
1854 * @param number - a proper (fedora-greenstone document) section number
1855 * @return the same number as it ought to be for the associated EX, DC datastreama.
1856 */
1857 protected String convertToMetaNumber(String number) {
1858 if(number.startsWith("1.") || number.equals("1"))
1859 return number.substring(1); // remove the first char: the initial '1'
1860 else return number;
1861 }
1862
1863 /** @return fedora's baseURL. It's of the form
1864 * "http://localhost:8080/fedora" */
1865 public String getBaseURL() { return baseURL; }
1866
1867 /** @return the portAddressURL (in use) of the Fedora APIA
1868 * web service (should be the endpoint location in the APIA's
1869 * WSDL file).
1870 * It's usually of the form baseURL+"/services/access" */
1871 public String getPortAddressURL() {
1872 return this.baseURL + this.portAddressSuffix;
1873 }
1874
1875 /** @return the baseURL for gsdlAssocFiles */
1876 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1877
1878 public static void main(String args[]) {
1879 try {
1880 FedoraConnection fedoraCon
1881 = new FedoraConnection(new File("fedoraGS3.properties"));
1882
1883 String[] pids = null;
1884 pids = fedoraCon.getCollections();
1885 String[] titles = fedoraCon.getCollectionTitles(pids);
1886 for(int i = 0; i < pids.length; i++) {
1887 System.out.println("extracted title:" + titles[i]);
1888 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1889 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1890 for(int j = 0; j < docPIDs.length; j++) {
1891 System.out.println("\tExtr doc title: " + docTitles[j]);
1892 }
1893 }
1894
1895 String PID = "greenstone:gs2mgdemo-collection";
1896 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1897 String dcXML = fedoraCon.getDC(PID);
1898 String exXML = fedoraCon.getEX(PID);
1899 String tocXML = fedoraCon.getTOC(docPID);
1900 System.out.println("Dublin Core Metadata for " + PID
1901 + " is:\n" + dcXML);
1902 System.out.println("GS3 extracted metadata for " + PID
1903 + " is:\n" + exXML);
1904 System.out.println("Table of Contents for " + docPID
1905 + " is:\n" + tocXML);
1906
1907
1908 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1909 System.out.println("\nSection names for " + docPID + " are:");
1910 for(int i = 0; i < sectionNames.length; i++)
1911 System.out.println(sectionNames[i]);
1912
1913 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1914 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1915 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1916 System.out.println("\nSection numbers for " + docPID + " are:");
1917 for(int i = 0; i < sectionNumbers.length; i++) {
1918 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1919 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1920 }
1921
1922 String sectionID = "SECTION1"; //SECTION1.5
1923 System.out.println("\n");
1924
1925 System.out.println(
1926 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1927 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1928 for(int i = 0; i < pids.length; i++)
1929 System.out.println(pids[i]);
1930
1931 System.out.println(
1932 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1933 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1934 for(int i = 0; i < pids.length; i++)
1935 System.out.println(pids[i]);
1936
1937 System.out.println("\nDone - exiting.");
1938 System.exit(0);
1939 } catch(RemoteException re) {
1940 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1941 re.printStackTrace();
1942 } catch(Exception e) {
1943 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1944 e.printStackTrace();
1945 //LOG.error("Unable to instantiate FedoraConnection\n" + e, e);
1946 }
1947 }
1948}
Note: See TracBrowser for help on using the repository browser.