source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 21835

Last change on this file since 21835 was 21835, checked in by ak19, 14 years ago

Browse takes a list of classifierIDs, not a single one.

File size: 81.1 KB
Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import fedora.client.utility.AutoFinder;
25import fedora.server.access.FedoraAPIAServiceLocator;
26// The object for accessing FedoraAPI-A web services:
27import fedora.server.access.FedoraAPIA;
28
29// The definitions for all complex fedora types:
30import fedora.server.types.gen.MIMETypedStream;
31import fedora.server.types.gen.RepositoryInfo;
32import fedora.server.types.gen.FieldSearchResult;
33import fedora.server.types.gen.FieldSearchQuery;
34import fedora.server.types.gen.DatastreamDef;
35import fedora.server.types.gen.ObjectFields;
36import fedora.server.types.gen.Condition;
37import fedora.server.types.gen.ComparisonOperator;
38//import fedora.server.types.gen.*;
39
40import javax.net.ssl.SSLHandshakeException;
41import java.net.ConnectException;
42import org.xml.sax.SAXException;
43import java.io.UnsupportedEncodingException;
44import java.io.IOException;
45import javax.xml.parsers.ParserConfigurationException;
46import java.net.MalformedURLException;
47import java.rmi.RemoteException;
48
49import java.io.StringReader;
50import java.io.FileInputStream;
51import java.io.File;
52import java.util.TreeSet;
53import java.util.Properties;
54import java.util.Vector;
55
56import java.awt.GridLayout;
57import javax.swing.JLabel;
58import javax.swing.JOptionPane;
59import javax.swing.JPanel;
60import javax.swing.JPasswordField;
61import javax.swing.JTextField;
62
63import org.apache.log4j.Logger;
64import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
65import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
66import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
68import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
69
70import javax.xml.parsers.DocumentBuilderFactory;
71import javax.xml.parsers.DocumentBuilder;
72import javax.xml.transform.*;
73
74import org.xml.sax.InputSource;
75import org.w3c.dom.Document;
76import org.w3c.dom.Element;
77import org.w3c.dom.NodeList;
78import org.w3c.dom.Node;
79
80/** Class that establishes a connection with Fedora's web services (via
81 * Java stub classes for the same) and then provides methods to retrieve
 * Greenstone-specific data, such as the TOC, EX, DC, and Section
83 * datastreams of the Greenstone documents stored in Fedora's repository.
84 * These datastreams are returned as Strings without any changes being
85 * made to them.
86 * @author ak19
87*/
88public class FedoraConnection implements FedoraGS3DL {
/** The logging instance for this class */
private static final Logger LOG = Logger.getLogger(
    FedoraConnection.class.getName());

/** The version of fedora that is supported by class FedoraConnection */
protected static final String SUPPORTED_VERSION = "2.2.1";

/* Some fixed strings of known literals */
protected static final String TYPE = "type";
protected static final String INTERNAL_NODE = "internalNode";
protected static final String GET= "/get/";

// The DemoSOAPClient declares and uses the following as a static member.
// Probably none of the APIA methods (web service methods) remembers
// state, that might explain why we can use it as a static member then.
// NOTE(review): being static, this handle is shared by every
// FedoraConnection instance; init() resets it to null before reconnecting.
/** The object used to access the Fedora API-A web service methods */
protected static FedoraAPIA APIA;

/** Version of the running fedora server (assigned in createAPIA() from
 * the repository description returned by the server). */
protected String fedoraVersion;
/** The location of the fedora server, usually of the form
 * http://localhost:8080/fedora */
protected String baseURL;

/** The user-specified portAddressSuffix of the Fedora Access web services
 * (endpoint URL in the WSDL), usually of the form
 * http://localhost:8080/fedora/services/access
 * Users can tell FedoraGS3 to try accessing that first by setting
 * the "port.address.suffix" property in the properties file.
 * FedoraGS3 itself will not write the portAddressSuffix currently used in
 * the file for next time, but leave whatever value was entered in the
 * properties file. The portAddress--not just suffix--currently in use (once
 * the FedoraAPIA handle has been instantiated) can be obtained through
 * getPortAddressURL() method.
 * This field has no initialiser, so it is null until some code path
 * assigns it. */
protected String portAddressSuffix;

/** The part of the portAddress that comes after the baseURL. It is usually:
 * "/services/access" */
protected static final String defaultPortAddressSuffix = "/services/access";

/** The preferred language of the display content */
protected String lang;
/** The maximum number of collections to retrieve */
protected int maxresults;
/** DocumentBuilder used to create and parse XML documents */
protected DocumentBuilder builder;
135
136 /** Static method that returns the version of Fedora supported by this
137 * class FedoraConnection. */
138 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
139 /** The version of the running Fedora server, which may or may not
140 * match the supported version. */
141 public String getFedoraVersion() { return fedoraVersion; }
142
143 /** @return the default language used to query for titles (and anything else
144 * where there are multiple language options). Upon initialisation, this
145 * defaults to English. */
146 public String getLanguage() { return lang; }
147
148 /** Sets the the default language used to query for titles (and anything else
149 * where there are multiple language options). If the default language for any
150 * query is not available, then English ("en") is used. If that's not available
151 * then the first other available language is used.
152 * @param lang - the two-letter language code to set the default language to.
153 */
154 public void setLanguage(String lang) { this.lang = lang; }
155
156 /** The default maximum number of search results returned for a search. Upon
157 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
158 public int getMaxResults() { return maxresults; }
159
160 /** Set the default maximum number of search results returned for a search.
161 * @param maxresults - the new default maximum number of search results to
162 * be returned. */
163 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
164
165 /** Code for this constructor is from DemoSOAPClient.java.
166 * Instantiates the APIA handle using the protocol, host, port, fedora
167 * server repository username and password.
168 * @param host - the fedora server host (may be prefixed with http:// or
169 * https:// if parameter protocol is empty). If there's no protocol, and
170 * no protocol prefixed to the host, then the protocol defaults to http.
171 * @param protocol - either http or https (or empty "")
172 * @param port - the port on which fedora is running.
173 * @param fedoraServerUsername - the administrator username required to
174 * access the fedora server's repository. ("fedoraAdmin" unless changed).
175 * @param fedoraServerPassword - the fedora server repository's
176 * administrator password. If none was set on fedora installation, this
177 * can be empty (""). */
178 public FedoraConnection(String protocol, String host, int port,
179 String fedoraServerUsername, String fedoraServerPassword)
180 throws ParserConfigurationException, MalformedURLException,
181 SSLHandshakeException, RemoteException, AuthenticationFailedException,
182 NotAFedoraServerException, ConnectException, Exception
183 {
184 try {
185 init(protocol, host, Integer.toString(port),
186 fedoraServerUsername, fedoraServerPassword);
187 } /*catch(RemoteException re) { //subclass of IOException
188 throw re;
189 } catch(SSLHandshakeException ssle) { //subclass of IOException
190 // this is also of type IOException
191 throw ssle;
192 }*/ catch(IOException ioe) { // connected to the wrong server
193 String exceptMsg = ioe.getMessage().toLowerCase();
194 if(exceptMsg.indexOf("request failed") != -1
195 || exceptMsg.indexOf("404") != -1)
196 throw new NotAFedoraServerException();
197 else // the IOException is not due the cause we thought it was, so
198 throw ioe; // rethrow whatever other IOException was caught (which
199 // could have been RemoteException or SSLHandshakeException
200 // or some other cause)
201 }
202 }
203
204 /** Default constructor which takes input from the user to get host, port,
205 * fedora username and password.
206 * It keeps looping to display authentication popup, until valid values are
207 * entered:
208 * (a) if password is wrong, a RemoteException is thrown and popup reappears;
209 * This popup keeps appearing until the password and username are correct (as
210 * long as there's indeed a fedora server listening at the given host and port).
211 * (b) SSLHandshakeException occurs: this happens EITHER when the user prefixed
212 * the 'https' protocol to the host string when it should have been 'http';
213 * OR the ssl connection failed for some other reason.
214 * Allowing for the 1st case, the authentication popup is displayed just once
215 * more. On the second (consec) attempt, the SSLHandshakeException is rethrown.
216 * NOTE: if a fedora server at the protocol (https or http) isn't accessible,
217 * it takes a long time for the SSLHandshakeException to be thrown.
218 * (c) if the connection is refused, then a ConnectException is thrown.
219 * In that case, it's
220 * EITHER because the host and port values that were entered are wrong (and
221 * the authentication popup dialog is redisplayed just once more allowing
222 * the user to correct host/port values)
223 * OR the entered host and part were right but the fedora server at this
224 * host and port is not running.
225 * On the second consecutive attempt where a ConnectionException is thrown,
226 * it's no longer processed but rethrown, as there's no use in redisplaying
227 * the authentication popup when the problem is not an authentication issue.
228 * (d) Another IOException (other than the SSLHandshakeException of (b))
229 * occurs when there is indeed a server listening at the host and port
230 * entered, but it's not a Fedora server, because it is unable to process
231 * Fedora requests. If the expected message is found in the exception, than
232 * the authentication popup is displayed. However, other causes for an
233 * IOException are not handled. In such cases, the IOException is rethrown.
234 * (Note that IOException is not in the throws clause - other causes for
235 * it being unknown, it can be be considered as the more generic Exception.
236 */
237 public FedoraConnection()
238 throws ParserConfigurationException, MalformedURLException,
239 CancelledException, ConnectException, RemoteException,
240 SSLHandshakeException, Exception
241 {
242 Properties properties = new Properties();
243 // loop to display fedora server authentication popup to
244 // get user input
245 setInitialisationProperties(properties);
246 properties = null; // finished
247 }
248
/** Single argument constructor that takes the properties file
 * defining the values of the initialisation parameters required to
 * instantiate a FedoraConnection. These are fedora server username, password,
 * host and port. Whatever could be loaded is used to pre-fill the
 * initialisation input dialog; a missing or unreadable file simply means
 * the dialog starts from its defaults.
 * After a successful connection the (possibly user-edited) values are
 * written back to the same file, with the password and any error message
 * blanked out. Failures while reading or writing the file are logged and
 * otherwise ignored.
 * @param propertyFile is the properties file specifying the
 * values for Fedora server username, password, host and port. */
public FedoraConnection(File propertyFile)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    Properties properties = new Properties();
    // Load the properties from the given file
    try {
        if(propertyFile.exists()) {
            FileInputStream in = new FileInputStream(propertyFile);
            try {
                properties.load(in);
            } finally {
                in.close(); // Properties.load leaves the stream open
            }
        }
    } catch(Exception e) {
        // If the file didn't exist or could not be located,
        // then we just continue by creating empty properties
        LOG.warn("Exception loading from propertyFile "
            + propertyFile + ": " + e);
    }

    // Go through the process of showing the initialisation dialog
    setInitialisationProperties(properties);

    // Now let's save whatever values the user may have entered into the
    // input dialog as the default values for next time the dialog shows
    try {
        // First make sure errormessage gets stored as "" and doesn't
        // cause problems next time.
        properties.setProperty("errormessage", "");
        // Don't save passwords
        properties.setProperty("password", "");
        // If the portAddressSuffix is in the file already, then it's
        // user-specified and we shouldn't change it. But if there is no
        // such property in the file, then create it and write it to the file
        // with an empty string value:
        if(properties.getProperty("port.address.suffix") == null) {
            properties.setProperty("port.address.suffix", "");
        }

        java.io.FileOutputStream out = new java.io.FileOutputStream(
            propertyFile); // same file as properties loading file
        try {
            properties.store(out, "fedoraGS3 properties"); // write properties
        } finally {
            // Properties.store leaves the stream open; close it even
            // when store() fails so the file handle is not leaked.
            out.close();
        }
    } catch(Exception e) {
        LOG.warn("Exception writing to propertyFile "
            + propertyFile + ": " + e);
    }
}
306
307 /** Method that loops to display the dialog that retrieves the
308 * fedora server initialisation properties from the user. If there
309 * is a property file with values set already, it will display
310 * the previously entered values by loading them from that file.
311 * Otherwise, input fields in the dialog are empty.
312 * @param properties the Properties Hashmap storing values for
313 * username, password, host and port (and any errormessage). */
314 protected void setInitialisationProperties(Properties properties)
315 throws ParserConfigurationException, MalformedURLException,
316 CancelledException, ConnectException, RemoteException,
317 SSLHandshakeException, Exception
318 {
319 // keep looping to display authentication popup, until valid values are
320 // entered (except when a ConnectionRefused Exception is caught - this
321 // needs to be rethrown):
322 boolean authenticated = true;
323 // reset any error messages that may have been stored (should not be
324 // the case, but if there had been any difficulty during storing, it
325 // may not have written out an empty errorMessage)
326 properties.setProperty("errormessage", "");
327 do{
328 // show the Authentication-popup:
329 // By passing the HashMap Properties, user-updated values will
330 // be persistent in the authentication-popup fields (rather than
331 // reset to the default initial values).
332 properties = showAuthenticationPopup(properties);
333 String fedoraServerUsername = properties.getProperty("username", "");
334 String fedoraServerPassword = properties.getProperty("password", "");
335 String host = properties.getProperty("host", "");
336 String port = properties.getProperty("port", "");
337 //String protocol = host.startsWith("http") ? "" : "http://";
338 String protocol = "http://";
339 if(host.startsWith("http") || host.startsWith("https"))
340 protocol = "";
341 // NOTE THAT: if a fedora server at https:// is not accessible,
342 // it takes a long time for the authentication popup to reappear.
343
344 try{
345 this.portAddressSuffix
346 = properties.getProperty("port.address.suffix", "");
347 // Use the FedoraClient utility to get the SOAP stub for APIA.
348 // This SOAP stub enables the client to connect to a Fedora
349 // repository via the API-A web service interface.
350 init(protocol, host, port,
351 fedoraServerUsername, fedoraServerPassword);
352 // will throw Exception if it can't instantiate APIA
353
354 // if no exception thrown in the initialisation statement above,
355 // then we have been authenticated:
356 authenticated = true;
357 } catch(AuthenticationFailedException afe) {
358 authenticated = false;
359 properties.setProperty("errormessage", afe.getMessage());
360 } catch(RemoteException e) { // causes could be various
361 String reason = e.getMessage();
362 if(e.getCause() != null) {
363 // For instance, if a ConnectException indicating
364 // 'Connection Refused' or a java.net.UnknownHostException
365 // caused the RemoteException
366
367 // Strip out prefix "Nested exception is..." from the
368 // encapsulating Exception's message, by using the Cause's
369 // message. Keep Exception classname to give it some context:
370 reason = e.getCause().getClass().getName() + ": "
371 + e.getCause().getMessage();
372 // Give some more information if the connection was refused.
373 // (This can also happen when the Fedora server is not running)
374 if(e.getCause().getClass().equals(ConnectException.class)) {
375 reason += FedoraGS3Exception.connectionRefusedMessage;
376 }
377 }
378 // if the message indicates that a server was running there,
379 // then we tell the user it was not a Fedora server
380 if(reason.toLowerCase().contains("404")
381 || reason.toLowerCase().contains("request failed"))
382 {
383 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
384 }
385 authenticated = false;
386 properties.setProperty("errormessage", reason);
387 } catch(ConnectException e) {
388 properties.setProperty("errormessage",
389 FedoraGS3Exception.connectionRefusedMessage);
390 authenticated = false;
391 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
392 // be handled before IOException, as it's an IOException subclass.
393 authenticated = false;
394 properties.setProperty("errormessage",
395 FedoraGS3Exception.sslHandshakeExceptionMessage);
396 // we won't prefix the host with http for the user, as https
397 // might be right after all, and something else might have gone
398 // during the connection attempt instead.
399 //host = host.replace("https", "http"); //setting it for them
400 //properties.setProperty("host", host);
401 } catch(IOException ioe) { // occurs when we try to connect to a
402 // host/port where some server other than Fedora's is listening
403 // (e.g. if we end up connecting to GS3's host and port).
404 // In that case, we can get exception messages like a 404:
405 // "Unable to instantiate FedoraConnection
406 // java.io.IOException: Request failed [404 /fedora/describe]"
407 // Test this by trying to connect to localhost at 9090 where GS3 is
408 String exceptMsg = ioe.getMessage().toLowerCase();
409 if(exceptMsg.indexOf("request failed") != -1
410 || exceptMsg.indexOf("404") != -1)
411 {
412 properties.setProperty("errormessage",
413 NotAFedoraServerException.MESSAGE
414 + "\n(" + ioe.getMessage() + ")");
415 } else if(exceptMsg.indexOf("401") != -1
416 || exceptMsg.indexOf("500") != -1)
417 {
418 authenticated = false;
419 properties.setProperty("errormessage", ioe.getMessage());
420 } else { // the exception occurred for some other reason, rethrow it
421 throw ioe;
422 }
423 }
424 } while(!authenticated); // will keep showing popup until auhentication
425 // and connection input values are valid
426 }
427
428 /**
429 * Static method that displays a popup to allow the user to provide Fedora
430 * authentication (username, pwd) and connection (protocol+host, port) details.
431 * @param properties is a Properties HashMap where the property Keys which must
432 * have been put in here in advance (even with "" Values if appropriate) are:
433 * <pre>
434 * - username
435 * - password
436 * - host (may - but need not - be prefixed with either of the protocols
437 * "http://" and "https://"
438 * - port
439 * - errorMessage (displayed near the top of the popup dialog). Can be "".
440 * </pre>
441 * The values stored in the properties HashMap for the above property are
442 * initially displayed in the fields and the user can overwrite them.
443 * This is useful in such cases where invalid values were entered and this
444 * popup must be redisplayed to allow the user to correct their previous input.
445 * @return the same HashMap Properties which was passed as parameter. */
446 protected static Properties showAuthenticationPopup(Properties properties)
447 throws CancelledException
448 {
449 // Retrieve all the properties -- defaults to "" if any are null
450 JTextField usernameField = new JTextField(
451 properties.getProperty("username", "fedoraAdmin"));
452 JTextField passwordField = new JPasswordField(
453 properties.getProperty("password", ""));
454 JTextField hostField = new JTextField(
455 properties.getProperty("host", "localhost"));
456 JTextField portField = new JTextField(
457 properties.getProperty("port", "8080"));
458
459 JPanel panel = new JPanel(new GridLayout(4,2));
460 panel.add(new JLabel("User Name"));
461 panel.add(usernameField);
462 panel.add(new JLabel("Password"));
463 panel.add(passwordField);
464 panel.add(new JLabel("Host"));
465 panel.add(hostField);
466 panel.add(new JLabel("Port"));
467 panel.add(portField);
468
469 String heading = "Fedora Server Admin Authentication:";
470 String errorMessage = properties.getProperty("errormessage", "");
471 if(!errorMessage.equals("")) {
472 heading = "=> " + errorMessage + "\n\n" + heading;
473 }
474 int option = JOptionPane.showConfirmDialog(null, new Object[] {
475 heading, panel},
476 "Enter Network Password",
477 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
478
479 if (option == JOptionPane.OK_OPTION) {
480 String fedoraServerUsername = usernameField.getText();
481 String fedoraServerPassword = passwordField.getText();
482 String host = hostField.getText();
483 String port = portField.getText();
484 properties.setProperty("username", fedoraServerUsername);
485 properties.setProperty("password", fedoraServerPassword);
486 properties.setProperty("host", host);
487 properties.setProperty("port", port);
488 } else { // Cancel option
489 throw new CancelledException();
490 }
491 return properties;
492 }
493
494 /** Init method that is called by the constructor to set some
495 * important member variables including instantiating the APIA object
496 * used to invoke the Fedora APIA web service operations.
497 * @param protocol can be http or https
498 * @param host is the name of the Fedora server host
499 * @param port is the port number (String form) of the Fedora server
500 * @param fedoraServerUsername is the user name to access the Fedora
501 * Server
502 * @param fedoraServerPassword is the password needed to access the
503 * Fedora Server
504 */
505 protected void init(String protocol, String host, String port,
506 String fedoraServerUsername, String fedoraServerPassword)
507 throws ParserConfigurationException, MalformedURLException,
508 AuthenticationFailedException, RemoteException, Exception
509 {
510 // initialise member variables
511 lang = ENGLISH;
512 maxresults = Integer.MAX_VALUE;
513 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
514 builder = factory.newDocumentBuilder();
515
516 // (protocol is "" if host already contains protocol)
517 if(!protocol.equals("") && !protocol.endsWith("://"))
518 protocol += "://";
519 // now create baseURL = protocol://host:port/fedora
520 this.baseURL = protocol + host + ":" + port + "/fedora";
521
522 // Get the FedoraAPIA handle to/stub of the Fedora web services
523 // New way of instantiating connection to Fedora is dependent on
524 // fewer files of FedoraClient.jar
525 FedoraAPIAServiceLocator serviceLocator
526 = new FedoraAPIAServiceLocator(fedoraServerUsername,
527 fedoraServerPassword);
528
529 APIA = null;
530 boolean isUserSpecifiedPortAddressSuffix = false;
531 // try any portAddressSuffix specified by the user
532 if(!this.portAddressSuffix.equals("")) {
533 isUserSpecifiedPortAddressSuffix = true;
534 this.createAPIA(serviceLocator, this.portAddressSuffix,
535 "user-specified", isUserSpecifiedPortAddressSuffix);
536 }
537
538 // If the user-specified portAddressSuffix failed or if there was none
539 // given, then APIA will be null, so we will try with the default
540 // portAddressSuffix. This time all exceptions will be passed on.
541 if(APIA == null) {
542 isUserSpecifiedPortAddressSuffix = false;
543 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
544 "default", isUserSpecifiedPortAddressSuffix);
545 }
546
547 }
548
549 /** Tries to create the FedoraAPIA instance using the serviceLocator
550 * and the given portSuffix. The APIA instance is obtained for the
551 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
552 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
553 * Remote Exception from AXIS that it can't find the target service to
554 * invoke is ignored so that the caller can retry with the default port-
555 * address suffix first before giving up. */
556 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
557 String portSuffix, String messageInsert,
558 boolean isUserSpecifiedPortAddressSuffix)
559 throws Exception
560 {
561 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
562 // this.portAddressSuffix : defaultPortAddressSuffix;
563
564 try {
565 LOG.debug( "Trying to connect to Fedora using the given"
566 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
567 + baseURL + portSuffix);
568 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
569 new java.net.URL(baseURL+portSuffix));
570 // let's test whether we're authenticated (otherwise a
571 // RemoteException will be thrown to indicate that the
572 // password was incorrect.)
573 RepositoryInfo repositoryInfo = APIA.describeRepository();
574 // throws RemoteException if pwd wrong or for other reasons
575 // in which case describeRepository() service is unavailable
576 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
577 // If we come all the way here, no exceptions were thrown:
578 this.portAddressSuffix = portSuffix; // store the one currently in use
579 } catch(RemoteException re) {
580 // if we're here, then APIA was unable to call the web service
581 // If this was because the fedora authentication failed, then
582 // let's throw a custom exception
583 String message = re.getMessage().toLowerCase();
584 // Looking for something Unauthorized(401)
585 if(message.indexOf("unauthorized") != -1
586 || message.indexOf("401") != -1)
587 {
588 throw new AuthenticationFailedException();
589 } else if(isUserSpecifiedPortAddressSuffix
590 && re.getMessage().contains(
591 FedoraGS3Exception.missingTargetService))
592 {
593 LOG.warn("Failed to connect to Fedora APIA services at given"
594 + " port address:\n" + portSuffix
595 + "\nException: " + re.getMessage());
596 // APIA.describeRepository can throw a remote exception
597 // whereby AXIS says the target service is missing and can't
598 // be invoked (FedoraGS3Exception.missingTargetService)
599 // Don't rethrow this, if AXIS can't find the user-specified
600 // portAddressSuffix, we will try with the default suffix next
601 APIA = null;
602 } else { // if trying default portAddressSuffix or if any other
603 // RemoteException was generated (whose cause is something
604 // other than an authentication failure) rethrow it.
605 throw re;
606 }
607 } catch(Exception e) { // Other Exceptions
608 // Could possibly be a ServiceException when using ServiceLocator
609 if(isUserSpecifiedPortAddressSuffix) {
610 APIA = null; // we won't throw other exceptions yet until
611 // we have tried the default PortAddressSuffix for the baseURL
612 } else {
613 throw new FedoraGS3InitFailureException(e);
614 }
615 }
616 }
617
618 /** Gets all greenstone collections. Searches for greenstone:*-collection.
619 * Method getCollections() defaults to getting only those objects in fedora's
620 * repository whose pids are of the format greenstone:*-collection.
621 * The use of AutoFinder and findObjects is shown in
622 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
623 * The Fedora-APIA's method definition of findObjects is:
624 * <pre>
625 * fedora-types:FieldSearchResult findObjects(
626 * fedora-types:ArrayOfString resultFields,
627 * xsd:nonNegativeInteger maxResults,
628 * fedora-types:FieldSearchQuery query )
629 * </pre>
630 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
631 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
632 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
633 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html>Type definition of 2.2.1 FieldSearchQuery</a>
634 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
635 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
636 *
637 * @return an array of Strings containing the pids of all collections
638 * matching the format greenstone:*-collection.
639 */
640 public String[] getCollections() throws RemoteException
641 {
642 // Available constructors:
643 // FieldSearchQuery(java.util.List conditions)
644 // FieldSearchQuery(java.lang.String terms)
645 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
646 FieldSearchQuery query = new FieldSearchQuery();
647 query.setTerms(queryStr);
648 query.setConditions(null);
649 // we'd like pid and title returned for each object
650 // we pass maxResults=null to get all objects that match
651 // (i.e. all collections)
652 String[] pids = null;
653
654 FieldSearchResult collection = AutoFinder.findObjects(
655 APIA, new String[]{"pid", "title"}, maxresults, query);
656 ObjectFields[] results = collection.getResultList();
657 pids = new String[results.length];
658 for(int i = 0; i < results.length; i++) {
659 pids[i] = results[i].getPid();
660 }
661 return pids;
662 }
663
664 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
665 * top-level documents or document sections - have a DC datastream. This
666 * method returns the content (XML) of the DC datastream as it is stored in
667 * fedora's repository.
668 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
669 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
670 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
671 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
672 * The method signature is:
673 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
674 * where dsID = itemID (look at datastreams page of running fedora instance)
675 * To access the XML content of the MIMETypedObject returned, we use its method
676 * bytes[] getStream(), but when instantiating a String from this, we have to
677 * use the String() contructor where we can specify the charset encoding (in
678 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
679 * @return a String version of the XML in the DC datastream for the fedora
680 * object denoted by pid.
681 * @param pid - the fedora persistent identifier for an item in the fedora
682 * repository.
683 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
684 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
685 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
686 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
687 */
688 public String getDC(String pid)
689 throws RemoteException, UnsupportedEncodingException
690 {
691 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
692 // datastream ID, dsID = itemID, look at a running fedora
693 MIMETypedStream dcStream
694 = APIA.getDatastreamDissemination(pid, DC, null);
695 //asOfDateTime = null to get the current version of the dataStream
696
697 // need to set the charset encoding to UTF8
698 return new String(dcStream.getStream(), UTF8);
699 }
700
701 /** All "greenstone:*" objects in fedora (be they collections be they
702 * collections, top-level documents or document sections) have an EX
703 * datastream. This method returns the content (XML) of the EX datastream as
704 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
705 * @return a String version of the XML in the EX datastream for the fedora
706 * object denoted by pid.
707 * @param pid - the fedora persistent identifier for an item in the fedora
708 * repository.
709 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
710 * asOfDateTime).
711 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
712 * @see String getDC(String pid) throws Exception
713 * */
714 public String getEX(String pid)
715 throws RemoteException, UnsupportedEncodingException
716 {
717 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
718 //asOfDateTime = null to get the current version of the dataStream
719
720 // need to set the charset encoding to UTF8
721 return new String(exStream.getStream(), UTF8);
722 }
723
724 /** Some "greenstone:*" top-level documents in the fedora repository (but not
725 * greenstone collections or document sections) have a DLS metadata datastream.
726 * This method returns the content (XML) of the DLS datastream as is. (It calls
727 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
728 * @return a String version of the XML in the DLS datastream for the fedora
729 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
730 * @param pid - the fedora persistent identifier for an item in the fedora
731 * repository.
732 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
733 * asOfDateTime).
734 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
735 * @see String getDC(String pid) throws Exception
736 * */
737 public String getDLS(String pid)
738 throws RemoteException, UnsupportedEncodingException
739 {
740 MIMETypedStream dlsStream = null;
741 // If there is no DLS datastream, it throws an exception (whose class
742 // fedora.server.errors.DatastreamNotFoundException can't be imported
743 // here (it's not in the client side fedora.server.* package, but on
744 // the server side package of that name):
745 try{
746 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
747 //asOfDateTime=null to get the current version of the dataStream
748 } catch(RemoteException e) {
749 //These two don't work:
750 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
751 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
752
753 if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
754 { // there is no DLS data stream for this document
755 return "";
756 }
757 else { // different problem, exception due to different cause
758 throw(e);
759 }
760 }
761 if(dlsStream == null)
762 return "";
763 // need to set the charset encoding to UTF8
764 return new String(dlsStream.getStream(), UTF8);
765 }
766
767 /** All "greenstone:*" objects in fedora (be they collections or documents)
768 * have a TOC datastream, unless they have only 1 section (SECTION1).
769 * This method returns the content (XML) of the TOC datastream as is.
770 * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
771 * @return a String version of the XML in the TOC datastream for the fedora
772 * object denoted by pid.
773 * @param pid - the fedora persistent identifier for an item in the fedora
774 * repository.
775 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
776 * asOfDateTime)
777 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
778 * @see String getDC(String pid) throws Exception
779 * */
780 public String getTOC(String pid)
781 throws RemoteException, UnsupportedEncodingException
782 {
783 try {
784 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
785 //asOfDateTime = null to get the current version of the dataStream
786 // need to set the charset encoding to UTF8
787 return new String(tocStream.getStream(), UTF8);
788 } catch(RemoteException re) {
789 // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
790 return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
791 }
792 }
793
794 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
795 * for the collections indicated by collPIDs.
796 * @param collPIDs - an array of Strings denoting the pids for greenstone
797 * collections stored in the fedora repositoryl. These should be of the
798 * format "greenstone:&lt;collectionName&gt;-collection". */
799 public String[] getCollectionNames(String[] collPIDs) {
800 String[] collNames = new String[collPIDs.length];
801 for(int i = 0; i < collPIDs.length; i++)
802 collNames[i] = getCollectionName(collPIDs[i]);
803 return collNames;
804 }
805
806 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
807 * in the parameter collNames.
808 * @param collNames - a list of names of greenstone collections
809 * stored in the fedora repository. */
810 public String[] getCollectionPIDs(String[] collNames) {
811 String[] collPIDs = new String[collNames.length];
812 for(int i = 0; i < collNames.length; i++)
813 collPIDs[i] = getCollectionName(collNames[i]);
814 return collPIDs;
815 }
816
817 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
818 * denoted by parameter collName.
819 * @param collName - the name of a greenstone collection stored
820 * stored in the fedora repository. */
821 public String getCollectionPID(String collName) {
822 return GREENSTONE_+collName+_COLLECTION;
823 }
824
825 /**
826 * Gets the title of the collection denoted by the given collection's pid by
827 * retrieving the title metadata for it from the collection's EX datastream.
828 * @return the title (in the default language, else English, else the
829 * first title found) for the particular collection denoted by its PID.
830 * @param collPID is the pid of a greenstone collection in the fedora
831 * repository. */
832 public String getCollectionTitle(String collPID)
833 throws RemoteException, UnsupportedEncodingException,
834 SAXException, IOException
835 {
836 String title = null; // has to be null initially, we do a check on it
837 // Parse the EX datastream (XML), and in its DOM, find the
838 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
839 // There might be one OR several of those with attribute
840 // name="collectionname". If there's only one, then get that.
841 // If there are several, there would possibly a be qualifier attribute,
842 // in which case get qualifier=lang (where lang is the member variable)
843 // If there is no qualifier with the requested language, then get the
844 // english one which is likely to be there, else return the title for
845 // the first collectionname .
846
847 MIMETypedStream exdata
848 = APIA.getDatastreamDissemination(collPID, EX, null);
849 String exStream = new String(exdata.getStream(), UTF8);
850
851 InputSource source = new InputSource(new StringReader(exStream));
852 Document doc = builder.parse(source);
853 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
854 NodeList children = docEl.getChildNodes();
855
856 String firstName = "";
857 String englishName = "";
858 for(int i = 0; i < children.getLength(); i++ ) {
859 Node n = children.item(i);
860 if(n.getNodeType() == Node.ELEMENT_NODE) {
861 Element e = (Element)n;
862 if(e.hasAttribute(NAME)
863 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
864 firstName = FedoraCommons.getValue(e);
865 if(!e.hasAttribute(QUALIFIER)) {
866 title = FedoraCommons.getValue(e);
867 break;
868 }
869 else if(e.getAttribute(QUALIFIER).equals(lang)) {
870 title = FedoraCommons.getValue(e);
871 break;
872 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
873 englishName = FedoraCommons.getValue(e);
874 }
875 }
876 }
877 }
878
879 // if the title is still not set to that of the requested language,
880 // then try setting it to the collection name in English. If English
881 // isn't available, then set it to the first collection name provided
882 // (in whichever language).
883 if(title == null) {
884 title = englishName.equals("") ? firstName : englishName;
885 }
886 doc = null;
887 return title;
888 }
889
890 /** @return the collection titles for all the collections indicated by
891 * collPIDs.
892 * @param collPIDs - a list of pids identifying greenstone collections
893 * stored in the fedora repository. */
894 public String[] getCollectionTitles(String[] collPIDs)
895 throws RemoteException, UnsupportedEncodingException,
896 SAXException, IOException
897 {
898 String[] titles = new String[collPIDs.length];
899
900 // parse each EX datastream (XML) which contains the gs3-extracted meta.
901 for(int i = 0; i < collPIDs.length; i++) {
902 titles[i] = getCollectionTitle(collPIDs[i]);
903 }
904 return titles;
905 }
906
907 /** @return the title metadata for the given doc objects of a collection.
908 * These titles are returned in the same order as the given docIDs.
909 * (The docPIDs already contain the collection name anyway.)
910 * @param docPIDs - a list of pids identifying documents stored in the
911 * fedora repository. */
912 public String[] getDocTitles(String[] docPIDs)
913 throws RemoteException, UnsupportedEncodingException,
914 SAXException, IOException
915 {
916 String[] titles = new String[docPIDs.length];
917 for(int i = 0; i < docPIDs.length; i++) {
918 titles[i] = getDocTitle(docPIDs[i]);
919 }
920 return titles;
921 }
922
923 /** Gets the title metadata for a particular doc object in a collection
924 * denoted by docPID. The docPID already contains the collection name.
925 * @return the title for the fedora document item denoted by docPID
926 * @param docPID is the pid of the document in the fedora repository
927 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
928 public String getDocTitle(String docPID)
929 throws RemoteException, UnsupportedEncodingException,
930 SAXException, IOException
931 {
932 // We need the extracted metadata file, and find its
933 // documentElement's child
934 // <ex:metadata name="Title">sometitle</ex:metadata>
935 // where the title we return is sometitle
936
937 String title = "";
938 MIMETypedStream exdata
939 = APIA.getDatastreamDissemination(docPID, EX, null);
940 String exStream = new String(exdata.getStream(), UTF8);
941 return getTitle(exStream);
942 }
943
944 /** Given a string representation of a document's or document section's
945 * EX datastream -- which is a greenstone extracted metadata XML file --
946 * of the form:
947 * &lt;ex&gt;
948 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
949 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
950 * ...
951 * &lt;/ex&gt;
952 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
953 * returns the value embedded in that element ('sometitle' in
954 * the example above).
955 * @return the title metadata of the document/document section whose EX
956 * datastream is passed as parameter
957 * @param exStream the EX datastream in String form of the document or
958 * document section. */
959 protected String getTitle(String exStream)
960 throws SAXException, IOException
961 {
962 String title = "";
963 InputSource source = new InputSource(new StringReader(exStream));
964 Document doc = builder.parse(source);
965 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
966 NodeList children = docEl.getChildNodes();
967
968 // Cycle through all the *element* children of <ex:ex></ex:ex>
969 // which are all of the form:
970 // <ex:metadata name="somename">somevalue</ex:metadata>
971 // Find the one where name="Title", its value is the title
972 for(int i = 0; i < children.getLength(); i++ ) {
973 Node n = children.item(i);
974 if(n.getNodeType() == Node.ELEMENT_NODE) {
975 Element e = (Element)n;
976 if(e.hasAttribute(NAME)
977 && e.getAttribute(NAME).equals(TITLE)) {
978 title = FedoraCommons.getValue(e);
979 break;
980 }
981 }
982 }
983 return title;
984 }
985
986 /** @return the title metadata for the given document sections.
987 * These titles are returned in the same order as the given docPIDs
988 * and associated sectionIDs.
989 * (The docPIDs already contain the collection name anyway.)
990 * @param docPIDs - a list of pids identifying documents stored in the
991 * fedora repository.
992 * @param sectionIDs - a list of sectionIDs identifying individual sections
993 * of documents stored in the fedora repository whose titles are requested. */
994 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
995 throws RemoteException, UnsupportedEncodingException,
996 SAXException, IOException
997 {
998 String[] titles = new String[docPIDs.length];
999 for(int i = 0; i < docPIDs.length; i++) {
1000 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1001 }
1002 return titles;
1003 }
1004
1005 /** @return the title metadata for the given document section.
1006 * (The docPID already contain the collection name anyway.)
1007 * @param docPID - a pid identifying a document in the fedora repository.
1008 * @param sectionID - the sectionID of the section of the
1009 * document whose title is requested. */
1010 public String getSectionTitle(String docPID, String sectionID)
1011 throws UnsupportedEncodingException, RemoteException,
1012 SAXException, IOException
1013 {
1014 String ex = this.getSectionEXMetadata(docPID, sectionID);
1015 return getTitle(ex);
1016 }
1017
1018 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1019 * returns the PIDs of the data objects found, with the exception of
1020 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1021 * collection PID.
1022 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1023 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1024 * the collection and not an object of the same collection) are returned.
1025 * All pids that do not map to a collection are assumed to be documents!
1026 * @return a list of the pids of all the (doc) objects in a collection.
1027 * @param colPID is the pid of the greenstone collection stored in
1028 * the fedora repository. */
1029 public String[] getCollectionDocs(String colPID)
1030 throws RemoteException
1031 {
1032 String colName = getCollectionName(colPID);
1033 //LOG.debug("colName: " + colName);
1034
1035 // Search fedora objects for pid=greenstone:<colName>-*
1036 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1037 // searches for "greenstone:"+colName+"-*";
1038 FieldSearchQuery query = new FieldSearchQuery();
1039 query.setTerms(queryStr);
1040 query.setConditions(null);
1041 String[] pids = null;
1042
1043 FieldSearchResult objects = AutoFinder.findObjects(
1044 APIA, new String[]{"pid", "title"}, maxresults, query);
1045 ObjectFields[] results = objects.getResultList();
1046
1047 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1048 // that's not a document object:
1049 pids = new String[results.length-1]; // not storing collection object
1050 int index = 0; // keeps track of docPid index
1051 for(int i = 0; i < results.length; i++) {
1052 // check it's not a collection object
1053 if(!results[i].getPid().endsWith(_COLLECTION)) {
1054 pids[index] = results[i].getPid();
1055 index++;
1056 }
1057 }
1058
1059 return pids;
1060 }
1061
1062 /** Given the pid of a document fedora data object, this method will return
1063 * all itemIDs that are part of that data object and are Sections. For further
1064 * information see interface Comparable (implemented by String), SortedSet
1065 * and TreeSet.
1066 * @return an array of itemIDs of the Sections of the document,
1067 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1068 * @param docPID is a fedora pid identifying a greenstone document object.
1069 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1070 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1071 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1072 */
1073 public String[] getSectionNames(String docPID) throws RemoteException {
1074 // DatastreamDef[] listDatastreams(
1075 // java.lang.String pid, java.lang.String asOfDateTime)
1076
1077 // listDatastreams returns information on each item (including itemID=dsID)
1078 // in the document object indicated by docPID
1079
1080 // Need to give an object version number, because null for asOfDateTime
1081 // does not return any datastreams!
1082 String[] times = APIA.getObjectHistory(docPID);
1083
1084 DatastreamDef[] datastreams = APIA.listDatastreams(
1085 docPID, times[times.length-1]);
1086
1087 // TreeSet is a SortedSet. We're going to put Strings into it,
1088 // and Strings implement interface Comparable already.
1089 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1090 for(int i = 0; i < datastreams.length; i++) {
1091 String itemID = datastreams[i].getID();
1092 if (itemID.startsWith("SECTION"))
1093 orderedList.add(itemID);
1094 }
1095
1096 String[] sectionNames = new String[orderedList.size()];
1097 orderedList.toArray(sectionNames);
1098 orderedList = null;
1099 return sectionNames;
1100 }
1101
1102 /** Given the pid of a document fedora data object, this method will return all
1103 * itemIDs that are part of that data object and are Sections, but just the
1104 * Section numbers are returned. For further information see interface Comparable
1105 * (implemented by String), SortedSet and TreeSet.
1106 * @return an array of itemIDs of the Section numbers of the document
1107 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1108 * @param docPID is a fedora pid identifying a greenstone document object.
1109 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1110 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1111 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1112 */
1113 public String[] getSectionNumbers(String docPID) throws RemoteException {
1114 String[] times = APIA.getObjectHistory(docPID);
1115
1116 DatastreamDef[] datastreams
1117 = APIA.listDatastreams(docPID, times[times.length-1]);
1118 //Vector v = new Vector(datastreams.length);
1119 TreeSet orderedList = new TreeSet();
1120
1121 for(int i = 0; i < datastreams.length; i++) {
1122 String itemID = datastreams[i].getID();
1123 if (itemID.startsWith("SECTION")) {
1124 //int index = SECTION.length();
1125 //itemID = itemID.substring(index);
1126 itemID = removePrefix(itemID, SECTION);
1127 orderedList.add(itemID);
1128 }
1129 }
1130
1131 String[] sectionNumbers = new String[orderedList.size()];
1132 orderedList.toArray(sectionNumbers);
1133 orderedList = null;
1134
1135 return sectionNumbers;
1136 }
1137
1138 /** @return the titles for the document sections denoted by the parameters.
1139 * @param docPID is a fedora pid identifying a greenstone document object.
1140 * @param sectionIDs is a list of identifiers identifying sections in the
1141 * document denoted by docPID, whose titles need to be returned. Each
1142 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1143 * or a section number (eg. 1.5.1). */
1144 public String[] getTitles(String docPID, String[] sectionIDs)
1145 throws RemoteException, UnsupportedEncodingException,
1146 SAXException, IOException
1147 {
1148 String[] titles = new String[sectionIDs.length];
1149 for(int i = 0; i < titles.length; i++)
1150 titles[i] = getTitle(docPID, sectionIDs[i]);
1151 return titles;
1152 }
1153
1154 /** @return the title for the document section denoted by the parameters.
1155 * @param docPID is a fedora pid identifying a greenstone document object.
1156 * @param sectionID identifies the particular section in the document denoted
1157 * by docPID, whose title needs to be returned. The sectionID may be either a
1158 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1159 public String getTitle(String docPID, String sectionID)
1160 throws RemoteException, UnsupportedEncodingException,
1161 SAXException, IOException
1162 {
1163 // Compose the itemID for the EX data stream from the number in the
1164 // sectionID:
1165 String exID = removePrefix(sectionID, SECTION);
1166 exID = EX+convertToMetaNumber(exID);
1167
1168 // Retrieve the extracted metadata stream (EX, in XML) for the given
1169 // section
1170 String exStream = getItem(docPID, exID);
1171
1172 // Extract the title from the XML, look for:
1173 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1174 InputSource source = new InputSource(new StringReader(exStream));
1175 Document doc = builder.parse(source);
1176 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1177 NodeList children = docEl.getElementsByTagName(
1178 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1179 for(int i = 0; i < children.getLength(); i++) {
1180 Element e = (Element)children.item(i);
1181 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1182 return FedoraCommons.getValue(e); // extract and return the title
1183 }
1184 return ""; // if we got here, then we couldn't find a title
1185 }
1186
1187 /** @return the section's XML (as a String) as it is stored in fedora.
1188 * Works out if sectionID is a sectionName or sectionNumber.
1189 * @param docPID - a fedora pid identifying a greenstone document object.
1190 * @param sectionID - identifyies the particular section in the
1191 * document denoted by docPID, may be a section name or number. */
1192 public String getSection(String docPID, String sectionID)
1193 throws RemoteException, UnsupportedEncodingException
1194 {
1195 if(!sectionID.startsWith(SECTION)) // then it has only section number
1196 sectionID = SECTION+sectionID;
1197
1198 String sectionXML = this.getItem(docPID, sectionID);
1199 return sectionXML;
1200 }
1201
1202 /** @return the required section's DC metadata XML datastream.
1203 * @param docPID - a fedora pid identifying a greenstone document object.
1204 * @param sectionID - identifyies the particular section in the
1205 * document denoted by docPID, may be a section name or number. */
1206 public String getSectionDCMetadata(String docPID, String sectionID)
1207 throws RemoteException, UnsupportedEncodingException
1208 {
1209 String dcID = removePrefix(sectionID, SECTION);
1210 // ensure we have just the section number
1211 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1212
1213 // now get the DC datastream for that number
1214 String dcXML = this.getItem(docPID, dcID);
1215 return dcXML;
1216 }
1217
1218 /** Returns the section EX metadata XML datastream for SectionID which may be
1219 * a section name or number. Currently a few EX files are named awkwardly:
1220 * the EX file for section 1.* is actually associated with datastream EX.*.
1221 * But subsequent EX datastreams are named appropriately: for instance,
1222 * EX2.1.1 matches with section 2.1.1
1223 * @return the required section's EX metadata XML datastream.
1224 * @param docPID - a fedora pid identifying a greenstone document object.
1225 * @param sectionID - identifyies the particular section in the
1226 * document denoted by docPID, may be a section name or number. */
1227 public String getSectionEXMetadata(String docPID, String sectionID)
1228 throws RemoteException, UnsupportedEncodingException
1229 {
1230 String exID = removePrefix(sectionID, SECTION);
1231 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1232
1233 // now get the EX datastream for that for number
1234 String exXML = this.getItem(docPID, exID);
1235 return exXML;
1236 }
1237
1238 /** @return the XML content of the TOC of just that portion of the TOC which
1239 * contains the section denoted by sectionID and its direct child subsections.
1240 * The children are returned in the order they are encountered, which
1241 * happens to be in the required order of ascending sectionID.
1242 * @param docPID - a fedora pid identifying a greenstone document object.
1243 * @param sectionID - identifyies the particular section in the
1244 * document denoted by docPID, may be a section name or number. */
1245 public Element getChildrenOfSectionXML(String docPID, String sectionID)
1246 throws RemoteException, UnsupportedEncodingException,
1247 SAXException, IOException
1248 {
1249 // Store just the number
1250 String sectionNumber = removePrefix(sectionID, SECTION);
1251 // get the TOC XML datastream as a String
1252 String xmlTOC = getTOC(docPID);
1253
1254 // convert it into a DOM document
1255 InputSource source = new InputSource(new StringReader(xmlTOC));
1256 Document doc = builder.parse(source);
1257 // toplevel element docEl = <Section id="1"></Section>
1258 Element docEl = doc.getDocumentElement();
1259
1260 // check whether we're requested to return the toplevel element itself
1261 if(sectionID.equals("") || // subSection of entire docPID is requested
1262 (docEl.hasAttribute(ID) && docEl.getAttribute(ID).equals(sectionNumber)))
1263 return getSubstructure(docEl, false);
1264
1265 // Otherwise, get all <Section> elements and find the
1266 // <Section id="sectionNumber"></Section> and return that and its
1267 // children
1268 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1269 for(int i = 0; i < sections.getLength(); i++) {
1270 Element e = (Element)sections.item(i);
1271 if(e.hasAttribute(ID)
1272 && e.getAttribute(ID).equals(sectionNumber))
1273 {
1274 //System.err.println("Found: " + e.getAttribute(ID));
1275 return getSubstructure(e, false); // false: get just e and children
1276 }
1277 }
1278 return null; // not found
1279 }
1280
1281 /** @return a string representing the XML content of the TOC of just
1282 * that portion of the TOC which contains the section denoted by sectionID
1283 * and its direct child subsections.
1284 * The children are returned in the order they are encountered, which
1285 * happens to be in the required order of ascending sectionID.
1286 * @param docPID - a fedora pid identifying a greenstone document object.
1287 * @param sectionID - identifyies the particular section in the
1288 * document denoted by docPID, may be a section name or number. */
1289 public String getChildrenOfSection(String docPID, String sectionID)
1290 throws RemoteException, UnsupportedEncodingException,
1291 SAXException, IOException, TransformerException
1292 {
1293 Element children = getChildrenOfSectionXML(docPID, sectionID);
1294 return (children == null) ? "" : FedoraCommons.elementToString(children);
1295 }
1296
1297 /** @return the part of the TOC XML file (which outlines doc structure)
1298 * relating to the given section. This includes the section denoted by
1299 * sectionID as well as all descendent subsections thereof.
1300 * @param docPID - a fedora pid identifying a greenstone document object.
1301 * @param sectionID - identifyies the particular section in the
1302 * document denoted by docPID, may be a section name or number. */
1303 public Element getSubsectionXML(String docPID, String sectionID)
1304 throws RemoteException, UnsupportedEncodingException,
1305 SAXException, IOException
1306 {
1307 // get the TableOfContents (TOC) XML datastream as a String
1308 String xmlTOC = getTOC(docPID);
1309
1310 // convert it into a DOM document
1311 InputSource source = new InputSource(new StringReader(xmlTOC));
1312 Document doc = builder.parse(source);
1313 // toplevel element docEl = <Section id="1"></Section>
1314 Element docEl = doc.getDocumentElement();
1315
1316 if(sectionID.equals("")) // subSection of entire docPID is requested
1317 return docEl;
1318
1319 // Store just the number
1320 String sectionNumber = removePrefix(sectionID, SECTION);
1321 // Check whether we're requested to return the toplevel element itself
1322 // If sectionNumber=1, then the top-level element/document element
1323 // of the TOC XML is requested, so return the TOC as is.
1324 if(sectionNumber.equals("1")) {
1325 return docEl;
1326 }
1327
1328 // Get all <Section> elements and find the
1329 // <Section id="sectionNumber"></Section> and return that
1330 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1331 for(int i = 0; i < sections.getLength(); i++) {
1332 Element e = (Element)sections.item(i);
1333 if(e.hasAttribute(ID)
1334 && e.getAttribute(ID).equals(sectionNumber)) {
1335 //System.err.println("Found: " + e.getAttribute(ID));
1336 return getSubstructure(e, true); // true:get all descendents
1337 }
1338 }
1339 return null; // not found
1340 }
1341
1342 /** @return a String representation of the part of the TOC XML file
1343 * (which outlines doc structure) relating to the given section. This
1344 * includes the section denoted by sectionID as well as all descendent
1345 * subsections thereof.
1346 * @param docPID a fedora pid identifying a greenstone document object.
1347 * @param sectionID identifyies the particular section in the
1348 * document denoted by docPID, may be a section name or number. */
1349 public String getSubsection(String docPID, String sectionID)
1350 throws RemoteException, UnsupportedEncodingException, SAXException,
1351 IOException, TransformerException
1352 {
1353 // Store just the number
1354 String sectionNumber = removePrefix(sectionID, SECTION);
1355 // get the TableOfContents (TOC) XML datastream as a String
1356 String xmlTOC = getTOC(docPID);
1357
1358 // Check whether we're requested to return the toplevel element itself
1359 // If sectionNumber=1, then the top-level element/document element
1360 // of the TOC XML is requested, so return the TOC as is.
1361 if(sectionNumber.equals("1"))
1362 return xmlTOC;
1363
1364 // else
1365 Element subsection = getSubsectionXML(docPID, sectionID);
1366 return (subsection == null) ? "" : FedoraCommons.elementToString(subsection);
1367 }
1368
1369 /** Implements browsing document titles of a greenstone collection stored in
1370 * the fedora repository by letter.
1371 * @return the document pids whose titles start with the given letter.
1372 * @param letter - the starting letter to browse by.
1373 */
1374 public String[] browseTitlesByLetter(final String collName, final String letter)
1375 throws RemoteException, FedoraVersionNotSupportedException
1376 {
1377 String[] pids = null;
1378
1379 // We want to do the following kind of search (assuming letter=f
1380 // and collName=demo):
1381 // pid~greenstone:demo* title~f*
1382
1383 // We don't need to normalise the letter first (to search titles starting
1384 // with both uppercase and lowercase versions of the letter), because
1385 // Fedora always searches for both.
1386 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1387 // ANYWHERE in their titles!
1388 // SOLUTION: search the collection for all titles containing f as given,
1389 // retrieving pid and title fields. Then from the list of results, select
1390 // only those titles that start with the given letter.
1391 // This may seem an unnecessarily cumbersome job (when it looked like it
1392 // should have worked with just title~f*), BUT, at least the resulting
1393 // documents will be reduced to a set of titles containing f; rather than
1394 // having to search *all* documents in the collection.
1395 final String title = letter+WILDCARD;
1396
1397 FieldSearchResult objects = findObjectsWithTitlesContaining(
1398 collName, title);
1399 ObjectFields[] results = objects.getResultList();
1400 TreeSet v = new TreeSet(); // TreeSet to return the results in
1401 //alphabetical order
1402 for(int i = 0; i < results.length; i++) {
1403 // from the result list, select those titles that don't
1404 // just *contain* the letter, but actually start with it:
1405 String resultTitle = results[i].getTitle(0);
1406 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1407 String pid = results[i].getPid();
1408 // skip the collection object itself
1409 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1410 v.add(pid);
1411 //LOG.debug(resultTitle);
1412 }
1413 }
1414 }
1415 pids = new String[v.size()];
1416 v.toArray(pids);
1417 return pids;
1418 }
1419
1420 /** Implements querying document DC titles of a greenstone collection stored in
1421 * the fedora repository for a term that may occur anywhere in their titles.
1422 * @return the document pids whose DC titles contain the parameter term.
1423 * @param titleContents - the word or phrase to search the collection's
1424 * document titles for. Only one word, and this method finds Greenstone
1425 * DOCUMENT titles CONTAINING that word (if any).
1426 * @param startsWith - if true, searches for titles that start with
1427 * titleContents. Else it searches for titles that contain titleContents. */
1428 public String[] searchDocumentTitles(String collName, String titleContents,
1429 boolean startsWith)
1430 throws RemoteException, FedoraVersionNotSupportedException
1431 {
1432 String[] pids = null;
1433
1434 // We want to do the following kind of search (when written in Fedora's
1435 // REST format - see http://localhost:8080/fedora/search):
1436 // pid~greenstone:<colname>* title~<1st word of titleContents>
1437
1438 // We don't need to normalise the word first (to search titles starting
1439 // with both uppercase and lowercase versions of it), because
1440 // Fedora always searches for the normalised word.
1441
1442 // 2 difficulties:
1443 // - We can only search for single words with Fedora's Conditional Search.
1444 // Obtain pids and titles of documents containing the first word and then
1445 // we filter the titles to those containing the entire phrase of
1446 // titleContents.
1447 // - Searching for title~FirstWord returns all documents containing
1448 // this word ANYWHERE in their titles. If parameter startsWith is false,
1449 // then this is fine. But if parameter startsWith is true, then go
1450 // through all the resulting titles found (containing FirstWord), select
1451 // only pids of those titles that contain the entire phrase titleContents
1452
1453 final String pid = GREENSTONE_+collName+WILDCARD;
1454
1455 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1456 // if titleContents is a phrase (contains space), then it's not
1457 // a single word, in which case search for just the first word
1458 String title = titleContents; // assume it's a single word
1459 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1460 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1461
1462 FieldSearchResult objects = findObjectsWithTitlesContaining(
1463 collName, title);
1464 if(objects == null) {
1465 final String[] empty = {};
1466 return empty;
1467 }
1468
1469 // Go through all the titles found and for those that match the criteria*,
1470 // store their pid. *Criteria: titles that start with OR contain the
1471 // word OR phrase of titleContents.
1472 ObjectFields[] results = objects.getResultList();
1473 Vector v = new Vector(); // return pids in the order found
1474 for(int i = 0; i < results.length; i++) {
1475 // from the result list, select those titles that don't
1476 // just *contain* the first word, but the entire phrase of
1477 // words in titleContents:
1478 String resultTitle = results[i].getTitle(0);
1479 boolean accepted = false; // accept the resultTitle found
1480
1481 String resultPID = results[i].getPid();
1482 // skip the collection object itself, since it's not a document
1483 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1484 accepted = false;
1485 }
1486 // if titleContents is a single word and we are checking
1487 // whether resultTitle contains titleContents:
1488 else if(indexOfFirstSpace == -1) { // titleContents is a single word
1489 if(!startsWith) // titles that *contain* the word titleContents
1490 accepted = true; //accept all titles found
1491 // else startWith: accept titles starting with word titleContents
1492 else if (resultTitle.toLowerCase().startsWith(
1493 titleContents.toLowerCase()))
1494 accepted = true;
1495
1496 }
1497 else { // otherwise, titleContents is a phrase of >1 word, need
1498 // to check that the result title contains the entire phrase
1499 if(startsWith && resultTitle.toLowerCase().startsWith(
1500 titleContents.toLowerCase()))
1501 accepted = true;
1502 else if(!startsWith && resultTitle.toLowerCase().contains(
1503 titleContents.toLowerCase()))
1504 accepted = true;
1505 }
1506
1507 // if the resultTitle fit the criteria, store its pid
1508 if(accepted) {
1509 v.add(resultPID);
1510 //System.out.println(resultTitle);
1511 }
1512
1513 }
1514 pids = new String[v.size()];
1515 v.toArray(pids);
1516 return pids;
1517 }
1518
1519
1520 /**
1521 * @param collName - the collection of documents we'll be searching in.
1522 * @param titleWord - the word we'll be searching the document titles for.
1523 * (Fedora's search returns all objects whose title contains that word).
1524 *
1525 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1526 * (see link):
1527 * <pre>
1528 * "There are two search methods: a search on all fields or a search on
1529 * specific fields. To search all fields the setTerms function of the
1530 * FieldSearchQuery must be used, with the paramater being the desired string.
1531 *
1532 * To search by specific fields, you must create an array of Condition
1533 * objects. Each condition consists of three parts:
1534 * the field to be searched (.setProperty()),
1535 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1536 * and the search string (.setValue())"
1537 * </pre>
1538 * We want to use the second search method above when browsing and searching,
1539 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1540 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1541 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1542 *
1543 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1544 * web services are defined. (The web.xml defines the "Servlets for REST-based
1545 * interfaces to the Fedora Repository Server").
1546 * Do a search on the word "search":
1547 * fedora.server.access.FieldSearchServlet is the class we need to look at
1548 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1549 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1550 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1551 * is what's used in the fedora client application that makes use of
1552 * the SOAP-based interface.
1553 *
1554 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1555 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1556 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1557 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1558 */
1559 protected FieldSearchResult findObjectsWithTitlesContaining(
1560 String collName, final String titleWord)
1561 throws RemoteException, FedoraVersionNotSupportedException
1562 {
1563 // Searching for pids of the form "greenstone:gs2mgdemo*";
1564 final String pid = GREENSTONE_+collName+WILDCARD;
1565
1566 Condition[] conditions = new Condition[2];
1567 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1568 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1569
1570 FieldSearchQuery query = new FieldSearchQuery();
1571 query.setConditions(conditions);
1572
1573 // We'd like pid and title returned for each object, because we'll make
1574 // use of title. We pass maxResults=null to get all objects that match
1575 // (i.e. all collections).
1576 FieldSearchResult objects = null;
1577 final String[] retrieveFields = {"pid", "title"};
1578 try {
1579 objects = AutoFinder.findObjects(
1580 APIA, retrieveFields, maxresults, query);
1581 // collection = APIA.findObjects(new String[]{"pid", "title"},
1582 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1583 } catch(RemoteException ex) {
1584 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1585 // fedoraVersion is too low, searching/browsing is not possible
1586 // (because class Condition has changed after 2.0, from 2.1.1
1587 // onwards)
1588 throw new FedoraVersionNotSupportedException(fedoraVersion);
1589 } else {
1590 LOG.error(
1591 "Remote exception when calling web service operation " +
1592 "findObject() to execute search:\n" + ex.getMessage());
1593 ex.printStackTrace();
1594 throw ex;
1595 }
1596 }
1597 return objects; // return the FieldSearchResult objects found
1598 }
1599
1600 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1601 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1602 * @param docPID - pid of a greenstone document in the fedora repository. */
1603 public String getDocName(String docPID) {
1604 return docPID.substring(docPID.indexOf('-')+1);
1605 }
1606
1607 /** @return the &lt;name&gt; in the parameter collPID
1608 * (greenstone:&lt;name&gt;-collection)
1609 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1610 * in the docPID (greenstone:&lt;name&gt;-docID).
1611 * @param collPID - pid of a greenstone collection in the fedora repository. */
1612 public String getCollectionName(String collPID) {
1613 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1614 }
1615
1616 /** Convert the given Element to a String representing the same XML.
1617 * @return an element containing a copy element e with either only its child
1618 * elements or with all its descendents (depending on whether parameter
1619 * descendents is true or false).
1620 * @param e - the element to start copying from.
1621 * @param descendents - if true, e is copied with all its descendetns into the
1622 * element that's returned. If false, only e and its direct children are copied
1623 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1624 */
1625 protected Element getSubstructure(Element e, boolean descendents)
1626 {
1627 Document doc = builder.newDocument();
1628 Node n = doc.importNode(e, descendents);
1629 // descendents=true: import/copy descendents.
1630 // Else, copy just current node e (later copy its direct children)
1631 doc.appendChild(n); // need to put the copied node into a document
1632 // else it won't have a parent doc (DOMSource can't work with it
1633 // without it having a document parent).
1634
1635 // if we are not recursively copying all descendents, then copy just
1636 // the childnodes:
1637 if(!descendents) { // then copy just the children
1638 // get e's children and copy them into the new document
1639 NodeList children = e.getChildNodes();
1640 for(int i = 0; i < children.getLength(); i++) {
1641 // create copy
1642 n = doc.importNode(children.item(i), false);
1643 // attach it to parent
1644 doc.getDocumentElement().appendChild(n);
1645
1646 // Now we need to indicate whether this new node (child) is a leaf
1647 // or not. (This is necessary for getChildrenOfSection(), else
1648 // it's hard to know if the children are leaves or have further
1649 // subsections.
1650 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1651 // we're dealing only with section children
1652
1653 // Check if the matching original had children:
1654 Element originalsChild = (Element)children.item(i);
1655 NodeList grandchildren =
1656 originalsChild.getElementsByTagName(SECTION_ELEMENT);
1657 if(grandchildren.getLength() > 0) {
1658 // original's child has children, so indicate this
1659 // in the copied child:
1660 Element child = (Element)n;
1661 child.setAttribute(TYPE, INTERNAL_NODE);
1662 }
1663 }
1664 }
1665 }
1666 return doc.getDocumentElement();
1667 }
1668
1669
1670 /**
1671 * Return a datastream of a document, given the document's id
1672 * and the item id of the datastream which is to be retrieved.
1673 * @return the XML (in String form) of the item denoted by itemID
1674 * that's part of the fedora data object denoted by docPID.
1675 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1676 * Can't retrieve images denoted by itemID using this method, only items
1677 * that are of XML format.
1678 * @param docPID - pid of a greenstone document in the fedora repository.
1679 * @param itemID - the itemID of a datastream of the fedora object
1680 * identified by docPID.
1681 */
1682 protected String getItem(String docPID, String itemID)
1683 throws RemoteException, UnsupportedEncodingException
1684 {
1685 // MIMETypedStream getDatastreamDissemination(
1686 // String pid, String dsID, asOfDateTime)
1687 MIMETypedStream datastream
1688 = APIA.getDatastreamDissemination(docPID, itemID, null);
1689 return new String(datastream.getStream(), UTF8);
1690 }
1691
1692 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1693 * returns "1.2.1".
1694 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1695 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1696 * However, the string str is returned unchanged if the prefix does not occur
1697 * at the start of str.
1698 * @return the String parameter str without the prefix.
1699 * It can be used to return the number of an itemID of a greenstone document
1700 * stored in the fedora repository without the given prefix.
1701 * @param prefix - the prefix which ought to be removed from the itemID.
1702 * @param str - the value of the itemID.
1703 */
1704 protected String removePrefix(String str, String prefix) {
1705 // do nothing in those cases where the prefix is not in param str
1706 if(!str.startsWith(prefix))
1707 return str;
1708 // otherwise:
1709 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1710 return "1" + str.substring(prefix.length());
1711 } else {
1712 return str.substring(prefix.length());
1713 }
1714 }
1715
1716 /** Given a number of the form x(.y.z), this method returns this number
1717 * as is, except when x = 1, in which case, it would return .y.z
1718 * That is, given number=3.2.1, this method would return 3.2.1
1719 * But, given number=1.2.3, this method would return .2.3.
1720 * When number=1, it is NOT a special case: "" is returned as explained.
1721 * @param number - a proper (fedora-greenstone document) section number
1722 * @return the same number as it ought to be for the associated EX, DC datastreama.
1723 */
1724 protected String convertToMetaNumber(String number) {
1725 if(number.startsWith("1.") || number.equals("1"))
1726 return number.substring(1); // remove the first char: the initial '1'
1727 else return number;
1728 }
1729
1730 /** @return fedora's baseURL. It's of the form
1731 * "http://localhost:8080/fedora" */
1732 public String getBaseURL() { return baseURL; }
1733
1734 /** @return the portAddressURL (in use) of the Fedora APIA
1735 * web service (should be the endpoint location in the APIA's
1736 * WSDL file).
1737 * It's usually of the form baseURL+"/services/access" */
1738 public String getPortAddressURL() {
1739 return this.baseURL + this.portAddressSuffix;
1740 }
1741
1742 /** @return the baseURL for gsdlAssocFiles */
1743 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1744
1745 public static void main(String args[]) {
1746 try {
1747 FedoraConnection fedoraCon
1748 = new FedoraConnection(new File("fedoraGS3.properties"));
1749
1750 String[] pids = null;
1751 pids = fedoraCon.getCollections();
1752 String[] titles = fedoraCon.getCollectionTitles(pids);
1753 for(int i = 0; i < pids.length; i++) {
1754 System.out.println("extracted title:" + titles[i]);
1755 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1756 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1757 for(int j = 0; j < docPIDs.length; j++) {
1758 System.out.println("\tExtr doc title: " + docTitles[j]);
1759 }
1760 }
1761
1762 String PID = "greenstone:gs2mgdemo-collection";
1763 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1764 String dcXML = fedoraCon.getDC(PID);
1765 String exXML = fedoraCon.getEX(PID);
1766 String tocXML = fedoraCon.getTOC(docPID);
1767 System.out.println("Dublin Core Metadata for " + PID
1768 + " is:\n" + dcXML);
1769 System.out.println("GS3 extracted metadata for " + PID
1770 + " is:\n" + exXML);
1771 System.out.println("Table of Contents for " + docPID
1772 + " is:\n" + tocXML);
1773
1774
1775 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1776 System.out.println("\nSection names for " + docPID + " are:");
1777 for(int i = 0; i < sectionNames.length; i++)
1778 System.out.println(sectionNames[i]);
1779
1780 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1781 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1782 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1783 System.out.println("\nSection numbers for " + docPID + " are:");
1784 for(int i = 0; i < sectionNumbers.length; i++) {
1785 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1786 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1787 }
1788
1789 String sectionID = "SECTION1"; //SECTION1.5
1790 System.out.println("\n");
1791 System.out.println(sectionID+ " - entire subsection:\n"
1792 + fedoraCon.getSubsection(docPID, sectionID));
1793
1794 System.out.println(sectionID + " and children:\n"
1795 + fedoraCon.getChildrenOfSection(docPID, sectionID));
1796
1797 System.out.println(
1798 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1799 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1800 for(int i = 0; i < pids.length; i++)
1801 System.out.println(pids[i]);
1802
1803 System.out.println(
1804 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1805 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1806 for(int i = 0; i < pids.length; i++)
1807 System.out.println(pids[i]);
1808
1809 System.out.println("\nDone - exiting.");
1810 System.exit(0);
1811 } catch(RemoteException re) {
1812 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1813 re.printStackTrace();
1814 } catch(Exception e) {
1815 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1816 e.printStackTrace();
1817 //LOG.error("Unable to instantiate FedoraConnection\n" + e);
1818 }
1819 }
1820}
Note: See TracBrowser for help on using the repository browser.