source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 21855

Last change on this file since 21855 was 21855, checked in by ak19, 14 years ago

Previously unused constructor forgot to initialise a variable (portAddressSuffix) as was required

File size: 81.1 KB
Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import fedora.client.utility.AutoFinder;
25import fedora.server.access.FedoraAPIAServiceLocator;
26// The object for accessing FedoraAPI-A web services:
27import fedora.server.access.FedoraAPIA;
28
29// The definitions for all complex fedora types:
30import fedora.server.types.gen.MIMETypedStream;
31import fedora.server.types.gen.RepositoryInfo;
32import fedora.server.types.gen.FieldSearchResult;
33import fedora.server.types.gen.FieldSearchQuery;
34import fedora.server.types.gen.DatastreamDef;
35import fedora.server.types.gen.ObjectFields;
36import fedora.server.types.gen.Condition;
37import fedora.server.types.gen.ComparisonOperator;
38//import fedora.server.types.gen.*;
39
40import javax.net.ssl.SSLHandshakeException;
41import java.net.ConnectException;
42import org.xml.sax.SAXException;
43import java.io.UnsupportedEncodingException;
44import java.io.IOException;
45import javax.xml.parsers.ParserConfigurationException;
46import java.net.MalformedURLException;
47import java.rmi.RemoteException;
48
49import java.io.StringReader;
50import java.io.FileInputStream;
51import java.io.File;
52import java.util.TreeSet;
53import java.util.Properties;
54import java.util.Vector;
55
56import java.awt.GridLayout;
57import javax.swing.JLabel;
58import javax.swing.JOptionPane;
59import javax.swing.JPanel;
60import javax.swing.JPasswordField;
61import javax.swing.JTextField;
62
63import org.apache.log4j.Logger;
64import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
65import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
66import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
68import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
69
70import javax.xml.parsers.DocumentBuilderFactory;
71import javax.xml.parsers.DocumentBuilder;
72import javax.xml.transform.*;
73
74import org.xml.sax.InputSource;
75import org.w3c.dom.Document;
76import org.w3c.dom.Element;
77import org.w3c.dom.NodeList;
78import org.w3c.dom.Node;
79
80/** Class that establishes a connection with Fedora's web services (via
81 * Java stub classes for the same) and then provides methods to retrieve
82 * Greenstone-specific data, such as the TOC, EX, DC, and Section
83 * datastreams of the Greenstone documents stored in Fedora's repository.
84 * These datastreams are returned as Strings without any changes being
85 * made to them.
86 * @author ak19
87*/
88public class FedoraConnection implements FedoraGS3DL {
89 /** The logging instance for this class */
90 private static final Logger LOG = Logger.getLogger(
91 FedoraConnection.class.getName());
92
93 /** The version of fedora that is supported by class FedoraConnection */
94 protected static final String SUPPORTED_VERSION = "2.2.1";
95
96 /* Some fixed strings of known literals */
97 protected static final String TYPE = "type";
98 protected static final String INTERNAL_NODE = "internalNode";
99 protected static final String GET= "/get/";
100
101 // The DemoSOAPClient declares and uses the following as a static member
102 // Probably none of the APIA methods (web service methods) remembers
103 // state, that might explain why we can use it as a static member then.
104 /** The object used to access the Fedora API-A web service methods.
 * Because it is static, all FedoraConnection instances share one handle:
 * init() clears it and createAPIA() reassigns it on every connection
 * attempt. */
105 protected static FedoraAPIA APIA;
106
107 /** Version of the running fedora server */
108 protected String fedoraVersion;
109 /** The location of the fedora server, usually of the form
110 * http://localhost:8080/fedora */
111 protected String baseURL;
112
113 /** The user-specified portAddressSuffix of the Fedora Access web services
114 * (endpoint URL in the WSDL), usually of the form
115 * http://localhost:8080/fedora/services/access
116 * Users can tell FedoraGS3 to try accessing that first by setting
117 * the "port.address.suffix" property in the properties file.
118 * FedoraGS3 itself will not write the portAddressSuffix currently used in
119 * the file for next time, but leave whatever value was entered in the
120 * properties file. The portAddress--not just suffix--currently in use (once
121 * the FedoraAPIA handle has been instantiated) can be obtained through
122 * getPortAddressURL() method. */
123 protected String portAddressSuffix;
124
125 /** The part of the portAddress that comes after the baseURL. It is usually:
126 * "/services/access" */
127 protected static final String defaultPortAddressSuffix = "/services/access";
128
129 /** The preferred language of the display content */
130 protected String lang;
131 /** The maximum number of collections to retrieve */
132 protected int maxresults;
133 /** DocumentBuilder used to create and parse XML documents */
134 protected DocumentBuilder builder;
135
136 /** Static method that returns the version of Fedora supported by this
137 * class FedoraConnection. */
138 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
139 /** The version of the running Fedora server, which may or may not
140 * match the supported version. */
141 public String getFedoraVersion() { return fedoraVersion; }
142
143 /** @return the default language used to query for titles (and anything else
144 * where there are multiple language options). Upon initialisation, this
145 * defaults to English. */
146 public String getLanguage() { return lang; }
147
148 /** Sets the default language used to query for titles (and anything else
149 * where there are multiple language options). If the default language for any
150 * query is not available, then English ("en") is used. If that's not available
151 * then the first other available language is used.
152 * @param lang - the two-letter language code to set the default language to.
153 */
154 public void setLanguage(String lang) { this.lang = lang; }
155
156 /** The default maximum number of search results returned for a search. Upon
157 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
158 public int getMaxResults() { return maxresults; }
159
160 /** Set the default maximum number of search results returned for a search.
161 * @param maxresults - the new default maximum number of search results to
162 * be returned. */
163 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
164
165 /** Code for this constructor is from DemoSOAPClient.java.
166 * Instantiates the APIA handle using the protocol, host, port, fedora
167 * server repository username and password.
168 * @param host - the fedora server host (may be prefixed with http:// or
169 * https:// if parameter protocol is empty). If there's no protocol, and
170 * no protocol prefixed to the host, then the protocol defaults to http.
171 * @param protocol - either http or https (or empty "")
172 * @param port - the port on which fedora is running.
173 * @param fedoraServerUsername - the administrator username required to
174 * access the fedora server's repository. ("fedoraAdmin" unless changed).
175 * @param fedoraServerPassword - the fedora server repository's
176 * administrator password. If none was set on fedora installation, this
177 * can be empty (""). */
178 public FedoraConnection(String protocol, String host, int port,
179 String fedoraServerUsername, String fedoraServerPassword)
180 throws ParserConfigurationException, MalformedURLException,
181 SSLHandshakeException, RemoteException, AuthenticationFailedException,
182 NotAFedoraServerException, ConnectException, Exception
183 {
184 try {
185 this.portAddressSuffix = "";
186 init(protocol, host, Integer.toString(port),
187 fedoraServerUsername, fedoraServerPassword);
188 } /*catch(RemoteException re) { //subclass of IOException
189 throw re;
190 } catch(SSLHandshakeException ssle) { //subclass of IOException
191 // this is also of type IOException
192 throw ssle;
193 }*/ catch(IOException ioe) { // connected to the wrong server
194 String exceptMsg = ioe.getMessage().toLowerCase();
195 if(exceptMsg.indexOf("request failed") != -1
196 || exceptMsg.indexOf("404") != -1)
197 throw new NotAFedoraServerException();
198 else // the IOException is not due the cause we thought it was, so
199 throw ioe; // rethrow whatever other IOException was caught (which
200 // could have been RemoteException or SSLHandshakeException
201 // or some other cause)
202 }
203 }
204
205 /** Default constructor which takes input from the user to get host, port,
206 * fedora username and password.
207 * It keeps looping to display authentication popup, until valid values are
208 * entered:
209 * (a) if password is wrong, a RemoteException is thrown and popup reappears;
210 * This popup keeps appearing until the password and username are correct (as
211 * long as there's indeed a fedora server listening at the given host and port).
212 * (b) SSLHandshakeException occurs: this happens EITHER when the user prefixed
213 * the 'https' protocol to the host string when it should have been 'http';
214 * OR the ssl connection failed for some other reason.
215 * Allowing for the 1st case, the authentication popup is displayed just once
216 * more. On the second (consec) attempt, the SSLHandshakeException is rethrown.
217 * NOTE: if a fedora server at the protocol (https or http) isn't accessible,
218 * it takes a long time for the SSLHandshakeException to be thrown.
219 * (c) if the connection is refused, then a ConnectException is thrown.
220 * In that case, it's
221 * EITHER because the host and port values that were entered are wrong (and
222 * the authentication popup dialog is redisplayed just once more allowing
223 * the user to correct host/port values)
224 * OR the entered host and port were right but the fedora server at this
225 * host and port is not running.
226 * On the second consecutive attempt where a ConnectException is thrown,
227 * it's no longer processed but rethrown, as there's no use in redisplaying
228 * the authentication popup when the problem is not an authentication issue.
229 * (d) Another IOException (other than the SSLHandshakeException of (b))
230 * occurs when there is indeed a server listening at the host and port
231 * entered, but it's not a Fedora server, because it is unable to process
232 * Fedora requests. If the expected message is found in the exception, then
233 * the authentication popup is displayed. However, other causes for an
234 * IOException are not handled. In such cases, the IOException is rethrown.
235 * (Note that IOException is not in the throws clause - other causes for
236 * it being unknown, it can be considered as the more generic Exception.)
237 */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // Nothing has been stored beforehand, so the authentication dialog
    // loop starts from an empty property set; the set is only needed
    // for the duration of that loop.
    setInitialisationProperties(new Properties());
}
249
250 /** Single argument constructor that takes the name of the properties file
251 * defining the values of the initialisation parameters required to
252 * instantiate a FedoraConnection. These are fedora server username, password,
253 * host and port. If these values are not present in the file, they are set
254 * to "" before showing the initialisation input dialog.
255 * @param propertyFile is the name of the properties file specifying the
256 * values for Fedora server username, password, host and port. */
257 public FedoraConnection(File propertyFile)
258 throws ParserConfigurationException, MalformedURLException,
259 CancelledException, ConnectException, RemoteException,
260 SSLHandshakeException, Exception
261 {
262 Properties properties = new Properties();
263 // Load the properties from the given file
264 try{
265 if(propertyFile.exists()) {
266 properties.load(new FileInputStream(propertyFile));
267 }
268 } catch(Exception e) {
269 // If the file didn't exist or could not be located,
270 // then we just continue by creating empty properties
271 LOG.warn("Exception loading from propertyFile "
272 + propertyFile + ": " + e);
273 }
274
275 // Go through the process of showing the initialisation dialog
276 setInitialisationProperties(properties);
277
278 // Now let's save whatever values the user may have entered into the
279 // input dialog as the default values for next time the dialog shows
280 try {
281 java.io.FileOutputStream out = new java.io.FileOutputStream(
282 propertyFile); // same file as properties loading file
283 // First make sure errormessage gets stored as "" and doesn't
284 // cause problems next time.
285 properties.setProperty("errormessage", "");
286 // Don't save passwords
287 properties.setProperty("password", "");
288 // If the portAddressSuffix is in the file already, then it's
289 // user-specified and we shouldn't change it. But if there is no
290 // such property in the file, then create it and write it to the file
291 // with an empty string value:
292 String portSuffix = properties.getProperty("port.address.suffix");
293 if(portSuffix == null) {
294 properties.setProperty("port.address.suffix", "");
295 }
296
297 properties.store(out, "fedoraGS3 properties"); // write properties
298 // Javadoc states that "The output stream remains open after this
299 // method (Properties.store) returns." So we close it here
300 out.close();
301 } catch(Exception e) {
302 LOG.warn("Exception writing to propertyFile "
303 + propertyFile + ": " + e);
304 }
305 properties = null; // finished
306 }
307
308 /** Method that loops to display the dialog that retrieves the
309 * fedora server initialisation properties from the user. If there
310 * is a property file with values set already, it will display
311 * the previously entered values by loading them from that file.
312 * Otherwise, input fields in the dialog are empty.
313 * @param properties the Properties Hashmap storing values for
314 * username, password, host and port (and any errormessage). */
315 protected void setInitialisationProperties(Properties properties)
316 throws ParserConfigurationException, MalformedURLException,
317 CancelledException, ConnectException, RemoteException,
318 SSLHandshakeException, Exception
319 {
320 // keep looping to display authentication popup, until valid values are
321 // entered (except when a ConnectionRefused Exception is caught - this
322 // needs to be rethrown):
323 boolean authenticated = true;
324 // reset any error messages that may have been stored (should not be
325 // the case, but if there had been any difficulty during storing, it
326 // may not have written out an empty errorMessage)
327 properties.setProperty("errormessage", "");
328 do{
329 // show the Authentication-popup:
330 // By passing the HashMap Properties, user-updated values will
331 // be persistent in the authentication-popup fields (rather than
332 // reset to the default initial values).
333 properties = showAuthenticationPopup(properties);
334 String fedoraServerUsername = properties.getProperty("username", "");
335 String fedoraServerPassword = properties.getProperty("password", "");
336 String host = properties.getProperty("host", "");
337 String port = properties.getProperty("port", "");
338 //String protocol = host.startsWith("http") ? "" : "http://";
339 String protocol = "http://";
340 if(host.startsWith("http") || host.startsWith("https"))
341 protocol = "";
342 // NOTE THAT: if a fedora server at https:// is not accessible,
343 // it takes a long time for the authentication popup to reappear.
344
345 try{
346 this.portAddressSuffix
347 = properties.getProperty("port.address.suffix", "");
348 // Use the FedoraClient utility to get the SOAP stub for APIA.
349 // This SOAP stub enables the client to connect to a Fedora
350 // repository via the API-A web service interface.
351 init(protocol, host, port,
352 fedoraServerUsername, fedoraServerPassword);
353 // will throw Exception if it can't instantiate APIA
354
355 // if no exception thrown in the initialisation statement above,
356 // then we have been authenticated:
357 authenticated = true;
358 } catch(AuthenticationFailedException afe) {
359 authenticated = false;
360 properties.setProperty("errormessage", afe.getMessage());
361 } catch(RemoteException e) { // causes could be various
362 String reason = e.getMessage();
363 if(e.getCause() != null) {
364 // For instance, if a ConnectException indicating
365 // 'Connection Refused' or a java.net.UnknownHostException
366 // caused the RemoteException
367
368 // Strip out prefix "Nested exception is..." from the
369 // encapsulating Exception's message, by using the Cause's
370 // message. Keep Exception classname to give it some context:
371 reason = e.getCause().getClass().getName() + ": "
372 + e.getCause().getMessage();
373 // Give some more information if the connection was refused.
374 // (This can also happen when the Fedora server is not running)
375 if(e.getCause().getClass().equals(ConnectException.class)) {
376 reason += FedoraGS3Exception.connectionRefusedMessage;
377 }
378 }
379 // if the message indicates that a server was running there,
380 // then we tell the user it was not a Fedora server
381 if(reason.toLowerCase().contains("404")
382 || reason.toLowerCase().contains("request failed"))
383 {
384 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
385 }
386 authenticated = false;
387 properties.setProperty("errormessage", reason);
388 } catch(ConnectException e) {
389 properties.setProperty("errormessage",
390 FedoraGS3Exception.connectionRefusedMessage);
391 authenticated = false;
392 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
393 // be handled before IOException, as it's an IOException subclass.
394 authenticated = false;
395 properties.setProperty("errormessage",
396 FedoraGS3Exception.sslHandshakeExceptionMessage);
397 // we won't prefix the host with http for the user, as https
398 // might be right after all, and something else might have gone
399 // during the connection attempt instead.
400 //host = host.replace("https", "http"); //setting it for them
401 //properties.setProperty("host", host);
402 } catch(IOException ioe) { // occurs when we try to connect to a
403 // host/port where some server other than Fedora's is listening
404 // (e.g. if we end up connecting to GS3's host and port).
405 // In that case, we can get exception messages like a 404:
406 // "Unable to instantiate FedoraConnection
407 // java.io.IOException: Request failed [404 /fedora/describe]"
408 // Test this by trying to connect to localhost at 9090 where GS3 is
409 String exceptMsg = ioe.getMessage().toLowerCase();
410 if(exceptMsg.indexOf("request failed") != -1
411 || exceptMsg.indexOf("404") != -1)
412 {
413 properties.setProperty("errormessage",
414 NotAFedoraServerException.MESSAGE
415 + "\n(" + ioe.getMessage() + ")");
416 } else if(exceptMsg.indexOf("401") != -1
417 || exceptMsg.indexOf("500") != -1)
418 {
419 authenticated = false;
420 properties.setProperty("errormessage", ioe.getMessage());
421 } else { // the exception occurred for some other reason, rethrow it
422 throw ioe;
423 }
424 }
425 } while(!authenticated); // will keep showing popup until auhentication
426 // and connection input values are valid
427 }
428
429 /**
430 * Static method that displays a popup to allow the user to provide Fedora
431 * authentication (username, pwd) and connection (protocol+host, port) details.
432 * @param properties is a Properties HashMap where the property Keys which must
433 * have been put in here in advance (even with "" Values if appropriate) are:
434 * <pre>
435 * - username
436 * - password
437 * - host (may - but need not - be prefixed with either of the protocols
438 * "http://" and "https://")
439 * - port
440 * - errormessage (displayed near the top of the popup dialog). Can be "".
441 * </pre>
442 * The values stored in the properties HashMap for the above property are
443 * initially displayed in the fields and the user can overwrite them.
444 * This is useful in such cases where invalid values were entered and this
445 * popup must be redisplayed to allow the user to correct their previous input.
446 * @return the same HashMap Properties which was passed as parameter. */
447 protected static Properties showAuthenticationPopup(Properties properties)
448 throws CancelledException
449 {
450 // Retrieve all the properties -- defaults to "" if any are null
451 JTextField usernameField = new JTextField(
452 properties.getProperty("username", "fedoraAdmin"));
453 JTextField passwordField = new JPasswordField(
454 properties.getProperty("password", ""));
455 JTextField hostField = new JTextField(
456 properties.getProperty("host", "localhost"));
457 JTextField portField = new JTextField(
458 properties.getProperty("port", "8080"));
459
460 JPanel panel = new JPanel(new GridLayout(4,2));
461 panel.add(new JLabel("User Name"));
462 panel.add(usernameField);
463 panel.add(new JLabel("Password"));
464 panel.add(passwordField);
465 panel.add(new JLabel("Host"));
466 panel.add(hostField);
467 panel.add(new JLabel("Port"));
468 panel.add(portField);
469
470 String heading = "Fedora Server Admin Authentication:";
471 String errorMessage = properties.getProperty("errormessage", "");
472 if(!errorMessage.equals("")) {
473 heading = "=> " + errorMessage + "\n\n" + heading;
474 }
475 int option = JOptionPane.showConfirmDialog(null, new Object[] {
476 heading, panel},
477 "Enter Network Password",
478 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
479
480 if (option == JOptionPane.OK_OPTION) {
481 String fedoraServerUsername = usernameField.getText();
482 String fedoraServerPassword = passwordField.getText();
483 String host = hostField.getText();
484 String port = portField.getText();
485 properties.setProperty("username", fedoraServerUsername);
486 properties.setProperty("password", fedoraServerPassword);
487 properties.setProperty("host", host);
488 properties.setProperty("port", port);
489 } else { // Cancel option
490 throw new CancelledException();
491 }
492 return properties;
493 }
494
495 /** Init method that is called by the constructor to set some
496 * important member variables including instantiating the APIA object
497 * used to invoke the Fedora APIA web service operations.
498 * @param protocol can be http or https
499 * @param host is the name of the Fedora server host
500 * @param port is the port number (String form) of the Fedora server
501 * @param fedoraServerUsername is the user name to access the Fedora
502 * Server
503 * @param fedoraServerPassword is the password needed to access the
504 * Fedora Server
505 */
506 protected void init(String protocol, String host, String port,
507 String fedoraServerUsername, String fedoraServerPassword)
508 throws ParserConfigurationException, MalformedURLException,
509 AuthenticationFailedException, RemoteException, Exception
510 {
511 // initialise member variables
512 lang = ENGLISH;
513 maxresults = Integer.MAX_VALUE;
514 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
515 builder = factory.newDocumentBuilder();
516
517 // (protocol is "" if host already contains protocol)
518 if(!protocol.equals("") && !protocol.endsWith("://"))
519 protocol += "://";
520 // now create baseURL = protocol://host:port/fedora
521 this.baseURL = protocol + host + ":" + port + "/fedora";
522
523 // Get the FedoraAPIA handle to/stub of the Fedora web services
524 // New way of instantiating connection to Fedora is dependent on
525 // fewer files of FedoraClient.jar
526 FedoraAPIAServiceLocator serviceLocator
527 = new FedoraAPIAServiceLocator(fedoraServerUsername,
528 fedoraServerPassword);
529
530 APIA = null;
531 boolean isUserSpecifiedPortAddressSuffix = false;
532 // try any portAddressSuffix specified by the user
533 if(!this.portAddressSuffix.equals("")) {
534 isUserSpecifiedPortAddressSuffix = true;
535 this.createAPIA(serviceLocator, this.portAddressSuffix,
536 "user-specified", isUserSpecifiedPortAddressSuffix);
537 }
538
539 // If the user-specified portAddressSuffix failed or if there was none
540 // given, then APIA will be null, so we will try with the default
541 // portAddressSuffix. This time all exceptions will be passed on.
542 if(APIA == null) {
543 isUserSpecifiedPortAddressSuffix = false;
544 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
545 "default", isUserSpecifiedPortAddressSuffix);
546 }
547
548 }
549
550 /** Tries to create the FedoraAPIA instance using the serviceLocator
551 * and the given portSuffix. The APIA instance is obtained for the
552 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
553 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
554 * Remote Exception from AXIS that it can't find the target service to
555 * invoke is ignored so that the caller can retry with the default port-
556 * address suffix first before giving up. */
557 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
558 String portSuffix, String messageInsert,
559 boolean isUserSpecifiedPortAddressSuffix)
560 throws Exception
561 {
562 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
563 // this.portAddressSuffix : defaultPortAddressSuffix;
564
565 try {
566 LOG.debug( "Trying to connect to Fedora using the given"
567 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
568 + baseURL + portSuffix);
569 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
570 new java.net.URL(baseURL+portSuffix));
571 // let's test whether we're authenticated (otherwise a
572 // RemoteException will be thrown to indicate that the
573 // password was incorrect.)
574 RepositoryInfo repositoryInfo = APIA.describeRepository();
575 // throws RemoteException if pwd wrong or for other reasons
576 // in which case describeRepository() service is unavailable
577 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
578 // If we come all the way here, no exceptions were thrown:
579 this.portAddressSuffix = portSuffix; // store the one currently in use
580 } catch(RemoteException re) {
581 // if we're here, then APIA was unable to call the web service
582 // If this was because the fedora authentication failed, then
583 // let's throw a custom exception
584 String message = re.getMessage().toLowerCase();
585 // Looking for something Unauthorized(401)
586 if(message.indexOf("unauthorized") != -1
587 || message.indexOf("401") != -1)
588 {
589 throw new AuthenticationFailedException();
590 } else if(isUserSpecifiedPortAddressSuffix
591 && re.getMessage().contains(
592 FedoraGS3Exception.missingTargetService))
593 {
594 LOG.warn("Failed to connect to Fedora APIA services at given"
595 + " port address:\n" + portSuffix
596 + "\nException: " + re.getMessage());
597 // APIA.describeRepository can throw a remote exception
598 // whereby AXIS says the target service is missing and can't
599 // be invoked (FedoraGS3Exception.missingTargetService)
600 // Don't rethrow this, if AXIS can't find the user-specified
601 // portAddressSuffix, we will try with the default suffix next
602 APIA = null;
603 } else { // if trying default portAddressSuffix or if any other
604 // RemoteException was generated (whose cause is something
605 // other than an authentication failure) rethrow it.
606 throw re;
607 }
608 } catch(Exception e) { // Other Exceptions
609 // Could possibly be a ServiceException when using ServiceLocator
610 if(isUserSpecifiedPortAddressSuffix) {
611 APIA = null; // we won't throw other exceptions yet until
612 // we have tried the default PortAddressSuffix for the baseURL
613 } else {
614 throw new FedoraGS3InitFailureException(e);
615 }
616 }
617 }
618
619 /** Gets all greenstone collections. Searches for greenstone:*-collection.
620 * Method getCollections() defaults to getting only those objects in fedora's
621 * repository whose pids are of the format greenstone:*-collection.
622 * The use of AutoFinder and findObjects is shown in
623 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
624 * The Fedora-APIA's method definition of findObjects is:
625 * <pre>
626 * fedora-types:FieldSearchResult findObjects(
627 * fedora-types:ArrayOfString resultFields,
628 * xsd:nonNegativeInteger maxResults,
629 * fedora-types:FieldSearchQuery query )
630 * </pre>
631 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
632 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
633 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
634 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html">Type definition of 2.2.1 FieldSearchQuery</a>
635 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
636 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
637 *
638 * @return an array of Strings containing the pids of all collections
639 * matching the format greenstone:*-collection.
640 */
641 public String[] getCollections() throws RemoteException
642 {
643 // Available constructors:
644 // FieldSearchQuery(java.util.List conditions)
645 // FieldSearchQuery(java.lang.String terms)
646 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
647 FieldSearchQuery query = new FieldSearchQuery();
648 query.setTerms(queryStr);
649 query.setConditions(null);
650 // we'd like pid and title returned for each object
651 // we pass maxResults=null to get all objects that match
652 // (i.e. all collections)
653 String[] pids = null;
654
655 FieldSearchResult collection = AutoFinder.findObjects(
656 APIA, new String[]{"pid", "title"}, maxresults, query);
657 ObjectFields[] results = collection.getResultList();
658 pids = new String[results.length];
659 for(int i = 0; i < results.length; i++) {
660 pids[i] = results[i].getPid();
661 }
662 return pids;
663 }
664
665 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
666 * top-level documents or document sections - have a DC datastream. This
667 * method returns the content (XML) of the DC datastream as it is stored in
668 * fedora's repository.
669 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
670 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
671 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
672 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
673 * The method signature is:
674 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
675 * where dsID = itemID (look at datastreams page of running fedora instance)
676 * To access the XML content of the MIMETypedObject returned, we use its method
677 * bytes[] getStream(), but when instantiating a String from this, we have to
678 * use the String() contructor where we can specify the charset encoding (in
679 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
680 * @return a String version of the XML in the DC datastream for the fedora
681 * object denoted by pid.
682 * @param pid - the fedora persistent identifier for an item in the fedora
683 * repository.
684 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
685 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
686 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
687 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
688 */
689 public String getDC(String pid)
690 throws RemoteException, UnsupportedEncodingException
691 {
692 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
693 // datastream ID, dsID = itemID, look at a running fedora
694 MIMETypedStream dcStream
695 = APIA.getDatastreamDissemination(pid, DC, null);
696 //asOfDateTime = null to get the current version of the dataStream
697
698 // need to set the charset encoding to UTF8
699 return new String(dcStream.getStream(), UTF8);
700 }
701
702 /** All "greenstone:*" objects in fedora (be they collections be they
703 * collections, top-level documents or document sections) have an EX
704 * datastream. This method returns the content (XML) of the EX datastream as
705 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
706 * @return a String version of the XML in the EX datastream for the fedora
707 * object denoted by pid.
708 * @param pid - the fedora persistent identifier for an item in the fedora
709 * repository.
710 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
711 * asOfDateTime).
712 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
713 * @see String getDC(String pid) throws Exception
714 * */
715 public String getEX(String pid)
716 throws RemoteException, UnsupportedEncodingException
717 {
718 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
719 //asOfDateTime = null to get the current version of the dataStream
720
721 // need to set the charset encoding to UTF8
722 return new String(exStream.getStream(), UTF8);
723 }
724
725 /** Some "greenstone:*" top-level documents in the fedora repository (but not
726 * greenstone collections or document sections) have a DLS metadata datastream.
727 * This method returns the content (XML) of the DLS datastream as is. (It calls
728 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
729 * @return a String version of the XML in the DLS datastream for the fedora
730 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
731 * @param pid - the fedora persistent identifier for an item in the fedora
732 * repository.
733 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
734 * asOfDateTime).
735 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
736 * @see String getDC(String pid) throws Exception
737 * */
738 public String getDLS(String pid)
739 throws RemoteException, UnsupportedEncodingException
740 {
741 MIMETypedStream dlsStream = null;
742 // If there is no DLS datastream, it throws an exception (whose class
743 // fedora.server.errors.DatastreamNotFoundException can't be imported
744 // here (it's not in the client side fedora.server.* package, but on
745 // the server side package of that name):
746 try{
747 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
748 //asOfDateTime=null to get the current version of the dataStream
749 } catch(RemoteException e) {
750 //These two don't work:
751 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
752 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
753
754 if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
755 { // there is no DLS data stream for this document
756 return "";
757 }
758 else { // different problem, exception due to different cause
759 throw(e);
760 }
761 }
762 if(dlsStream == null)
763 return "";
764 // need to set the charset encoding to UTF8
765 return new String(dlsStream.getStream(), UTF8);
766 }
767
768 /** All "greenstone:*" objects in fedora (be they collections or documents)
769 * have a TOC datastream, unless they have only 1 section (SECTION1).
770 * This method returns the content (XML) of the TOC datastream as is.
771 * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
772 * @return a String version of the XML in the TOC datastream for the fedora
773 * object denoted by pid.
774 * @param pid - the fedora persistent identifier for an item in the fedora
775 * repository.
776 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
777 * asOfDateTime)
778 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
779 * @see String getDC(String pid) throws Exception
780 * */
781 public String getTOC(String pid)
782 throws RemoteException, UnsupportedEncodingException
783 {
784 try {
785 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
786 //asOfDateTime = null to get the current version of the dataStream
787 // need to set the charset encoding to UTF8
788 return new String(tocStream.getStream(), UTF8);
789 } catch(RemoteException re) {
790 // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
791 return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
792 }
793 }
794
795 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
796 * for the collections indicated by collPIDs.
797 * @param collPIDs - an array of Strings denoting the pids for greenstone
798 * collections stored in the fedora repositoryl. These should be of the
799 * format "greenstone:&lt;collectionName&gt;-collection". */
800 public String[] getCollectionNames(String[] collPIDs) {
801 String[] collNames = new String[collPIDs.length];
802 for(int i = 0; i < collPIDs.length; i++)
803 collNames[i] = getCollectionName(collPIDs[i]);
804 return collNames;
805 }
806
807 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
808 * in the parameter collNames.
809 * @param collNames - a list of names of greenstone collections
810 * stored in the fedora repository. */
811 public String[] getCollectionPIDs(String[] collNames) {
812 String[] collPIDs = new String[collNames.length];
813 for(int i = 0; i < collNames.length; i++)
814 collPIDs[i] = getCollectionName(collNames[i]);
815 return collPIDs;
816 }
817
818 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
819 * denoted by parameter collName.
820 * @param collName - the name of a greenstone collection stored
821 * stored in the fedora repository. */
822 public String getCollectionPID(String collName) {
823 return GREENSTONE_+collName+_COLLECTION;
824 }
825
826 /**
827 * Gets the title of the collection denoted by the given collection's pid by
828 * retrieving the title metadata for it from the collection's EX datastream.
829 * @return the title (in the default language, else English, else the
830 * first title found) for the particular collection denoted by its PID.
831 * @param collPID is the pid of a greenstone collection in the fedora
832 * repository. */
833 public String getCollectionTitle(String collPID)
834 throws RemoteException, UnsupportedEncodingException,
835 SAXException, IOException
836 {
837 String title = null; // has to be null initially, we do a check on it
838 // Parse the EX datastream (XML), and in its DOM, find the
839 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
840 // There might be one OR several of those with attribute
841 // name="collectionname". If there's only one, then get that.
842 // If there are several, there would possibly a be qualifier attribute,
843 // in which case get qualifier=lang (where lang is the member variable)
844 // If there is no qualifier with the requested language, then get the
845 // english one which is likely to be there, else return the title for
846 // the first collectionname .
847
848 MIMETypedStream exdata
849 = APIA.getDatastreamDissemination(collPID, EX, null);
850 String exStream = new String(exdata.getStream(), UTF8);
851
852 InputSource source = new InputSource(new StringReader(exStream));
853 Document doc = builder.parse(source);
854 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
855 NodeList children = docEl.getChildNodes();
856
857 String firstName = "";
858 String englishName = "";
859 for(int i = 0; i < children.getLength(); i++ ) {
860 Node n = children.item(i);
861 if(n.getNodeType() == Node.ELEMENT_NODE) {
862 Element e = (Element)n;
863 if(e.hasAttribute(NAME)
864 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
865 firstName = FedoraCommons.getValue(e);
866 if(!e.hasAttribute(QUALIFIER)) {
867 title = FedoraCommons.getValue(e);
868 break;
869 }
870 else if(e.getAttribute(QUALIFIER).equals(lang)) {
871 title = FedoraCommons.getValue(e);
872 break;
873 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
874 englishName = FedoraCommons.getValue(e);
875 }
876 }
877 }
878 }
879
880 // if the title is still not set to that of the requested language,
881 // then try setting it to the collection name in English. If English
882 // isn't available, then set it to the first collection name provided
883 // (in whichever language).
884 if(title == null) {
885 title = englishName.equals("") ? firstName : englishName;
886 }
887 doc = null;
888 return title;
889 }
890
891 /** @return the collection titles for all the collections indicated by
892 * collPIDs.
893 * @param collPIDs - a list of pids identifying greenstone collections
894 * stored in the fedora repository. */
895 public String[] getCollectionTitles(String[] collPIDs)
896 throws RemoteException, UnsupportedEncodingException,
897 SAXException, IOException
898 {
899 String[] titles = new String[collPIDs.length];
900
901 // parse each EX datastream (XML) which contains the gs3-extracted meta.
902 for(int i = 0; i < collPIDs.length; i++) {
903 titles[i] = getCollectionTitle(collPIDs[i]);
904 }
905 return titles;
906 }
907
908 /** @return the title metadata for the given doc objects of a collection.
909 * These titles are returned in the same order as the given docIDs.
910 * (The docPIDs already contain the collection name anyway.)
911 * @param docPIDs - a list of pids identifying documents stored in the
912 * fedora repository. */
913 public String[] getDocTitles(String[] docPIDs)
914 throws RemoteException, UnsupportedEncodingException,
915 SAXException, IOException
916 {
917 String[] titles = new String[docPIDs.length];
918 for(int i = 0; i < docPIDs.length; i++) {
919 titles[i] = getDocTitle(docPIDs[i]);
920 }
921 return titles;
922 }
923
924 /** Gets the title metadata for a particular doc object in a collection
925 * denoted by docPID. The docPID already contains the collection name.
926 * @return the title for the fedora document item denoted by docPID
927 * @param docPID is the pid of the document in the fedora repository
928 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
929 public String getDocTitle(String docPID)
930 throws RemoteException, UnsupportedEncodingException,
931 SAXException, IOException
932 {
933 // We need the extracted metadata file, and find its
934 // documentElement's child
935 // <ex:metadata name="Title">sometitle</ex:metadata>
936 // where the title we return is sometitle
937
938 String title = "";
939 MIMETypedStream exdata
940 = APIA.getDatastreamDissemination(docPID, EX, null);
941 String exStream = new String(exdata.getStream(), UTF8);
942 return getTitle(exStream);
943 }
944
945 /** Given a string representation of a document's or document section's
946 * EX datastream -- which is a greenstone extracted metadata XML file --
947 * of the form:
948 * &lt;ex&gt;
949 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
950 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
951 * ...
952 * &lt;/ex&gt;
953 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
954 * returns the value embedded in that element ('sometitle' in
955 * the example above).
956 * @return the title metadata of the document/document section whose EX
957 * datastream is passed as parameter
958 * @param exStream the EX datastream in String form of the document or
959 * document section. */
960 protected String getTitle(String exStream)
961 throws SAXException, IOException
962 {
963 String title = "";
964 InputSource source = new InputSource(new StringReader(exStream));
965 Document doc = builder.parse(source);
966 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
967 NodeList children = docEl.getChildNodes();
968
969 // Cycle through all the *element* children of <ex:ex></ex:ex>
970 // which are all of the form:
971 // <ex:metadata name="somename">somevalue</ex:metadata>
972 // Find the one where name="Title", its value is the title
973 for(int i = 0; i < children.getLength(); i++ ) {
974 Node n = children.item(i);
975 if(n.getNodeType() == Node.ELEMENT_NODE) {
976 Element e = (Element)n;
977 if(e.hasAttribute(NAME)
978 && e.getAttribute(NAME).equals(TITLE)) {
979 title = FedoraCommons.getValue(e);
980 break;
981 }
982 }
983 }
984 return title;
985 }
986
987 /** @return the title metadata for the given document sections.
988 * These titles are returned in the same order as the given docPIDs
989 * and associated sectionIDs.
990 * (The docPIDs already contain the collection name anyway.)
991 * @param docPIDs - a list of pids identifying documents stored in the
992 * fedora repository.
993 * @param sectionIDs - a list of sectionIDs identifying individual sections
994 * of documents stored in the fedora repository whose titles are requested. */
995 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
996 throws RemoteException, UnsupportedEncodingException,
997 SAXException, IOException
998 {
999 String[] titles = new String[docPIDs.length];
1000 for(int i = 0; i < docPIDs.length; i++) {
1001 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1002 }
1003 return titles;
1004 }
1005
1006 /** @return the title metadata for the given document section.
1007 * (The docPID already contain the collection name anyway.)
1008 * @param docPID - a pid identifying a document in the fedora repository.
1009 * @param sectionID - the sectionID of the section of the
1010 * document whose title is requested. */
1011 public String getSectionTitle(String docPID, String sectionID)
1012 throws UnsupportedEncodingException, RemoteException,
1013 SAXException, IOException
1014 {
1015 String ex = this.getSectionEXMetadata(docPID, sectionID);
1016 return getTitle(ex);
1017 }
1018
1019 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1020 * returns the PIDs of the data objects found, with the exception of
1021 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1022 * collection PID.
1023 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1024 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1025 * the collection and not an object of the same collection) are returned.
1026 * All pids that do not map to a collection are assumed to be documents!
1027 * @return a list of the pids of all the (doc) objects in a collection.
1028 * @param colPID is the pid of the greenstone collection stored in
1029 * the fedora repository. */
1030 public String[] getCollectionDocs(String colPID)
1031 throws RemoteException
1032 {
1033 String colName = getCollectionName(colPID);
1034 //LOG.debug("colName: " + colName);
1035
1036 // Search fedora objects for pid=greenstone:<colName>-*
1037 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1038 // searches for "greenstone:"+colName+"-*";
1039 FieldSearchQuery query = new FieldSearchQuery();
1040 query.setTerms(queryStr);
1041 query.setConditions(null);
1042 String[] pids = null;
1043
1044 FieldSearchResult objects = AutoFinder.findObjects(
1045 APIA, new String[]{"pid", "title"}, maxresults, query);
1046 ObjectFields[] results = objects.getResultList();
1047
1048 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1049 // that's not a document object:
1050 pids = new String[results.length-1]; // not storing collection object
1051 int index = 0; // keeps track of docPid index
1052 for(int i = 0; i < results.length; i++) {
1053 // check it's not a collection object
1054 if(!results[i].getPid().endsWith(_COLLECTION)) {
1055 pids[index] = results[i].getPid();
1056 index++;
1057 }
1058 }
1059
1060 return pids;
1061 }
1062
1063 /** Given the pid of a document fedora data object, this method will return
1064 * all itemIDs that are part of that data object and are Sections. For further
1065 * information see interface Comparable (implemented by String), SortedSet
1066 * and TreeSet.
1067 * @return an array of itemIDs of the Sections of the document,
1068 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1069 * @param docPID is a fedora pid identifying a greenstone document object.
1070 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1071 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1072 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1073 */
1074 public String[] getSectionNames(String docPID) throws RemoteException {
1075 // DatastreamDef[] listDatastreams(
1076 // java.lang.String pid, java.lang.String asOfDateTime)
1077
1078 // listDatastreams returns information on each item (including itemID=dsID)
1079 // in the document object indicated by docPID
1080
1081 // Need to give an object version number, because null for asOfDateTime
1082 // does not return any datastreams!
1083 String[] times = APIA.getObjectHistory(docPID);
1084
1085 DatastreamDef[] datastreams = APIA.listDatastreams(
1086 docPID, times[times.length-1]);
1087
1088 // TreeSet is a SortedSet. We're going to put Strings into it,
1089 // and Strings implement interface Comparable already.
1090 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1091 for(int i = 0; i < datastreams.length; i++) {
1092 String itemID = datastreams[i].getID();
1093 if (itemID.startsWith("SECTION"))
1094 orderedList.add(itemID);
1095 }
1096
1097 String[] sectionNames = new String[orderedList.size()];
1098 orderedList.toArray(sectionNames);
1099 orderedList = null;
1100 return sectionNames;
1101 }
1102
1103 /** Given the pid of a document fedora data object, this method will return all
1104 * itemIDs that are part of that data object and are Sections, but just the
1105 * Section numbers are returned. For further information see interface Comparable
1106 * (implemented by String), SortedSet and TreeSet.
1107 * @return an array of itemIDs of the Section numbers of the document
1108 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1109 * @param docPID is a fedora pid identifying a greenstone document object.
1110 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1111 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1112 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1113 */
1114 public String[] getSectionNumbers(String docPID) throws RemoteException {
1115 String[] times = APIA.getObjectHistory(docPID);
1116
1117 DatastreamDef[] datastreams
1118 = APIA.listDatastreams(docPID, times[times.length-1]);
1119 //Vector v = new Vector(datastreams.length);
1120 TreeSet orderedList = new TreeSet();
1121
1122 for(int i = 0; i < datastreams.length; i++) {
1123 String itemID = datastreams[i].getID();
1124 if (itemID.startsWith("SECTION")) {
1125 //int index = SECTION.length();
1126 //itemID = itemID.substring(index);
1127 itemID = removePrefix(itemID, SECTION);
1128 orderedList.add(itemID);
1129 }
1130 }
1131
1132 String[] sectionNumbers = new String[orderedList.size()];
1133 orderedList.toArray(sectionNumbers);
1134 orderedList = null;
1135
1136 return sectionNumbers;
1137 }
1138
1139 /** @return the titles for the document sections denoted by the parameters.
1140 * @param docPID is a fedora pid identifying a greenstone document object.
1141 * @param sectionIDs is a list of identifiers identifying sections in the
1142 * document denoted by docPID, whose titles need to be returned. Each
1143 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1144 * or a section number (eg. 1.5.1). */
1145 public String[] getTitles(String docPID, String[] sectionIDs)
1146 throws RemoteException, UnsupportedEncodingException,
1147 SAXException, IOException
1148 {
1149 String[] titles = new String[sectionIDs.length];
1150 for(int i = 0; i < titles.length; i++)
1151 titles[i] = getTitle(docPID, sectionIDs[i]);
1152 return titles;
1153 }
1154
1155 /** @return the title for the document section denoted by the parameters.
1156 * @param docPID is a fedora pid identifying a greenstone document object.
1157 * @param sectionID identifies the particular section in the document denoted
1158 * by docPID, whose title needs to be returned. The sectionID may be either a
1159 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1160 public String getTitle(String docPID, String sectionID)
1161 throws RemoteException, UnsupportedEncodingException,
1162 SAXException, IOException
1163 {
1164 // Compose the itemID for the EX data stream from the number in the
1165 // sectionID:
1166 String exID = removePrefix(sectionID, SECTION);
1167 exID = EX+convertToMetaNumber(exID);
1168
1169 // Retrieve the extracted metadata stream (EX, in XML) for the given
1170 // section
1171 String exStream = getItem(docPID, exID);
1172
1173 // Extract the title from the XML, look for:
1174 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1175 InputSource source = new InputSource(new StringReader(exStream));
1176 Document doc = builder.parse(source);
1177 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1178 NodeList children = docEl.getElementsByTagName(
1179 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1180 for(int i = 0; i < children.getLength(); i++) {
1181 Element e = (Element)children.item(i);
1182 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1183 return FedoraCommons.getValue(e); // extract and return the title
1184 }
1185 return ""; // if we got here, then we couldn't find a title
1186 }
1187
1188 /** @return the section's XML (as a String) as it is stored in fedora.
1189 * Works out if sectionID is a sectionName or sectionNumber.
1190 * @param docPID - a fedora pid identifying a greenstone document object.
1191 * @param sectionID - identifyies the particular section in the
1192 * document denoted by docPID, may be a section name or number. */
1193 public String getSection(String docPID, String sectionID)
1194 throws RemoteException, UnsupportedEncodingException
1195 {
1196 if(!sectionID.startsWith(SECTION)) // then it has only section number
1197 sectionID = SECTION+sectionID;
1198
1199 String sectionXML = this.getItem(docPID, sectionID);
1200 return sectionXML;
1201 }
1202
1203 /** @return the required section's DC metadata XML datastream.
1204 * @param docPID - a fedora pid identifying a greenstone document object.
1205 * @param sectionID - identifyies the particular section in the
1206 * document denoted by docPID, may be a section name or number. */
1207 public String getSectionDCMetadata(String docPID, String sectionID)
1208 throws RemoteException, UnsupportedEncodingException
1209 {
1210 String dcID = removePrefix(sectionID, SECTION);
1211 // ensure we have just the section number
1212 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1213
1214 // now get the DC datastream for that number
1215 String dcXML = this.getItem(docPID, dcID);
1216 return dcXML;
1217 }
1218
1219 /** Returns the section EX metadata XML datastream for SectionID which may be
1220 * a section name or number. Currently a few EX files are named awkwardly:
1221 * the EX file for section 1.* is actually associated with datastream EX.*.
1222 * But subsequent EX datastreams are named appropriately: for instance,
1223 * EX2.1.1 matches with section 2.1.1
1224 * @return the required section's EX metadata XML datastream.
1225 * @param docPID - a fedora pid identifying a greenstone document object.
1226 * @param sectionID - identifyies the particular section in the
1227 * document denoted by docPID, may be a section name or number. */
1228 public String getSectionEXMetadata(String docPID, String sectionID)
1229 throws RemoteException, UnsupportedEncodingException
1230 {
1231 String exID = removePrefix(sectionID, SECTION);
1232 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1233
1234 // now get the EX datastream for that for number
1235 String exXML = this.getItem(docPID, exID);
1236 return exXML;
1237 }
1238
1239 /** @return the XML content of the TOC of just that portion of the TOC which
1240 * contains the section denoted by sectionID and its direct child subsections.
1241 * The children are returned in the order they are encountered, which
1242 * happens to be in the required order of ascending sectionID.
1243 * @param docPID - a fedora pid identifying a greenstone document object.
1244 * @param sectionID - identifyies the particular section in the
1245 * document denoted by docPID, may be a section name or number. */
1246 public Element getChildrenOfSectionXML(String docPID, String sectionID)
1247 throws RemoteException, UnsupportedEncodingException,
1248 SAXException, IOException
1249 {
1250 // Store just the number
1251 String sectionNumber = removePrefix(sectionID, SECTION);
1252 // get the TOC XML datastream as a String
1253 String xmlTOC = getTOC(docPID);
1254
1255 // convert it into a DOM document
1256 InputSource source = new InputSource(new StringReader(xmlTOC));
1257 Document doc = builder.parse(source);
1258 // toplevel element docEl = <Section id="1"></Section>
1259 Element docEl = doc.getDocumentElement();
1260
1261 // check whether we're requested to return the toplevel element itself
1262 if(sectionID.equals("") || // subSection of entire docPID is requested
1263 (docEl.hasAttribute(ID) && docEl.getAttribute(ID).equals(sectionNumber)))
1264 return getSubstructure(docEl, false);
1265
1266 // Otherwise, get all <Section> elements and find the
1267 // <Section id="sectionNumber"></Section> and return that and its
1268 // children
1269 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1270 for(int i = 0; i < sections.getLength(); i++) {
1271 Element e = (Element)sections.item(i);
1272 if(e.hasAttribute(ID)
1273 && e.getAttribute(ID).equals(sectionNumber))
1274 {
1275 //System.err.println("Found: " + e.getAttribute(ID));
1276 return getSubstructure(e, false); // false: get just e and children
1277 }
1278 }
1279 return null; // not found
1280 }
1281
1282 /** @return a string representing the XML content of the TOC of just
1283 * that portion of the TOC which contains the section denoted by sectionID
1284 * and its direct child subsections.
1285 * The children are returned in the order they are encountered, which
1286 * happens to be in the required order of ascending sectionID.
1287 * @param docPID - a fedora pid identifying a greenstone document object.
1288 * @param sectionID - identifyies the particular section in the
1289 * document denoted by docPID, may be a section name or number. */
1290 public String getChildrenOfSection(String docPID, String sectionID)
1291 throws RemoteException, UnsupportedEncodingException,
1292 SAXException, IOException, TransformerException
1293 {
1294 Element children = getChildrenOfSectionXML(docPID, sectionID);
1295 return (children == null) ? "" : FedoraCommons.elementToString(children);
1296 }
1297
1298 /** @return the part of the TOC XML file (which outlines doc structure)
1299 * relating to the given section. This includes the section denoted by
1300 * sectionID as well as all descendent subsections thereof.
1301 * @param docPID - a fedora pid identifying a greenstone document object.
1302 * @param sectionID - identifyies the particular section in the
1303 * document denoted by docPID, may be a section name or number. */
1304 public Element getSubsectionXML(String docPID, String sectionID)
1305 throws RemoteException, UnsupportedEncodingException,
1306 SAXException, IOException
1307 {
1308 // get the TableOfContents (TOC) XML datastream as a String
1309 String xmlTOC = getTOC(docPID);
1310
1311 // convert it into a DOM document
1312 InputSource source = new InputSource(new StringReader(xmlTOC));
1313 Document doc = builder.parse(source);
1314 // toplevel element docEl = <Section id="1"></Section>
1315 Element docEl = doc.getDocumentElement();
1316
1317 if(sectionID.equals("")) // subSection of entire docPID is requested
1318 return docEl;
1319
1320 // Store just the number
1321 String sectionNumber = removePrefix(sectionID, SECTION);
1322 // Check whether we're requested to return the toplevel element itself
1323 // If sectionNumber=1, then the top-level element/document element
1324 // of the TOC XML is requested, so return the TOC as is.
1325 if(sectionNumber.equals("1")) {
1326 return docEl;
1327 }
1328
1329 // Get all <Section> elements and find the
1330 // <Section id="sectionNumber"></Section> and return that
1331 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1332 for(int i = 0; i < sections.getLength(); i++) {
1333 Element e = (Element)sections.item(i);
1334 if(e.hasAttribute(ID)
1335 && e.getAttribute(ID).equals(sectionNumber)) {
1336 //System.err.println("Found: " + e.getAttribute(ID));
1337 return getSubstructure(e, true); // true:get all descendents
1338 }
1339 }
1340 return null; // not found
1341 }
1342
1343 /** @return a String representation of the part of the TOC XML file
1344 * (which outlines doc structure) relating to the given section. This
1345 * includes the section denoted by sectionID as well as all descendent
1346 * subsections thereof.
1347 * @param docPID a fedora pid identifying a greenstone document object.
1348 * @param sectionID identifyies the particular section in the
1349 * document denoted by docPID, may be a section name or number. */
1350 public String getSubsection(String docPID, String sectionID)
1351 throws RemoteException, UnsupportedEncodingException, SAXException,
1352 IOException, TransformerException
1353 {
1354 // Store just the number
1355 String sectionNumber = removePrefix(sectionID, SECTION);
1356 // get the TableOfContents (TOC) XML datastream as a String
1357 String xmlTOC = getTOC(docPID);
1358
1359 // Check whether we're requested to return the toplevel element itself
1360 // If sectionNumber=1, then the top-level element/document element
1361 // of the TOC XML is requested, so return the TOC as is.
1362 if(sectionNumber.equals("1"))
1363 return xmlTOC;
1364
1365 // else
1366 Element subsection = getSubsectionXML(docPID, sectionID);
1367 return (subsection == null) ? "" : FedoraCommons.elementToString(subsection);
1368 }
1369
1370 /** Implements browsing document titles of a greenstone collection stored in
1371 * the fedora repository by letter.
1372 * @return the document pids whose titles start with the given letter.
1373 * @param letter - the starting letter to browse by.
1374 */
1375 public String[] browseTitlesByLetter(final String collName, final String letter)
1376 throws RemoteException, FedoraVersionNotSupportedException
1377 {
1378 String[] pids = null;
1379
1380 // We want to do the following kind of search (assuming letter=f
1381 // and collName=demo):
1382 // pid~greenstone:demo* title~f*
1383
1384 // We don't need to normalise the letter first (to search titles starting
1385 // with both uppercase and lowercase versions of the letter), because
1386 // Fedora always searches for both.
1387 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1388 // ANYWHERE in their titles!
1389 // SOLUTION: search the collection for all titles containing f as given,
1390 // retrieving pid and title fields. Then from the list of results, select
1391 // only those titles that start with the given letter.
1392 // This may seem an unnecessarily cumbersome job (when it looked like it
1393 // should have worked with just title~f*), BUT, at least the resulting
1394 // documents will be reduced to a set of titles containing f; rather than
1395 // having to search *all* documents in the collection.
1396 final String title = letter+WILDCARD;
1397
1398 FieldSearchResult objects = findObjectsWithTitlesContaining(
1399 collName, title);
1400 ObjectFields[] results = objects.getResultList();
1401 TreeSet v = new TreeSet(); // TreeSet to return the results in
1402 //alphabetical order
1403 for(int i = 0; i < results.length; i++) {
1404 // from the result list, select those titles that don't
1405 // just *contain* the letter, but actually start with it:
1406 String resultTitle = results[i].getTitle(0);
1407 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1408 String pid = results[i].getPid();
1409 // skip the collection object itself
1410 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1411 v.add(pid);
1412 //LOG.debug(resultTitle);
1413 }
1414 }
1415 }
1416 pids = new String[v.size()];
1417 v.toArray(pids);
1418 return pids;
1419 }
1420
1421 /** Implements querying document DC titles of a greenstone collection stored in
1422 * the fedora repository for a term that may occur anywhere in their titles.
1423 * @return the document pids whose DC titles contain the parameter term.
1424 * @param titleContents - the word or phrase to search the collection's
1425 * document titles for. Only one word, and this method finds Greenstone
1426 * DOCUMENT titles CONTAINING that word (if any).
1427 * @param startsWith - if true, searches for titles that start with
1428 * titleContents. Else it searches for titles that contain titleContents. */
1429 public String[] searchDocumentTitles(String collName, String titleContents,
1430 boolean startsWith)
1431 throws RemoteException, FedoraVersionNotSupportedException
1432 {
1433 String[] pids = null;
1434
1435 // We want to do the following kind of search (when written in Fedora's
1436 // REST format - see http://localhost:8080/fedora/search):
1437 // pid~greenstone:<colname>* title~<1st word of titleContents>
1438
1439 // We don't need to normalise the word first (to search titles starting
1440 // with both uppercase and lowercase versions of it), because
1441 // Fedora always searches for the normalised word.
1442
1443 // 2 difficulties:
1444 // - We can only search for single words with Fedora's Conditional Search.
1445 // Obtain pids and titles of documents containing the first word and then
1446 // we filter the titles to those containing the entire phrase of
1447 // titleContents.
1448 // - Searching for title~FirstWord returns all documents containing
1449 // this word ANYWHERE in their titles. If parameter startsWith is false,
1450 // then this is fine. But if parameter startsWith is true, then go
1451 // through all the resulting titles found (containing FirstWord), select
1452 // only pids of those titles that contain the entire phrase titleContents
1453
1454 final String pid = GREENSTONE_+collName+WILDCARD;
1455
1456 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1457 // if titleContents is a phrase (contains space), then it's not
1458 // a single word, in which case search for just the first word
1459 String title = titleContents; // assume it's a single word
1460 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1461 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1462
1463 FieldSearchResult objects = findObjectsWithTitlesContaining(
1464 collName, title);
1465 if(objects == null) {
1466 final String[] empty = {};
1467 return empty;
1468 }
1469
1470 // Go through all the titles found and for those that match the criteria*,
1471 // store their pid. *Criteria: titles that start with OR contain the
1472 // word OR phrase of titleContents.
1473 ObjectFields[] results = objects.getResultList();
1474 Vector v = new Vector(); // return pids in the order found
1475 for(int i = 0; i < results.length; i++) {
1476 // from the result list, select those titles that don't
1477 // just *contain* the first word, but the entire phrase of
1478 // words in titleContents:
1479 String resultTitle = results[i].getTitle(0);
1480 boolean accepted = false; // accept the resultTitle found
1481
1482 String resultPID = results[i].getPid();
1483 // skip the collection object itself, since it's not a document
1484 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1485 accepted = false;
1486 }
1487 // if titleContents is a single word and we are checking
1488 // whether resultTitle contains titleContents:
1489 else if(indexOfFirstSpace == -1) { // titleContents is a single word
1490 if(!startsWith) // titles that *contain* the word titleContents
1491 accepted = true; //accept all titles found
1492 // else startWith: accept titles starting with word titleContents
1493 else if (resultTitle.toLowerCase().startsWith(
1494 titleContents.toLowerCase()))
1495 accepted = true;
1496
1497 }
1498 else { // otherwise, titleContents is a phrase of >1 word, need
1499 // to check that the result title contains the entire phrase
1500 if(startsWith && resultTitle.toLowerCase().startsWith(
1501 titleContents.toLowerCase()))
1502 accepted = true;
1503 else if(!startsWith && resultTitle.toLowerCase().contains(
1504 titleContents.toLowerCase()))
1505 accepted = true;
1506 }
1507
1508 // if the resultTitle fit the criteria, store its pid
1509 if(accepted) {
1510 v.add(resultPID);
1511 //System.out.println(resultTitle);
1512 }
1513
1514 }
1515 pids = new String[v.size()];
1516 v.toArray(pids);
1517 return pids;
1518 }
1519
1520
1521 /**
1522 * @param collName - the collection of documents we'll be searching in.
1523 * @param titleWord - the word we'll be searching the document titles for.
1524 * (Fedora's search returns all objects whose title contains that word).
1525 *
1526 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1527 * (see link):
1528 * <pre>
1529 * "There are two search methods: a search on all fields or a search on
1530 * specific fields. To search all fields the setTerms function of the
1531 * FieldSearchQuery must be used, with the paramater being the desired string.
1532 *
1533 * To search by specific fields, you must create an array of Condition
1534 * objects. Each condition consists of three parts:
1535 * the field to be searched (.setProperty()),
1536 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1537 * and the search string (.setValue())"
1538 * </pre>
1539 * We want to use the second search method above when browsing and searching,
1540 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1541 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1542 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1543 *
1544 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1545 * web services are defined. (The web.xml defines the "Servlets for REST-based
1546 * interfaces to the Fedora Repository Server").
1547 * Do a search on the word "search":
1548 * fedora.server.access.FieldSearchServlet is the class we need to look at
1549 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1550 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1551 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1552 * is what's used in the fedora client application that makes use of
1553 * the SOAP-based interface.
1554 *
1555 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1556 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1557 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1558 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1559 */
1560 protected FieldSearchResult findObjectsWithTitlesContaining(
1561 String collName, final String titleWord)
1562 throws RemoteException, FedoraVersionNotSupportedException
1563 {
1564 // Searching for pids of the form "greenstone:gs2mgdemo*";
1565 final String pid = GREENSTONE_+collName+WILDCARD;
1566
1567 Condition[] conditions = new Condition[2];
1568 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1569 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1570
1571 FieldSearchQuery query = new FieldSearchQuery();
1572 query.setConditions(conditions);
1573
1574 // We'd like pid and title returned for each object, because we'll make
1575 // use of title. We pass maxResults=null to get all objects that match
1576 // (i.e. all collections).
1577 FieldSearchResult objects = null;
1578 final String[] retrieveFields = {"pid", "title"};
1579 try {
1580 objects = AutoFinder.findObjects(
1581 APIA, retrieveFields, maxresults, query);
1582 // collection = APIA.findObjects(new String[]{"pid", "title"},
1583 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1584 } catch(RemoteException ex) {
1585 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1586 // fedoraVersion is too low, searching/browsing is not possible
1587 // (because class Condition has changed after 2.0, from 2.1.1
1588 // onwards)
1589 throw new FedoraVersionNotSupportedException(fedoraVersion);
1590 } else {
1591 LOG.error(
1592 "Remote exception when calling web service operation " +
1593 "findObject() to execute search:\n" + ex.getMessage());
1594 ex.printStackTrace();
1595 throw ex;
1596 }
1597 }
1598 return objects; // return the FieldSearchResult objects found
1599 }
1600
1601 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1602 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1603 * @param docPID - pid of a greenstone document in the fedora repository. */
1604 public String getDocName(String docPID) {
1605 return docPID.substring(docPID.indexOf('-')+1);
1606 }
1607
1608 /** @return the &lt;name&gt; in the parameter collPID
1609 * (greenstone:&lt;name&gt;-collection)
1610 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1611 * in the docPID (greenstone:&lt;name&gt;-docID).
1612 * @param collPID - pid of a greenstone collection in the fedora repository. */
1613 public String getCollectionName(String collPID) {
1614 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1615 }
1616
1617 /** Convert the given Element to a String representing the same XML.
1618 * @return an element containing a copy element e with either only its child
1619 * elements or with all its descendents (depending on whether parameter
1620 * descendents is true or false).
1621 * @param e - the element to start copying from.
1622 * @param descendents - if true, e is copied with all its descendetns into the
1623 * element that's returned. If false, only e and its direct children are copied
1624 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1625 */
1626 protected Element getSubstructure(Element e, boolean descendents)
1627 {
1628 Document doc = builder.newDocument();
1629 Node n = doc.importNode(e, descendents);
1630 // descendents=true: import/copy descendents.
1631 // Else, copy just current node e (later copy its direct children)
1632 doc.appendChild(n); // need to put the copied node into a document
1633 // else it won't have a parent doc (DOMSource can't work with it
1634 // without it having a document parent).
1635
1636 // if we are not recursively copying all descendents, then copy just
1637 // the childnodes:
1638 if(!descendents) { // then copy just the children
1639 // get e's children and copy them into the new document
1640 NodeList children = e.getChildNodes();
1641 for(int i = 0; i < children.getLength(); i++) {
1642 // create copy
1643 n = doc.importNode(children.item(i), false);
1644 // attach it to parent
1645 doc.getDocumentElement().appendChild(n);
1646
1647 // Now we need to indicate whether this new node (child) is a leaf
1648 // or not. (This is necessary for getChildrenOfSection(), else
1649 // it's hard to know if the children are leaves or have further
1650 // subsections.
1651 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1652 // we're dealing only with section children
1653
1654 // Check if the matching original had children:
1655 Element originalsChild = (Element)children.item(i);
1656 NodeList grandchildren =
1657 originalsChild.getElementsByTagName(SECTION_ELEMENT);
1658 if(grandchildren.getLength() > 0) {
1659 // original's child has children, so indicate this
1660 // in the copied child:
1661 Element child = (Element)n;
1662 child.setAttribute(TYPE, INTERNAL_NODE);
1663 }
1664 }
1665 }
1666 }
1667 return doc.getDocumentElement();
1668 }
1669
1670
1671 /**
1672 * Return a datastream of a document, given the document's id
1673 * and the item id of the datastream which is to be retrieved.
1674 * @return the XML (in String form) of the item denoted by itemID
1675 * that's part of the fedora data object denoted by docPID.
1676 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1677 * Can't retrieve images denoted by itemID using this method, only items
1678 * that are of XML format.
1679 * @param docPID - pid of a greenstone document in the fedora repository.
1680 * @param itemID - the itemID of a datastream of the fedora object
1681 * identified by docPID.
1682 */
1683 protected String getItem(String docPID, String itemID)
1684 throws RemoteException, UnsupportedEncodingException
1685 {
1686 // MIMETypedStream getDatastreamDissemination(
1687 // String pid, String dsID, asOfDateTime)
1688 MIMETypedStream datastream
1689 = APIA.getDatastreamDissemination(docPID, itemID, null);
1690 return new String(datastream.getStream(), UTF8);
1691 }
1692
1693 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1694 * returns "1.2.1".
1695 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1696 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1697 * However, the string str is returned unchanged if the prefix does not occur
1698 * at the start of str.
1699 * @return the String parameter str without the prefix.
1700 * It can be used to return the number of an itemID of a greenstone document
1701 * stored in the fedora repository without the given prefix.
1702 * @param prefix - the prefix which ought to be removed from the itemID.
1703 * @param str - the value of the itemID.
1704 */
1705 protected String removePrefix(String str, String prefix) {
1706 // do nothing in those cases where the prefix is not in param str
1707 if(!str.startsWith(prefix))
1708 return str;
1709 // otherwise:
1710 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1711 return "1" + str.substring(prefix.length());
1712 } else {
1713 return str.substring(prefix.length());
1714 }
1715 }
1716
1717 /** Given a number of the form x(.y.z), this method returns this number
1718 * as is, except when x = 1, in which case, it would return .y.z
1719 * That is, given number=3.2.1, this method would return 3.2.1
1720 * But, given number=1.2.3, this method would return .2.3.
1721 * When number=1, it is NOT a special case: "" is returned as explained.
1722 * @param number - a proper (fedora-greenstone document) section number
1723 * @return the same number as it ought to be for the associated EX, DC datastreama.
1724 */
1725 protected String convertToMetaNumber(String number) {
1726 if(number.startsWith("1.") || number.equals("1"))
1727 return number.substring(1); // remove the first char: the initial '1'
1728 else return number;
1729 }
1730
1731 /** @return fedora's baseURL. It's of the form
1732 * "http://localhost:8080/fedora" */
1733 public String getBaseURL() { return baseURL; }
1734
1735 /** @return the portAddressURL (in use) of the Fedora APIA
1736 * web service (should be the endpoint location in the APIA's
1737 * WSDL file).
1738 * It's usually of the form baseURL+"/services/access" */
1739 public String getPortAddressURL() {
1740 return this.baseURL + this.portAddressSuffix;
1741 }
1742
1743 /** @return the baseURL for gsdlAssocFiles */
1744 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1745
1746 public static void main(String args[]) {
1747 try {
1748 FedoraConnection fedoraCon
1749 = new FedoraConnection(new File("fedoraGS3.properties"));
1750
1751 String[] pids = null;
1752 pids = fedoraCon.getCollections();
1753 String[] titles = fedoraCon.getCollectionTitles(pids);
1754 for(int i = 0; i < pids.length; i++) {
1755 System.out.println("extracted title:" + titles[i]);
1756 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1757 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1758 for(int j = 0; j < docPIDs.length; j++) {
1759 System.out.println("\tExtr doc title: " + docTitles[j]);
1760 }
1761 }
1762
1763 String PID = "greenstone:gs2mgdemo-collection";
1764 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1765 String dcXML = fedoraCon.getDC(PID);
1766 String exXML = fedoraCon.getEX(PID);
1767 String tocXML = fedoraCon.getTOC(docPID);
1768 System.out.println("Dublin Core Metadata for " + PID
1769 + " is:\n" + dcXML);
1770 System.out.println("GS3 extracted metadata for " + PID
1771 + " is:\n" + exXML);
1772 System.out.println("Table of Contents for " + docPID
1773 + " is:\n" + tocXML);
1774
1775
1776 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1777 System.out.println("\nSection names for " + docPID + " are:");
1778 for(int i = 0; i < sectionNames.length; i++)
1779 System.out.println(sectionNames[i]);
1780
1781 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1782 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1783 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1784 System.out.println("\nSection numbers for " + docPID + " are:");
1785 for(int i = 0; i < sectionNumbers.length; i++) {
1786 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1787 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1788 }
1789
1790 String sectionID = "SECTION1"; //SECTION1.5
1791 System.out.println("\n");
1792 System.out.println(sectionID+ " - entire subsection:\n"
1793 + fedoraCon.getSubsection(docPID, sectionID));
1794
1795 System.out.println(sectionID + " and children:\n"
1796 + fedoraCon.getChildrenOfSection(docPID, sectionID));
1797
1798 System.out.println(
1799 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1800 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1801 for(int i = 0; i < pids.length; i++)
1802 System.out.println(pids[i]);
1803
1804 System.out.println(
1805 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1806 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1807 for(int i = 0; i < pids.length; i++)
1808 System.out.println(pids[i]);
1809
1810 System.out.println("\nDone - exiting.");
1811 System.exit(0);
1812 } catch(RemoteException re) {
1813 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1814 re.printStackTrace();
1815 } catch(Exception e) {
1816 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1817 e.printStackTrace();
1818 //LOG.error("Unable to instantiate FedoraConnection\n" + e);
1819 }
1820 }
1821}
Note: See TracBrowser for help on using the repository browser.