source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 21573

Last change on this file since 21573 was 15659, checked in by ak19, 16 years ago

Corrected previous. Browsing docs (by title) in a Fedora repository of Greenstone docs skips the collection itself, as it is not a document

File size: 80.8 KB
Line 
1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import fedora.client.utility.AutoFinder;
25import fedora.server.access.FedoraAPIAServiceLocator;
26// The object for accessing FedoraAPI-A web services:
27import fedora.server.access.FedoraAPIA;
28
29// The definitions for all complex fedora types:
30import fedora.server.types.gen.MIMETypedStream;
31import fedora.server.types.gen.RepositoryInfo;
32import fedora.server.types.gen.FieldSearchResult;
33import fedora.server.types.gen.FieldSearchQuery;
34import fedora.server.types.gen.DatastreamDef;
35import fedora.server.types.gen.ObjectFields;
36import fedora.server.types.gen.Condition;
37import fedora.server.types.gen.ComparisonOperator;
38//import fedora.server.types.gen.*;
39
40import javax.net.ssl.SSLHandshakeException;
41import java.net.ConnectException;
42import org.xml.sax.SAXException;
43import java.io.UnsupportedEncodingException;
44import java.io.IOException;
45import javax.xml.parsers.ParserConfigurationException;
46import java.net.MalformedURLException;
47import java.rmi.RemoteException;
48
49import java.io.StringReader;
50import java.io.FileInputStream;
51import java.io.File;
52import java.util.TreeSet;
53import java.util.Properties;
54import java.util.Vector;
55
56import java.awt.GridLayout;
57import javax.swing.JLabel;
58import javax.swing.JOptionPane;
59import javax.swing.JPanel;
60import javax.swing.JPasswordField;
61import javax.swing.JTextField;
62
63import org.apache.log4j.Logger;
64import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
65import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
66import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
68import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
69
70import javax.xml.parsers.DocumentBuilderFactory;
71import javax.xml.parsers.DocumentBuilder;
72import javax.xml.transform.*;
73
74import org.xml.sax.InputSource;
75import org.w3c.dom.Document;
76import org.w3c.dom.Element;
77import org.w3c.dom.NodeList;
78import org.w3c.dom.Node;
79
80/** Class that establishes a connection with Fedora's web services (via
81 * Java stub classes for the same) and then provides methods to retrieve
82 * Greenstone-specific data, such as the TOC, EX, DC, and Section
83 * datastreams of the Greenstone documents stored in Fedora's repository.
84 * These datastreams are returned as Strings without any changes being
85 * made to them.
86 * @author ak19
87*/
88public class FedoraConnection implements FedoraGS3DL {
89 /** The logging instance for this class */
90 private static final Logger LOG = Logger.getLogger(
91 FedoraConnection.class.getName());
92
93 /** The version of fedora that is supported by class FedoraConnection */
94 protected static final String SUPPORTED_VERSION = "2.2.1";
95
96 /* Some fixed strings of known literals */
97 protected static final String TYPE = "type";
98 protected static final String INTERNAL_NODE = "internalNode";
99 protected static final String GET= "/get/";
100
101 // The DemoSOAPClient declares and uses the following as a static member
102 // Probably none of the APIA methods (web service methods) remembers
103 // state, that might explain why we can use it as a static member then.
104 /** The object used to access the Fedora API-A web service methods */
105 protected static FedoraAPIA APIA;
106
107 /** Version of the running fedora server */
108 protected String fedoraVersion;
109 /** The location of the fedora server, usually of the form
110 * http://localhost:8080/fedora */
111 protected String baseURL;
112
113 /** The user-specified portAddressSuffix of the Fedora Access web services
114 * (endpoint URL in the WSDL), usually of the form
115 * http://localhost:8080/fedora/services/access
116 * Users can tell FedoraGS3 to try accessing that first by setting
117 * the "port.address.suffix" property in the properties file.
118 * FedoraGS3 itself will not write the portAddressSuffix currently used in
119 * the file for next time, but leave whatever value was entered in the
120 * properties file. The portAddress--not just suffix--currently in use (once
121 * the FedoraAPIA handle has been instantiated) can be obtained through
122 * getPortAddressURL() method. */
123 protected String portAddressSuffix;
124
125 /** The part of the portAddress that comes after the baseURL. It is usually:
126 * "/services/access" */
127 protected static final String defaultPortAddressSuffix = "/services/access";
128
129 /** The preferred language of the displat content */
130 protected String lang;
131 /** The maximum number of collections to retrieve */
132 protected int maxresults;
133 /** DocumentBuilder used to create and parse XML documents */
134 protected DocumentBuilder builder;
135
136 /** Static method that returns the version of Fedora supported by this
137 * class FedoraConnection. */
138 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
139 /** The version of the running Fedora server, which may or may not
140 * match the supported version. */
141 public String getFedoraVersion() { return fedoraVersion; }
142
143 /** @return the default language used to query for titles (and anything else
144 * where there are multiple language options). Upon initialisation, this
145 * defaults to English. */
146 public String getLanguage() { return lang; }
147
148 /** Sets the the default language used to query for titles (and anything else
149 * where there are multiple language options). If the default language for any
150 * query is not available, then English ("en") is used. If that's not available
151 * then the first other available language is used.
152 * @param lang - the two-letter language code to set the default language to.
153 */
154 public void setLanguage(String lang) { this.lang = lang; }
155
156 /** The default maximum number of search results returned for a search. Upon
157 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
158 public int getMaxResults() { return maxresults; }
159
160 /** Set the default maximum number of search results returned for a search.
161 * @param maxresults - the new default maximum number of search results to
162 * be returned. */
163 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
164
165 /** Code for this constructor is from DemoSOAPClient.java.
166 * Instantiates the APIA handle using the protocol, host, port, fedora
167 * server repository username and password.
168 * @param host - the fedora server host (may be prefixed with http:// or
169 * https:// if parameter protocol is empty). If there's no protocol, and
170 * no protocol prefixed to the host, then the protocol defaults to http.
171 * @param protocol - either http or https (or empty "")
172 * @param port - the port on which fedora is running.
173 * @param fedoraServerUsername - the administrator username required to
174 * access the fedora server's repository. ("fedoraAdmin" unless changed).
175 * @param fedoraServerPassword - the fedora server repository's
176 * administrator password. If none was set on fedora installation, this
177 * can be empty (""). */
178 public FedoraConnection(String protocol, String host, int port,
179 String fedoraServerUsername, String fedoraServerPassword)
180 throws ParserConfigurationException, MalformedURLException,
181 SSLHandshakeException, RemoteException, AuthenticationFailedException,
182 NotAFedoraServerException, ConnectException, Exception
183 {
184 try {
185 init(protocol, host, Integer.toString(port),
186 fedoraServerUsername, fedoraServerPassword);
187 } /*catch(RemoteException re) { //subclass of IOException
188 throw re;
189 } catch(SSLHandshakeException ssle) { //subclass of IOException
190 // this is also of type IOException
191 throw ssle;
192 }*/ catch(IOException ioe) { // connected to the wrong server
193 String exceptMsg = ioe.getMessage().toLowerCase();
194 if(exceptMsg.indexOf("request failed") != -1
195 || exceptMsg.indexOf("404") != -1)
196 throw new NotAFedoraServerException();
197 else // the IOException is not due the cause we thought it was, so
198 throw ioe; // rethrow whatever other IOException was caught (which
199 // could have been RemoteException or SSLHandshakeException
200 // or some other cause)
201 }
202 }
203
/** Default constructor, which obtains host, port, fedora username and
 * password interactively from the user.
 * It hands an empty set of properties to setInitialisationProperties(),
 * which displays the authentication popup and tries to connect with the
 * values entered. That method redisplays the popup (with an error message)
 * for the failures it handles - authentication failure, RemoteException,
 * ConnectException, SSLHandshakeException and certain IOExceptions - and
 * rethrows other exceptions; refer to it for the exact handling.
 * NOTE: if a fedora server at the given protocol (https or http) isn't
 * accessible, it can take a long time before the popup reappears.
 * @throws CancelledException if the user cancels the popup dialog.
 * @throws Exception any failure that setInitialisationProperties() does
 * not handle by redisplaying the popup. */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // Nothing has been stored previously, so the popup loop starts out
    // with an empty set of properties.
    setInitialisationProperties(new Properties());
}
248
/** Single argument constructor that takes the properties file defining
 * the values of the initialisation parameters required to instantiate a
 * FedoraConnection. These are fedora server username, password, host and
 * port (and optionally "port.address.suffix"). Whatever could be loaded is
 * used to pre-fill the initialisation input dialog.
 * After a successful connection the (possibly user-modified) values are
 * written back to the same file, with the password and error message
 * blanked out. Failures to read or write the file are logged as warnings
 * and otherwise ignored.
 * @param propertyFile is the properties file specifying the values for
 * Fedora server username, password, host and port.
 * @throws CancelledException if the user cancels the popup dialog.
 * @throws Exception any failure that setInitialisationProperties() does
 * not handle by redisplaying the popup. */
public FedoraConnection(File propertyFile)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    Properties properties = new Properties();

    // Load the properties from the given file
    try {
        if(propertyFile.exists()) {
            FileInputStream in = new FileInputStream(propertyFile);
            try {
                properties.load(in);
            } finally {
                // Properties.load() leaves the stream open
                in.close();
            }
        }
    } catch(Exception e) {
        // If the file didn't exist or could not be read,
        // then we just continue with whatever properties we have
        LOG.warn("Exception loading from propertyFile "
            + propertyFile + ": " + e);
    }

    // Go through the process of showing the initialisation dialog
    setInitialisationProperties(properties);

    // Now let's save whatever values the user may have entered into the
    // input dialog as the default values for next time the dialog shows
    try {
        // First make sure errormessage gets stored as "" and doesn't
        // cause problems next time.
        properties.setProperty("errormessage", "");
        // Don't save passwords
        properties.setProperty("password", "");
        // If the portAddressSuffix is in the file already, then it's
        // user-specified and we shouldn't change it. But if there is no
        // such property, then create it with an empty string value:
        if(properties.getProperty("port.address.suffix") == null) {
            properties.setProperty("port.address.suffix", "");
        }

        // Open (and thereby truncate) the file only once the properties
        // are ready to be written: same file as the one loaded from.
        java.io.FileOutputStream out
            = new java.io.FileOutputStream(propertyFile);
        try {
            properties.store(out, "fedoraGS3 properties");
        } finally {
            // Javadoc states that "The output stream remains open after
            // this method (Properties.store) returns." So we close it
            // here, also when store() fails.
            out.close();
        }
    } catch(Exception e) {
        LOG.warn("Exception writing to propertyFile "
            + propertyFile + ": " + e);
    }
}
306
307 /** Method that loops to display the dialog that retrieves the
308 * fedora server initialisation properties from the user. If there
309 * is a property file with values set already, it will display
310 * the previously entered values by loading them from that file.
311 * Otherwise, input fields in the dialog are empty.
312 * @param properties the Properties Hashmap storing values for
313 * username, password, host and port (and any errormessage). */
314 protected void setInitialisationProperties(Properties properties)
315 throws ParserConfigurationException, MalformedURLException,
316 CancelledException, ConnectException, RemoteException,
317 SSLHandshakeException, Exception
318 {
319 // keep looping to display authentication popup, until valid values are
320 // entered (except when a ConnectionRefused Exception is caught - this
321 // needs to be rethrown):
322 boolean authenticated = true;
323 // reset any error messages that may have been stored (should not be
324 // the case, but if there had been any difficulty during storing, it
325 // may not have written out an empty errorMessage)
326 properties.setProperty("errormessage", "");
327 do{
328 // show the Authentication-popup:
329 // By passing the HashMap Properties, user-updated values will
330 // be persistent in the authentication-popup fields (rather than
331 // reset to the default initial values).
332 properties = showAuthenticationPopup(properties);
333 String fedoraServerUsername = properties.getProperty("username", "");
334 String fedoraServerPassword = properties.getProperty("password", "");
335 String host = properties.getProperty("host", "");
336 String port = properties.getProperty("port", "");
337 //String protocol = host.startsWith("http") ? "" : "http://";
338 String protocol = "http://";
339 if(host.startsWith("http") || host.startsWith("https"))
340 protocol = "";
341 // NOTE THAT: if a fedora server at https:// is not accessible,
342 // it takes a long time for the authentication popup to reappear.
343
344 try{
345 this.portAddressSuffix
346 = properties.getProperty("port.address.suffix", "");
347 // Use the FedoraClient utility to get the SOAP stub for APIA.
348 // This SOAP stub enables the client to connect to a Fedora
349 // repository via the API-A web service interface.
350 init(protocol, host, port,
351 fedoraServerUsername, fedoraServerPassword);
352 // will throw Exception if it can't instantiate APIA
353
354 // if no exception thrown in the initialisation statement above,
355 // then we have been authenticated:
356 authenticated = true;
357 } catch(AuthenticationFailedException afe) {
358 authenticated = false;
359 properties.setProperty("errormessage", afe.getMessage());
360 } catch(RemoteException e) { // causes could be various
361 String reason = e.getMessage();
362 if(e.getCause() != null) {
363 // For instance, if a ConnectException indicating
364 // 'Connection Refused' or a java.net.UnknownHostException
365 // caused the RemoteException
366
367 // Strip out prefix "Nested exception is..." from the
368 // encapsulating Exception's message, by using the Cause's
369 // message. Keep Exception classname to give it some context:
370 reason = e.getCause().getClass().getName() + ": "
371 + e.getCause().getMessage();
372 // Give some more information if the connection was refused.
373 // (This can also happen when the Fedora server is not running)
374 if(e.getCause().getClass().equals(ConnectException.class)) {
375 reason += FedoraGS3Exception.connectionRefusedMessage;
376 }
377 }
378 // if the message indicates that a server was running there,
379 // then we tell the user it was not a Fedora server
380 if(reason.toLowerCase().contains("404")
381 || reason.toLowerCase().contains("request failed"))
382 {
383 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
384 }
385 authenticated = false;
386 properties.setProperty("errormessage", reason);
387 } catch(ConnectException e) {
388 properties.setProperty("errormessage",
389 FedoraGS3Exception.connectionRefusedMessage);
390 authenticated = false;
391 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
392 // be handled before IOException, as it's an IOException subclass.
393 authenticated = false;
394 properties.setProperty("errormessage",
395 FedoraGS3Exception.sslHandshakeExceptionMessage);
396 // we won't prefix the host with http for the user, as https
397 // might be right after all, and something else might have gone
398 // during the connection attempt instead.
399 //host = host.replace("https", "http"); //setting it for them
400 //properties.setProperty("host", host);
401 } catch(IOException ioe) { // occurs when we try to connect to a
402 // host/port where some server other than Fedora's is listening
403 // (e.g. if we end up connecting to GS3's host and port).
404 // In that case, we can get exception messages like a 404:
405 // "Unable to instantiate FedoraConnection
406 // java.io.IOException: Request failed [404 /fedora/describe]"
407 // Test this by trying to connect to localhost at 9090 where GS3 is
408 String exceptMsg = ioe.getMessage().toLowerCase();
409 if(exceptMsg.indexOf("request failed") != -1
410 || exceptMsg.indexOf("404") != -1)
411 {
412 properties.setProperty("errormessage",
413 NotAFedoraServerException.MESSAGE
414 + "\n(" + ioe.getMessage() + ")");
415 } else if(exceptMsg.indexOf("401") != -1
416 || exceptMsg.indexOf("500") != -1)
417 {
418 authenticated = false;
419 properties.setProperty("errormessage", ioe.getMessage());
420 } else { // the exception occurred for some other reason, rethrow it
421 throw ioe;
422 }
423 }
424 } while(!authenticated); // will keep showing popup until auhentication
425 // and connection input values are valid
426 }
427
428 /**
429 * Static method that displays a popup to allow the user to provide Fedora
430 * authentication (username, pwd) and connection (protocol+host, port) details.
431 * @param properties is a Properties HashMap where the property Keys which must
432 * have been put in here in advance (even with "" Values if appropriate) are:
433 * <pre>
434 * - username
435 * - password
436 * - host (may - but need not - be prefixed with either of the protocols
437 * "http://" and "https://"
438 * - port
439 * - errorMessage (displayed near the top of the popup dialog). Can be "".
440 * </pre>
441 * The values stored in the properties HashMap for the above property are
442 * initially displayed in the fields and the user can overwrite them.
443 * This is useful in such cases where invalid values were entered and this
444 * popup must be redisplayed to allow the user to correct their previous input.
445 * @return the same HashMap Properties which was passed as parameter. */
446 protected static Properties showAuthenticationPopup(Properties properties)
447 throws CancelledException
448 {
449 // Retrieve all the properties -- defaults to "" if any are null
450 JTextField usernameField = new JTextField(
451 properties.getProperty("username", "fedoraAdmin"));
452 JTextField passwordField = new JPasswordField(
453 properties.getProperty("password", ""));
454 JTextField hostField = new JTextField(
455 properties.getProperty("host", "localhost"));
456 JTextField portField = new JTextField(
457 properties.getProperty("port", "8080"));
458
459 JPanel panel = new JPanel(new GridLayout(4,2));
460 panel.add(new JLabel("User Name"));
461 panel.add(usernameField);
462 panel.add(new JLabel("Password"));
463 panel.add(passwordField);
464 panel.add(new JLabel("Host"));
465 panel.add(hostField);
466 panel.add(new JLabel("Port"));
467 panel.add(portField);
468
469 String heading = "Fedora Server Admin Authentication:";
470 String errorMessage = properties.getProperty("errormessage", "");
471 if(!errorMessage.equals("")) {
472 heading = "=> " + errorMessage + "\n\n" + heading;
473 }
474 int option = JOptionPane.showConfirmDialog(null, new Object[] {
475 heading, panel},
476 "Enter Network Password",
477 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
478
479 if (option == JOptionPane.OK_OPTION) {
480 String fedoraServerUsername = usernameField.getText();
481 String fedoraServerPassword = passwordField.getText();
482 String host = hostField.getText();
483 String port = portField.getText();
484 properties.setProperty("username", fedoraServerUsername);
485 properties.setProperty("password", fedoraServerPassword);
486 properties.setProperty("host", host);
487 properties.setProperty("port", port);
488 } else { // Cancel option
489 throw new CancelledException();
490 }
491 return properties;
492 }
493
494 /** Init method that is called by the constructor to set some
495 * important member variables including instantiating the APIA object
496 * used to invoke the Fedora APIA web service operations.
497 * @param protocol can be http or https
498 * @param host is the name of the Fedora server host
499 * @param port is the port number (String form) of the Fedora server
500 * @param fedoraServerUsername is the user name to access the Fedora
501 * Server
502 * @param fedoraServerPassword is the password needed to access the
503 * Fedora Server
504 */
505 protected void init(String protocol, String host, String port,
506 String fedoraServerUsername, String fedoraServerPassword)
507 throws ParserConfigurationException, MalformedURLException,
508 AuthenticationFailedException, RemoteException, Exception
509 {
510 // initialise member variables
511 lang = ENGLISH;
512 maxresults = Integer.MAX_VALUE;
513 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
514 builder = factory.newDocumentBuilder();
515
516 // (protocol is "" if host already contains protocol)
517 if(!protocol.equals("") && !protocol.endsWith("://"))
518 protocol += "://";
519 // now create baseURL = protocol://host:port/fedora
520 this.baseURL = protocol + host + ":" + port + "/fedora";
521
522 // Get the FedoraAPIA handle to/stub of the Fedora web services
523 // New way of instantiating connection to Fedora is dependent on
524 // fewer files of FedoraClient.jar
525 FedoraAPIAServiceLocator serviceLocator
526 = new FedoraAPIAServiceLocator(fedoraServerUsername,
527 fedoraServerPassword);
528
529 APIA = null;
530 boolean isUserSpecifiedPortAddressSuffix = false;
531 // try any portAddressSuffix specified by the user
532 if(!this.portAddressSuffix.equals("")) {
533 isUserSpecifiedPortAddressSuffix = true;
534 this.createAPIA(serviceLocator, this.portAddressSuffix,
535 "user-specified", isUserSpecifiedPortAddressSuffix);
536 }
537
538 // If the user-specified portAddressSuffix failed or if there was none
539 // given, then APIA will be null, so we will try with the default
540 // portAddressSuffix. This time all exceptions will be passed on.
541 if(APIA == null) {
542 isUserSpecifiedPortAddressSuffix = false;
543 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
544 "default", isUserSpecifiedPortAddressSuffix);
545 }
546
547 }
548
549 /** Tries to create the FedoraAPIA instance using the serviceLocator
550 * and the given portSuffix. The APIA instance is obtained for the
551 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
552 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
553 * Remote Exception from AXIS that it can't find the target service to
554 * invoke is ignored so that the caller can retry with the default port-
555 * address suffix first before giving up. */
556 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
557 String portSuffix, String messageInsert,
558 boolean isUserSpecifiedPortAddressSuffix)
559 throws Exception
560 {
561 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
562 // this.portAddressSuffix : defaultPortAddressSuffix;
563
564 try {
565 LOG.debug( "Trying to connect to Fedora using the given"
566 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
567 + baseURL + portSuffix);
568 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
569 new java.net.URL(baseURL+portSuffix));
570 // let's test whether we're authenticated (otherwise a
571 // RemoteException will be thrown to indicate that the
572 // password was incorrect.)
573 RepositoryInfo repositoryInfo = APIA.describeRepository();
574 // throws RemoteException if pwd wrong or for other reasons
575 // in which case describeRepository() service is unavailable
576 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
577 // If we come all the way here, no exceptions were thrown:
578 this.portAddressSuffix = portSuffix; // store the one currently in use
579 } catch(RemoteException re) {
580 // if we're here, then APIA was unable to call the web service
581 // If this was because the fedora authentication failed, then
582 // let's throw a custom exception
583 String message = re.getMessage().toLowerCase();
584 // Looking for something Unauthorized(401)
585 if(message.indexOf("unauthorized") != -1
586 || message.indexOf("401") != -1)
587 {
588 throw new AuthenticationFailedException();
589 } else if(isUserSpecifiedPortAddressSuffix
590 && re.getMessage().contains(
591 FedoraGS3Exception.missingTargetService))
592 {
593 LOG.warn("Failed to connect to Fedora APIA services at given"
594 + " port address:\n" + portSuffix
595 + "\nException: " + re.getMessage());
596 // APIA.describeRepository can throw a remote exception
597 // whereby AXIS says the target service is missing and can't
598 // be invoked (FedoraGS3Exception.missingTargetService)
599 // Don't rethrow this, if AXIS can't find the user-specified
600 // portAddressSuffix, we will try with the default suffix next
601 APIA = null;
602 } else { // if trying default portAddressSuffix or if any other
603 // RemoteException was generated (whose cause is something
604 // other than an authentication failure) rethrow it.
605 throw re;
606 }
607 } catch(Exception e) { // Other Exceptions
608 // Could possibly be a ServiceException when using ServiceLocator
609 if(isUserSpecifiedPortAddressSuffix) {
610 APIA = null; // we won't throw other exceptions yet until
611 // we have tried the default PortAddressSuffix for the baseURL
612 } else {
613 throw new FedoraGS3InitFailureException(e);
614 }
615 }
616 }
617
618 /** Gets all greenstone collections. Searches for greenstone:*-collection.
619 * Method getCollections() defaults to getting only those objects in fedora's
620 * repository whose pids are of the format greenstone:*-collection.
621 * The use of AutoFinder and findObjects is shown in
622 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
623 * The Fedora-APIA's method definition of findObjects is:
624 * <pre>
625 * fedora-types:FieldSearchResult findObjects(
626 * fedora-types:ArrayOfString resultFields,
627 * xsd:nonNegativeInteger maxResults,
628 * fedora-types:FieldSearchQuery query )
629 * </pre>
630 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
631 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
632 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
633 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html>Type definition of 2.2.1 FieldSearchQuery</a>
634 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
635 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
636 *
637 * @return an array of Strings containing the pids of all collections
638 * matching the format greenstone:*-collection.
639 */
640 public String[] getCollections() throws RemoteException
641 {
642 // Available constructors:
643 // FieldSearchQuery(java.util.List conditions)
644 // FieldSearchQuery(java.lang.String terms)
645 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
646 FieldSearchQuery query = new FieldSearchQuery();
647 query.setTerms(queryStr);
648 query.setConditions(null);
649 // we'd like pid and title returned for each object
650 // we pass maxResults=null to get all objects that match
651 // (i.e. all collections)
652 String[] pids = null;
653
654 FieldSearchResult collection = AutoFinder.findObjects(
655 APIA, new String[]{"pid", "title"}, maxresults, query);
656 ObjectFields[] results = collection.getResultList();
657 pids = new String[results.length];
658 for(int i = 0; i < results.length; i++) {
659 pids[i] = results[i].getPid();
660 }
661 return pids;
662 }
663
664 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
665 * top-level documents or document sections - have a DC datastream. This
666 * method returns the content (XML) of the DC datastream as it is stored in
667 * fedora's repository.
668 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
669 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
670 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
671 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
672 * The method signature is:
673 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
674 * where dsID = itemID (look at datastreams page of running fedora instance)
675 * To access the XML content of the MIMETypedObject returned, we use its method
676 * bytes[] getStream(), but when instantiating a String from this, we have to
677 * use the String() contructor where we can specify the charset encoding (in
678 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
679 * @return a String version of the XML in the DC datastream for the fedora
680 * object denoted by pid.
681 * @param pid - the fedora persistent identifier for an item in the fedora
682 * repository.
683 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
684 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
685 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
686 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
687 */
688 public String getDC(String pid)
689 throws RemoteException, UnsupportedEncodingException
690 {
691 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
692 // datastream ID, dsID = itemID, look at a running fedora
693 MIMETypedStream dcStream
694 = APIA.getDatastreamDissemination(pid, DC, null);
695 //asOfDateTime = null to get the current version of the dataStream
696
697 // need to set the charset encoding to UTF8
698 return new String(dcStream.getStream(), UTF8);
699 }
700
701 /** All "greenstone:*" objects in fedora (be they collections be they
702 * collections, top-level documents or document sections) have an EX
703 * datastream. This method returns the content (XML) of the EX datastream as
704 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
705 * @return a String version of the XML in the EX datastream for the fedora
706 * object denoted by pid.
707 * @param pid - the fedora persistent identifier for an item in the fedora
708 * repository.
709 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
710 * asOfDateTime).
711 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
712 * @see String getDC(String pid) throws Exception
713 * */
714 public String getEX(String pid)
715 throws RemoteException, UnsupportedEncodingException
716 {
717 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
718 //asOfDateTime = null to get the current version of the dataStream
719
720 // need to set the charset encoding to UTF8
721 return new String(exStream.getStream(), UTF8);
722 }
723
724 /** Some "greenstone:*" top-level documents in the fedora repository (but not
725 * greenstone collections or document sections) have a DLS metadata datastream.
726 * This method returns the content (XML) of the DLS datastream as is. (It calls
727 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
728 * @return a String version of the XML in the DLS datastream for the fedora
729 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
730 * @param pid - the fedora persistent identifier for an item in the fedora
731 * repository.
732 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
733 * asOfDateTime).
734 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
735 * @see String getDC(String pid) throws Exception
736 * */
737 public String getDLS(String pid)
738 throws RemoteException, UnsupportedEncodingException
739 {
740 MIMETypedStream dlsStream = null;
741 // If there is no DLS datastream, it throws an exception (whose class
742 // fedora.server.errors.DatastreamNotFoundException can't be imported
743 // here (it's not in the client side fedora.server.* package, but on
744 // the server side package of that name):
745 try{
746 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
747 //asOfDateTime=null to get the current version of the dataStream
748 } catch(RemoteException e) {
749 //These two don't work:
750 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
751 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
752
753 if(e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
754 { // there is no DLS data stream for this document
755 return "";
756 }
757 else { // different problem, exception due to different cause
758 throw(e);
759 }
760 }
761 if(dlsStream == null)
762 return "";
763 // need to set the charset encoding to UTF8
764 return new String(dlsStream.getStream(), UTF8);
765 }
766
767 /** All "greenstone:*" objects in fedora (be they collections or documents)
768 * have a TOC datastream. This method returns the content (XML) of the TOC
769 * datastream as is. (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
770 * @return a String version of the XML in the TOC datastream for the fedora
771 * object denoted by pid.
772 * @param pid - the fedora persistent identifier for an item in the fedora
773 * repository.
774 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
775 * asOfDateTime)
776 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
777 * @see String getDC(String pid) throws Exception
778 * */
779 public String getTOC(String pid)
780 throws RemoteException, UnsupportedEncodingException
781 {
782 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
783 //asOfDateTime = null to get the current version of the dataStream
784
785 // need to set the charset encoding to UTF8
786 return new String(tocStream.getStream(), UTF8);
787 }
788
789 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
790 * for the collections indicated by collPIDs.
791 * @param collPIDs - an array of Strings denoting the pids for greenstone
792 * collections stored in the fedora repositoryl. These should be of the
793 * format "greenstone:&lt;collectionName&gt;-collection". */
794 public String[] getCollectionNames(String[] collPIDs) {
795 String[] collNames = new String[collPIDs.length];
796 for(int i = 0; i < collPIDs.length; i++)
797 collNames[i] = getCollectionName(collPIDs[i]);
798 return collNames;
799 }
800
801 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
802 * in the parameter collNames.
803 * @param collNames - a list of names of greenstone collections
804 * stored in the fedora repository. */
805 public String[] getCollectionPIDs(String[] collNames) {
806 String[] collPIDs = new String[collNames.length];
807 for(int i = 0; i < collNames.length; i++)
808 collPIDs[i] = getCollectionName(collNames[i]);
809 return collPIDs;
810 }
811
812 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
813 * denoted by parameter collName.
814 * @param collName - the name of a greenstone collection stored
815 * stored in the fedora repository. */
816 public String getCollectionPID(String collName) {
817 return GREENSTONE_+collName+_COLLECTION;
818 }
819
820 /**
821 * Gets the title of the collection denoted by the given collection's pid by
822 * retrieving the title metadata for it from the collection's EX datastream.
823 * @return the title (in the default language, else English, else the
824 * first title found) for the particular collection denoted by its PID.
825 * @param collPID is the pid of a greenstone collection in the fedora
826 * repository. */
827 public String getCollectionTitle(String collPID)
828 throws RemoteException, UnsupportedEncodingException,
829 SAXException, IOException
830 {
831 String title = null; // has to be null initially, we do a check on it
832 // Parse the EX datastream (XML), and in its DOM, find the
833 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
834 // There might be one OR several of those with attribute
835 // name="collectionname". If there's only one, then get that.
836 // If there are several, there would possibly a be qualifier attribute,
837 // in which case get qualifier=lang (where lang is the member variable)
838 // If there is no qualifier with the requested language, then get the
839 // english one which is likely to be there, else return the title for
840 // the first collectionname .
841
842 MIMETypedStream exdata
843 = APIA.getDatastreamDissemination(collPID, EX, null);
844 String exStream = new String(exdata.getStream(), UTF8);
845
846 InputSource source = new InputSource(new StringReader(exStream));
847 Document doc = builder.parse(source);
848 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
849 NodeList children = docEl.getChildNodes();
850
851 String firstName = "";
852 String englishName = "";
853 for(int i = 0; i < children.getLength(); i++ ) {
854 Node n = children.item(i);
855 if(n.getNodeType() == Node.ELEMENT_NODE) {
856 Element e = (Element)n;
857 if(e.hasAttribute(NAME)
858 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
859 firstName = FedoraCommons.getValue(e);
860 if(!e.hasAttribute(QUALIFIER)) {
861 title = FedoraCommons.getValue(e);
862 break;
863 }
864 else if(e.getAttribute(QUALIFIER).equals(lang)) {
865 title = FedoraCommons.getValue(e);
866 break;
867 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
868 englishName = FedoraCommons.getValue(e);
869 }
870 }
871 }
872 }
873
874 // if the title is still not set to that of the requested language,
875 // then try setting it to the collection name in English. If English
876 // isn't available, then set it to the first collection name provided
877 // (in whichever language).
878 if(title == null) {
879 title = englishName.equals("") ? firstName : englishName;
880 }
881 doc = null;
882 return title;
883 }
884
885 /** @return the collection titles for all the collections indicated by
886 * collPIDs.
887 * @param collPIDs - a list of pids identifying greenstone collections
888 * stored in the fedora repository. */
889 public String[] getCollectionTitles(String[] collPIDs)
890 throws RemoteException, UnsupportedEncodingException,
891 SAXException, IOException
892 {
893 String[] titles = new String[collPIDs.length];
894
895 // parse each EX datastream (XML) which contains the gs3-extracted meta.
896 for(int i = 0; i < collPIDs.length; i++) {
897 titles[i] = getCollectionTitle(collPIDs[i]);
898 }
899 return titles;
900 }
901
902 /** @return the title metadata for the given doc objects of a collection.
903 * These titles are returned in the same order as the given docIDs.
904 * (The docPIDs already contain the collection name anyway.)
905 * @param docPIDs - a list of pids identifying documents stored in the
906 * fedora repository. */
907 public String[] getDocTitles(String[] docPIDs)
908 throws RemoteException, UnsupportedEncodingException,
909 SAXException, IOException
910 {
911 String[] titles = new String[docPIDs.length];
912 for(int i = 0; i < docPIDs.length; i++) {
913 titles[i] = getDocTitle(docPIDs[i]);
914 }
915 return titles;
916 }
917
918 /** Gets the title metadata for a particular doc object in a collection
919 * denoted by docPID. The docPID already contains the collection name.
920 * @return the title for the fedora document item denoted by docPID
921 * @param docPID is the pid of the document in the fedora repository
922 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
923 public String getDocTitle(String docPID)
924 throws RemoteException, UnsupportedEncodingException,
925 SAXException, IOException
926 {
927 // We need the extracted metadata file, and find its
928 // documentElement's child
929 // <ex:metadata name="Title">sometitle</ex:metadata>
930 // where the title we return is sometitle
931
932 String title = "";
933 MIMETypedStream exdata
934 = APIA.getDatastreamDissemination(docPID, EX, null);
935 String exStream = new String(exdata.getStream(), UTF8);
936 return getTitle(exStream);
937 }
938
939 /** Given a string representation of a document's or document section's
940 * EX datastream -- which is a greenstone extracted metadata XML file --
941 * of the form:
942 * &lt;ex&gt;
943 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
944 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
945 * ...
946 * &lt;/ex&gt;
947 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
948 * returns the value embedded in that element ('sometitle' in
949 * the example above).
950 * @return the title metadata of the document/document section whose EX
951 * datastream is passed as parameter
952 * @param exStream the EX datastream in String form of the document or
953 * document section. */
954 protected String getTitle(String exStream)
955 throws SAXException, IOException
956 {
957 String title = "";
958 InputSource source = new InputSource(new StringReader(exStream));
959 Document doc = builder.parse(source);
960 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
961 NodeList children = docEl.getChildNodes();
962
963 // Cycle through all the *element* children of <ex:ex></ex:ex>
964 // which are all of the form:
965 // <ex:metadata name="somename">somevalue</ex:metadata>
966 // Find the one where name="Title", its value is the title
967 for(int i = 0; i < children.getLength(); i++ ) {
968 Node n = children.item(i);
969 if(n.getNodeType() == Node.ELEMENT_NODE) {
970 Element e = (Element)n;
971 if(e.hasAttribute(NAME)
972 && e.getAttribute(NAME).equals(TITLE)) {
973 title = FedoraCommons.getValue(e);
974 break;
975 }
976 }
977 }
978 return title;
979 }
980
981 /** @return the title metadata for the given document sections.
982 * These titles are returned in the same order as the given docPIDs
983 * and associated sectionIDs.
984 * (The docPIDs already contain the collection name anyway.)
985 * @param docPIDs - a list of pids identifying documents stored in the
986 * fedora repository.
987 * @param sectionIDs - a list of sectionIDs identifying individual sections
988 * of documents stored in the fedora repository whose titles are requested. */
989 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
990 throws RemoteException, UnsupportedEncodingException,
991 SAXException, IOException
992 {
993 String[] titles = new String[docPIDs.length];
994 for(int i = 0; i < docPIDs.length; i++) {
995 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
996 }
997 return titles;
998 }
999
1000 /** @return the title metadata for the given document section.
1001 * (The docPID already contain the collection name anyway.)
1002 * @param docPID - a pid identifying a document in the fedora repository.
1003 * @param sectionID - the sectionID of the section of the
1004 * document whose title is requested. */
1005 public String getSectionTitle(String docPID, String sectionID)
1006 throws UnsupportedEncodingException, RemoteException,
1007 SAXException, IOException
1008 {
1009 String ex = this.getSectionEXMetadata(docPID, sectionID);
1010 return getTitle(ex);
1011 }
1012
1013 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1014 * returns the PIDs of the data objects found, with the exception of
1015 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1016 * collection PID.
1017 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1018 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1019 * the collection and not an object of the same collection) are returned.
1020 * All pids that do not map to a collection are assumed to be documents!
1021 * @return a list of the pids of all the (doc) objects in a collection.
1022 * @param colPID is the pid of the greenstone collection stored in
1023 * the fedora repository. */
1024 public String[] getCollectionDocs(String colPID)
1025 throws RemoteException
1026 {
1027 String colName = getCollectionName(colPID);
1028 //LOG.debug("colName: " + colName);
1029
1030 // Search fedora objects for pid=greenstone:<colName>-*
1031 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1032 // searches for "greenstone:"+colName+"-*";
1033 FieldSearchQuery query = new FieldSearchQuery();
1034 query.setTerms(queryStr);
1035 query.setConditions(null);
1036 String[] pids = null;
1037
1038 FieldSearchResult objects = AutoFinder.findObjects(
1039 APIA, new String[]{"pid", "title"}, maxresults, query);
1040 ObjectFields[] results = objects.getResultList();
1041
1042 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1043 // that's not a document object:
1044 pids = new String[results.length-1]; // not storing collection object
1045 int index = 0; // keeps track of docPid index
1046 for(int i = 0; i < results.length; i++) {
1047 // check it's not a collection object
1048 if(!results[i].getPid().endsWith(_COLLECTION)) {
1049 pids[index] = results[i].getPid();
1050 index++;
1051 }
1052 }
1053
1054 return pids;
1055 }
1056
1057 /** Given the pid of a document fedora data object, this method will return
1058 * all itemIDs that are part of that data object and are Sections. For further
1059 * information see interface Comparable (implemented by String), SortedSet
1060 * and TreeSet.
1061 * @return an array of itemIDs of the Sections of the document,
1062 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1063 * @param docPID is a fedora pid identifying a greenstone document object.
1064 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1065 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1066 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1067 */
1068 public String[] getSectionNames(String docPID) throws RemoteException {
1069 // DatastreamDef[] listDatastreams(
1070 // java.lang.String pid, java.lang.String asOfDateTime)
1071
1072 // listDatastreams returns information on each item (including itemID=dsID)
1073 // in the document object indicated by docPID
1074
1075 // Need to give an object version number, because null for asOfDateTime
1076 // does not return any datastreams!
1077 String[] times = APIA.getObjectHistory(docPID);
1078
1079 DatastreamDef[] datastreams = APIA.listDatastreams(
1080 docPID, times[times.length-1]);
1081
1082 // TreeSet is a SortedSet. We're going to put Strings into it,
1083 // and Strings implement interface Comparable already.
1084 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1085 for(int i = 0; i < datastreams.length; i++) {
1086 String itemID = datastreams[i].getID();
1087 if (itemID.startsWith("SECTION"))
1088 orderedList.add(itemID);
1089 }
1090
1091 String[] sectionNames = new String[orderedList.size()];
1092 orderedList.toArray(sectionNames);
1093 orderedList = null;
1094 return sectionNames;
1095 }
1096
1097 /** Given the pid of a document fedora data object, this method will return all
1098 * itemIDs that are part of that data object and are Sections, but just the
1099 * Section numbers are returned. For further information see interface Comparable
1100 * (implemented by String), SortedSet and TreeSet.
1101 * @return an array of itemIDs of the Section numbers of the document
1102 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1103 * @param docPID is a fedora pid identifying a greenstone document object.
1104 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1105 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1106 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1107 */
1108 public String[] getSectionNumbers(String docPID) throws RemoteException {
1109 String[] times = APIA.getObjectHistory(docPID);
1110
1111 DatastreamDef[] datastreams
1112 = APIA.listDatastreams(docPID, times[times.length-1]);
1113 //Vector v = new Vector(datastreams.length);
1114 TreeSet orderedList = new TreeSet();
1115
1116 for(int i = 0; i < datastreams.length; i++) {
1117 String itemID = datastreams[i].getID();
1118 if (itemID.startsWith("SECTION")) {
1119 //int index = SECTION.length();
1120 //itemID = itemID.substring(index);
1121 itemID = removePrefix(itemID, SECTION);
1122 orderedList.add(itemID);
1123 }
1124 }
1125
1126 String[] sectionNumbers = new String[orderedList.size()];
1127 orderedList.toArray(sectionNumbers);
1128 orderedList = null;
1129
1130 return sectionNumbers;
1131 }
1132
1133 /** @return the titles for the document sections denoted by the parameters.
1134 * @param docPID is a fedora pid identifying a greenstone document object.
1135 * @param sectionIDs is a list of identifiers identifying sections in the
1136 * document denoted by docPID, whose titles need to be returned. Each
1137 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1138 * or a section number (eg. 1.5.1). */
1139 public String[] getTitles(String docPID, String[] sectionIDs)
1140 throws RemoteException, UnsupportedEncodingException,
1141 SAXException, IOException
1142 {
1143 String[] titles = new String[sectionIDs.length];
1144 for(int i = 0; i < titles.length; i++)
1145 titles[i] = getTitle(docPID, sectionIDs[i]);
1146 return titles;
1147 }
1148
1149 /** @return the title for the document section denoted by the parameters.
1150 * @param docPID is a fedora pid identifying a greenstone document object.
1151 * @param sectionID identifies the particular section in the document denoted
1152 * by docPID, whose title needs to be returned. The sectionID may be either a
1153 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1154 public String getTitle(String docPID, String sectionID)
1155 throws RemoteException, UnsupportedEncodingException,
1156 SAXException, IOException
1157 {
1158 // Compose the itemID for the EX data stream from the number in the
1159 // sectionID:
1160 String exID = removePrefix(sectionID, SECTION);
1161 exID = EX+convertToMetaNumber(exID);
1162
1163 // Retrieve the extracted metadata stream (EX, in XML) for the given
1164 // section
1165 String exStream = getItem(docPID, exID);
1166
1167 // Extract the title from the XML, look for:
1168 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1169 InputSource source = new InputSource(new StringReader(exStream));
1170 Document doc = builder.parse(source);
1171 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1172 NodeList children = docEl.getElementsByTagName(
1173 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1174 for(int i = 0; i < children.getLength(); i++) {
1175 Element e = (Element)children.item(i);
1176 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1177 return FedoraCommons.getValue(e); // extract and return the title
1178 }
1179 return ""; // if we got here, then we couldn't find a title
1180 }
1181
1182 /** @return the section's XML (as a String) as it is stored in fedora.
1183 * Works out if sectionID is a sectionName or sectionNumber.
1184 * @param docPID - a fedora pid identifying a greenstone document object.
1185 * @param sectionID - identifyies the particular section in the
1186 * document denoted by docPID, may be a section name or number. */
1187 public String getSection(String docPID, String sectionID)
1188 throws RemoteException, UnsupportedEncodingException
1189 {
1190 if(!sectionID.startsWith(SECTION)) // then it has only section number
1191 sectionID = SECTION+sectionID;
1192
1193 String sectionXML = this.getItem(docPID, sectionID);
1194 return sectionXML;
1195 }
1196
1197 /** @return the required section's DC metadata XML datastream.
1198 * @param docPID - a fedora pid identifying a greenstone document object.
1199 * @param sectionID - identifyies the particular section in the
1200 * document denoted by docPID, may be a section name or number. */
1201 public String getSectionDCMetadata(String docPID, String sectionID)
1202 throws RemoteException, UnsupportedEncodingException
1203 {
1204 String dcID = removePrefix(sectionID, SECTION);
1205 // ensure we have just the section number
1206 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1207
1208 // now get the DC datastream for that number
1209 String dcXML = this.getItem(docPID, dcID);
1210 return dcXML;
1211 }
1212
1213 /** Returns the section EX metadata XML datastream for SectionID which may be
1214 * a section name or number. Currently a few EX files are named awkwardly:
1215 * the EX file for section 1.* is actually associated with datastream EX.*.
1216 * But subsequent EX datastreams are named appropriately: for instance,
1217 * EX2.1.1 matches with section 2.1.1
1218 * @return the required section's EX metadata XML datastream.
1219 * @param docPID - a fedora pid identifying a greenstone document object.
1220 * @param sectionID - identifyies the particular section in the
1221 * document denoted by docPID, may be a section name or number. */
1222 public String getSectionEXMetadata(String docPID, String sectionID)
1223 throws RemoteException, UnsupportedEncodingException
1224 {
1225 String exID = removePrefix(sectionID, SECTION);
1226 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1227
1228 // now get the EX datastream for that for number
1229 String exXML = this.getItem(docPID, exID);
1230 return exXML;
1231 }
1232
1233 /** @return the XML content of the TOC of just that portion of the TOC which
1234 * contains the section denoted by sectionID and its direct child subsections.
1235 * The children are returned in the order they are encountered, which
1236 * happens to be in the required order of ascending sectionID.
1237 * @param docPID - a fedora pid identifying a greenstone document object.
1238 * @param sectionID - identifyies the particular section in the
1239 * document denoted by docPID, may be a section name or number. */
1240 public Element getChildrenOfSectionXML(String docPID, String sectionID)
1241 throws RemoteException, UnsupportedEncodingException,
1242 SAXException, IOException
1243 {
1244 // Store just the number
1245 String sectionNumber = removePrefix(sectionID, SECTION);
1246 // get the TOC XML datastream as a String
1247 String xmlTOC = getTOC(docPID);
1248
1249 // convert it into a DOM document
1250 InputSource source = new InputSource(new StringReader(xmlTOC));
1251 Document doc = builder.parse(source);
1252 // toplevel element docEl = <Section id="1"></Section>
1253 Element docEl = doc.getDocumentElement();
1254
1255 // check whether we're requested to return the toplevel element itself
1256 if(sectionID.equals("") || // subSection of entire docPID is requested
1257 (docEl.hasAttribute(ID) && docEl.getAttribute(ID).equals(sectionNumber)))
1258 return getSubstructure(docEl, false);
1259
1260 // Otherwise, get all <Section> elements and find the
1261 // <Section id="sectionNumber"></Section> and return that and its
1262 // children
1263 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1264 for(int i = 0; i < sections.getLength(); i++) {
1265 Element e = (Element)sections.item(i);
1266 if(e.hasAttribute(ID)
1267 && e.getAttribute(ID).equals(sectionNumber))
1268 {
1269 //System.err.println("Found: " + e.getAttribute(ID));
1270 return getSubstructure(e, false); // false: get just e and children
1271 }
1272 }
1273 return null; // not found
1274 }
1275
1276 /** @return a string representing the XML content of the TOC of just
1277 * that portion of the TOC which contains the section denoted by sectionID
1278 * and its direct child subsections.
1279 * The children are returned in the order they are encountered, which
1280 * happens to be in the required order of ascending sectionID.
1281 * @param docPID - a fedora pid identifying a greenstone document object.
1282 * @param sectionID - identifyies the particular section in the
1283 * document denoted by docPID, may be a section name or number. */
1284 public String getChildrenOfSection(String docPID, String sectionID)
1285 throws RemoteException, UnsupportedEncodingException,
1286 SAXException, IOException, TransformerException
1287 {
1288 Element children = getChildrenOfSectionXML(docPID, sectionID);
1289 return (children == null) ? "" : FedoraCommons.elementToString(children);
1290 }
1291
1292 /** @return the part of the TOC XML file (which outlines doc structure)
1293 * relating to the given section. This includes the section denoted by
1294 * sectionID as well as all descendent subsections thereof.
1295 * @param docPID - a fedora pid identifying a greenstone document object.
1296 * @param sectionID - identifyies the particular section in the
1297 * document denoted by docPID, may be a section name or number. */
1298 public Element getSubsectionXML(String docPID, String sectionID)
1299 throws RemoteException, UnsupportedEncodingException,
1300 SAXException, IOException
1301 {
1302 // get the TableOfContents (TOC) XML datastream as a String
1303 String xmlTOC = getTOC(docPID);
1304
1305 // convert it into a DOM document
1306 InputSource source = new InputSource(new StringReader(xmlTOC));
1307 Document doc = builder.parse(source);
1308 // toplevel element docEl = <Section id="1"></Section>
1309 Element docEl = doc.getDocumentElement();
1310
1311 if(sectionID.equals("")) // subSection of entire docPID is requested
1312 return docEl;
1313
1314 // Store just the number
1315 String sectionNumber = removePrefix(sectionID, SECTION);
1316 // Check whether we're requested to return the toplevel element itself
1317 // If sectionNumber=1, then the top-level element/document element
1318 // of the TOC XML is requested, so return the TOC as is.
1319 if(sectionNumber.equals("1"))
1320 return docEl;
1321
1322 // Get all <Section> elements and find the
1323 // <Section id="sectionNumber"></Section> and return that
1324 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1325 for(int i = 0; i < sections.getLength(); i++) {
1326 Element e = (Element)sections.item(i);
1327 if(e.hasAttribute(ID)
1328 && e.getAttribute(ID).equals(sectionNumber)) {
1329 //System.err.println("Found: " + e.getAttribute(ID));
1330 return getSubstructure(e, true); // true:get all descendents
1331 }
1332 }
1333 return null; // not found
1334 }
1335
1336 /** @return a String representation of the part of the TOC XML file
1337 * (which outlines doc structure) relating to the given section. This
1338 * includes the section denoted by sectionID as well as all descendent
1339 * subsections thereof.
1340 * @param docPID a fedora pid identifying a greenstone document object.
1341 * @param sectionID identifyies the particular section in the
1342 * document denoted by docPID, may be a section name or number. */
1343 public String getSubsection(String docPID, String sectionID)
1344 throws RemoteException, UnsupportedEncodingException, SAXException,
1345 IOException, TransformerException
1346 {
1347 // Store just the number
1348 String sectionNumber = removePrefix(sectionID, SECTION);
1349 // get the TableOfContents (TOC) XML datastream as a String
1350 String xmlTOC = getTOC(docPID);
1351
1352 // Check whether we're requested to return the toplevel element itself
1353 // If sectionNumber=1, then the top-level element/document element
1354 // of the TOC XML is requested, so return the TOC as is.
1355 if(sectionNumber.equals("1"))
1356 return xmlTOC;
1357
1358 // else
1359 Element subsection = getSubsectionXML(docPID, sectionID);
1360 return (subsection == null) ? "" : FedoraCommons.elementToString(subsection);
1361 }
1362
1363 /** Implements browsing document titles of a greenstone collection stored in
1364 * the fedora repository by letter.
1365 * @return the document pids whose titles start with the given letter.
1366 * @param letter - the starting letter to browse by.
1367 */
1368 public String[] browseTitlesByLetter(final String collName, final String letter)
1369 throws RemoteException, FedoraVersionNotSupportedException
1370 {
1371 String[] pids = null;
1372
1373 // We want to do the following kind of search (assuming letter=f
1374 // and collName=demo):
1375 // pid~greenstone:demo* title~f*
1376
1377 // We don't need to normalise the letter first (to search titles starting
1378 // with both uppercase and lowercase versions of the letter), because
1379 // Fedora always searches for both.
1380 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1381 // ANYWHERE in their titles!
1382 // SOLUTION: search the collection for all titles containing f as given,
1383 // retrieving pid and title fields. Then from the list of results, select
1384 // only those titles that start with the given letter.
1385 // This may seem an unnecessarily cumbersome job (when it looked like it
1386 // should have worked with just title~f*), BUT, at least the resulting
1387 // documents will be reduced to a set of titles containing f; rather than
1388 // having to search *all* documents in the collection.
1389 final String title = letter+WILDCARD;
1390
1391 FieldSearchResult objects = findObjectsWithTitlesContaining(
1392 collName, title);
1393 ObjectFields[] results = objects.getResultList();
1394 TreeSet v = new TreeSet(); // TreeSet to return the results in
1395 //alphabetical order
1396 for(int i = 0; i < results.length; i++) {
1397 // from the result list, select those titles that don't
1398 // just *contain* the letter, but actually start with it:
1399 String resultTitle = results[i].getTitle(0);
1400 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1401 String pid = results[i].getPid();
1402 // skip the collection object itself
1403 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1404 v.add(pid);
1405 //LOG.debug(resultTitle);
1406 }
1407 }
1408 }
1409 pids = new String[v.size()];
1410 v.toArray(pids);
1411 return pids;
1412 }
1413
1414 /** Implements querying document DC titles of a greenstone collection stored in
1415 * the fedora repository for a term that may occur anywhere in their titles.
1416 * @return the document pids whose DC titles contain the parameter term.
1417 * @param titleContents - the word or phrase to search the collection's
1418 * document titles for. Only one word, and this method finds Greenstone
1419 * DOCUMENT titles CONTAINING that word (if any).
1420 * @param startsWith - if true, searches for titles that start with
1421 * titleContents. Else it searches for titles that contain titleContents. */
1422 public String[] searchDocumentTitles(String collName, String titleContents,
1423 boolean startsWith)
1424 throws RemoteException, FedoraVersionNotSupportedException
1425 {
1426 String[] pids = null;
1427
1428 // We want to do the following kind of search (when written in Fedora's
1429 // REST format - see http://localhost:8080/fedora/search):
1430 // pid~greenstone:<colname>* title~<1st word of titleContents>
1431
1432 // We don't need to normalise the word first (to search titles starting
1433 // with both uppercase and lowercase versions of it), because
1434 // Fedora always searches for the normalised word.
1435
1436 // 2 difficulties:
1437 // - We can only search for single words with Fedora's Conditional Search.
1438 // Obtain pids and titles of documents containing the first word and then
1439 // we filter the titles to those containing the entire phrase of
1440 // titleContents.
1441 // - Searching for title~FirstWord returns all documents containing
1442 // this word ANYWHERE in their titles. If parameter startsWith is false,
1443 // then this is fine. But if parameter startsWith is true, then go
1444 // through all the resulting titles found (containing FirstWord), select
1445 // only pids of those titles that contain the entire phrase titleContents
1446
1447 final String pid = GREENSTONE_+collName+WILDCARD;
1448
1449 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1450 // if titleContents is a phrase (contains space), then it's not
1451 // a single word, in which case search for just the first word
1452 String title = titleContents; // assume it's a single word
1453 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1454 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1455
1456 FieldSearchResult objects = findObjectsWithTitlesContaining(
1457 collName, title);
1458 if(objects == null) {
1459 final String[] empty = {};
1460 return empty;
1461 }
1462
1463 // Go through all the titles found and for those that match the criteria*,
1464 // store their pid. *Criteria: titles that start with OR contain the
1465 // word OR phrase of titleContents.
1466 ObjectFields[] results = objects.getResultList();
1467 Vector v = new Vector(); // return pids in the order found
1468 for(int i = 0; i < results.length; i++) {
1469 // from the result list, select those titles that don't
1470 // just *contain* the first word, but the entire phrase of
1471 // words in titleContents:
1472 String resultTitle = results[i].getTitle(0);
1473 boolean accepted = false; // accept the resultTitle found
1474
1475 String resultPID = results[i].getPid();
1476 // skip the collection object itself, since it's not a document
1477 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1478 accepted = false;
1479 }
1480 // if titleContents is a single word and we are checking
1481 // whether resultTitle contains titleContents:
1482 else if(indexOfFirstSpace == -1) { // titleContents is a single word
1483 if(!startsWith) // titles that *contain* the word titleContents
1484 accepted = true; //accept all titles found
1485 // else startWith: accept titles starting with word titleContents
1486 else if (resultTitle.toLowerCase().startsWith(
1487 titleContents.toLowerCase()))
1488 accepted = true;
1489
1490 }
1491 else { // otherwise, titleContents is a phrase of >1 word, need
1492 // to check that the result title contains the entire phrase
1493 if(startsWith && resultTitle.toLowerCase().startsWith(
1494 titleContents.toLowerCase()))
1495 accepted = true;
1496 else if(!startsWith && resultTitle.toLowerCase().contains(
1497 titleContents.toLowerCase()))
1498 accepted = true;
1499 }
1500
1501 // if the resultTitle fit the criteria, store its pid
1502 if(accepted) {
1503 v.add(resultPID);
1504 //System.out.println(resultTitle);
1505 }
1506
1507 }
1508 pids = new String[v.size()];
1509 v.toArray(pids);
1510 return pids;
1511 }
1512
1513
1514 /**
1515 * @param collName - the collection of documents we'll be searching in.
1516 * @param titleWord - the word we'll be searching the document titles for.
1517 * (Fedora's search returns all objects whose title contains that word).
1518 *
1519 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1520 * (see link):
1521 * <pre>
1522 * "There are two search methods: a search on all fields or a search on
1523 * specific fields. To search all fields the setTerms function of the
1524 * FieldSearchQuery must be used, with the paramater being the desired string.
1525 *
1526 * To search by specific fields, you must create an array of Condition
1527 * objects. Each condition consists of three parts:
1528 * the field to be searched (.setProperty()),
1529 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1530 * and the search string (.setValue())"
1531 * </pre>
1532 * We want to use the second search method above when browsing and searching,
1533 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1534 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1535 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1536 *
1537 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1538 * web services are defined. (The web.xml defines the "Servlets for REST-based
1539 * interfaces to the Fedora Repository Server").
1540 * Do a search on the word "search":
1541 * fedora.server.access.FieldSearchServlet is the class we need to look at
1542 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1543 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1544 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1545 * is what's used in the fedora client application that makes use of
1546 * the SOAP-based interface.
1547 *
1548 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1549 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1550 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1551 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1552 */
1553 protected FieldSearchResult findObjectsWithTitlesContaining(
1554 String collName, final String titleWord)
1555 throws RemoteException, FedoraVersionNotSupportedException
1556 {
1557 // Searching for pids of the form "greenstone:gs2mgdemo*";
1558 final String pid = GREENSTONE_+collName+WILDCARD;
1559
1560 Condition[] conditions = new Condition[2];
1561 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1562 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1563
1564 FieldSearchQuery query = new FieldSearchQuery();
1565 query.setConditions(conditions);
1566
1567 // We'd like pid and title returned for each object, because we'll make
1568 // use of title. We pass maxResults=null to get all objects that match
1569 // (i.e. all collections).
1570 FieldSearchResult objects = null;
1571 final String[] retrieveFields = {"pid", "title"};
1572 try {
1573 objects = AutoFinder.findObjects(
1574 APIA, retrieveFields, maxresults, query);
1575 // collection = APIA.findObjects(new String[]{"pid", "title"},
1576 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1577 } catch(RemoteException ex) {
1578 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1579 // fedoraVersion is too low, searching/browsing is not possible
1580 // (because class Condition has changed after 2.0, from 2.1.1
1581 // onwards)
1582 throw new FedoraVersionNotSupportedException(fedoraVersion);
1583 } else {
1584 LOG.error(
1585 "Remote exception when calling web service operation " +
1586 "findObject() to execute search:\n" + ex.getMessage());
1587 ex.printStackTrace();
1588 throw ex;
1589 }
1590 }
1591 return objects; // return the FieldSearchResult objects found
1592 }
1593
1594 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1595 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1596 * @param docPID - pid of a greenstone document in the fedora repository. */
1597 public String getDocName(String docPID) {
1598 return docPID.substring(docPID.indexOf('-')+1);
1599 }
1600
1601 /** @return the &lt;name&gt; in the parameter collPID
1602 * (greenstone:&lt;name&gt;-collection)
1603 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1604 * in the docPID (greenstone:&lt;name&gt;-docID).
1605 * @param collPID - pid of a greenstone collection in the fedora repository. */
1606 public String getCollectionName(String collPID) {
1607 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1608 }
1609
1610 /** Convert the given Element to a String representing the same XML.
1611 * @return an element containing a copy element e with either only its child
1612 * elements or with all its descendents (depending on whether parameter
1613 * descendents is true or false).
1614 * @param e - the element to start copying from.
1615 * @param descendents - if true, e is copied with all its descendetns into the
1616 * element that's returned. If false, only e and its direct children are copied
1617 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1618 */
1619 protected Element getSubstructure(Element e, boolean descendents)
1620 {
1621 Document doc = builder.newDocument();
1622 Node n = doc.importNode(e, descendents);
1623 // descendents=true: import/copy descendents.
1624 // Else, copy just current node e (later copy its direct children)
1625 doc.appendChild(n); // need to put the copied node into a document
1626 // else it won't have a parent doc (DOMSource can't work with it
1627 // without it having a document parent).
1628
1629 // if we are not recursively copying all descendents, then copy just
1630 // the childnodes:
1631 if(!descendents) { // then copy just the children
1632 // get e's children and copy them into the new document
1633 NodeList children = e.getChildNodes();
1634 for(int i = 0; i < children.getLength(); i++) {
1635 // create copy
1636 n = doc.importNode(children.item(i), false);
1637 // attach it to parent
1638 doc.getDocumentElement().appendChild(n);
1639
1640 // Now we need to indicate whether this new node (child) is a leaf
1641 // or not. (This is necessary for getChildrenOfSection(), else
1642 // it's hard to know if the children are leaves or have further
1643 // subsections.
1644 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1645 // we're dealing only with section children
1646
1647 // Check if the matching original had children:
1648 Element originalsChild = (Element)children.item(i);
1649 NodeList grandchildren =
1650 originalsChild.getElementsByTagName(SECTION_ELEMENT);
1651 if(grandchildren.getLength() > 0) {
1652 // original's child has children, so indicate this
1653 // in the copied child:
1654 Element child = (Element)n;
1655 child.setAttribute(TYPE, INTERNAL_NODE);
1656 }
1657 }
1658 }
1659 }
1660 return doc.getDocumentElement();
1661 }
1662
1663
1664 /**
1665 * Return a datastream of a document, given the document's id
1666 * and the item id of the datastream which is to be retrieved.
1667 * @return the XML (in String form) of the item denoted by itemID
1668 * that's part of the fedora data object denoted by docPID.
1669 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1670 * Can't retrieve images denoted by itemID using this method, only items
1671 * that are of XML format.
1672 * @param docPID - pid of a greenstone document in the fedora repository.
1673 * @param itemID - the itemID of a datastream of the fedora object
1674 * identified by docPID.
1675 */
1676 protected String getItem(String docPID, String itemID)
1677 throws RemoteException, UnsupportedEncodingException
1678 {
1679 // MIMETypedStream getDatastreamDissemination(
1680 // String pid, String dsID, asOfDateTime)
1681 MIMETypedStream datastream
1682 = APIA.getDatastreamDissemination(docPID, itemID, null);
1683 return new String(datastream.getStream(), UTF8);
1684 }
1685
1686 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1687 * returns "1.2.1".
1688 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1689 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1690 * However, the string str is returned unchanged if the prefix does not occur
1691 * at the start of str.
1692 * @return the String parameter str without the prefix.
1693 * It can be used to return the number of an itemID of a greenstone document
1694 * stored in the fedora repository without the given prefix.
1695 * @param prefix - the prefix which ought to be removed from the itemID.
1696 * @param str - the value of the itemID.
1697 */
1698 protected String removePrefix(String str, String prefix) {
1699 // do nothing in those cases where the prefix is not in param str
1700 if(!str.startsWith(prefix))
1701 return str;
1702 // otherwise:
1703 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1704 return "1" + str.substring(prefix.length());
1705 } else {
1706 return str.substring(prefix.length());
1707 }
1708 }
1709
1710 /** Given a number of the form x(.y.z), this method returns this number
1711 * as is, except when x = 1, in which case, it would return .y.z
1712 * That is, given number=3.2.1, this method would return 3.2.1
1713 * But, given number=1.2.3, this method would return .2.3.
1714 * When number=1, it is NOT a special case: "" is returned as explained.
1715 * @param number - a proper (fedora-greenstone document) section number
1716 * @return the same number as it ought to be for the associated EX, DC datastreama.
1717 */
1718 protected String convertToMetaNumber(String number) {
1719 if(number.startsWith("1.") || number.equals("1"))
1720 return number.substring(1); // remove the first char: the initial '1'
1721 else return number;
1722 }
1723
1724 /** @return fedora's baseURL. It's of the form
1725 * "http://localhost:8080/fedora" */
1726 public String getBaseURL() { return baseURL; }
1727
1728 /** @return the portAddressURL (in use) of the Fedora APIA
1729 * web service (should be the endpoint location in the APIA's
1730 * WSDL file).
1731 * It's usually of the form baseURL+"/services/access" */
1732 public String getPortAddressURL() {
1733 return this.baseURL + this.portAddressSuffix;
1734 }
1735
1736 /** @return the baseURL for gsdlAssocFiles */
1737 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1738
1739 public static void main(String args[]) {
1740 try {
1741 FedoraConnection fedoraCon
1742 = new FedoraConnection(new File("fedoraGS3.properties"));
1743
1744 String[] pids = null;
1745 pids = fedoraCon.getCollections();
1746 String[] titles = fedoraCon.getCollectionTitles(pids);
1747 for(int i = 0; i < pids.length; i++) {
1748 System.out.println("extracted title:" + titles[i]);
1749 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1750 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1751 for(int j = 0; j < docPIDs.length; j++) {
1752 System.out.println("\tExtr doc title: " + docTitles[j]);
1753 }
1754 }
1755
1756 String PID = "greenstone:gs2mgdemo-collection";
1757 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1758 String dcXML = fedoraCon.getDC(PID);
1759 String exXML = fedoraCon.getEX(PID);
1760 String tocXML = fedoraCon.getTOC(docPID);
1761 System.out.println("Dublin Core Metadata for " + PID
1762 + " is:\n" + dcXML);
1763 System.out.println("GS3 extracted metadata for " + PID
1764 + " is:\n" + exXML);
1765 System.out.println("Table of Contents for " + docPID
1766 + " is:\n" + tocXML);
1767
1768
1769 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1770 System.out.println("\nSection names for " + docPID + " are:");
1771 for(int i = 0; i < sectionNames.length; i++)
1772 System.out.println(sectionNames[i]);
1773
1774 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1775 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1776 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1777 System.out.println("\nSection numbers for " + docPID + " are:");
1778 for(int i = 0; i < sectionNumbers.length; i++) {
1779 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1780 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1781 }
1782
1783 String sectionID = "SECTION1"; //SECTION1.5
1784 System.out.println("\n");
1785 System.out.println(sectionID+ " - entire subsection:\n"
1786 + fedoraCon.getSubsection(docPID, sectionID));
1787
1788 System.out.println(sectionID + " and children:\n"
1789 + fedoraCon.getChildrenOfSection(docPID, sectionID));
1790
1791 System.out.println(
1792 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1793 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1794 for(int i = 0; i < pids.length; i++)
1795 System.out.println(pids[i]);
1796
1797 System.out.println(
1798 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1799 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1800 for(int i = 0; i < pids.length; i++)
1801 System.out.println(pids[i]);
1802
1803 System.out.println("\nDone - exiting.");
1804 System.exit(0);
1805 } catch(RemoteException re) {
1806 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1807 re.printStackTrace();
1808 } catch(Exception e) {
1809 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1810 e.printStackTrace();
1811 //LOG.error("Unable to instantiate FedoraConnection\n" + e);
1812 }
1813 }
1814}
Note: See TracBrowser for help on using the repository browser.