source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraConnection.java@ 26286

Last change on this file since 26286 was 26286, checked in by ak19, 12 years ago

GSearch works again with the latest version of Fedora and FedoraGSearch (3.6.1 and 2.5 respectively).

File size: 86.3 KB
RevLine 
[15222]1/**
2 *#########################################################################
3 * FedoraConnection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the * University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
[26270]22import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
[22300]23import org.greenstone.gsdl3.util.GSXML;
[15222]24
25import fedora.client.utility.AutoFinder;
26import fedora.server.access.FedoraAPIAServiceLocator;
27// The object for accessing FedoraAPI-A web services:
28import fedora.server.access.FedoraAPIA;
29
30// The definitions for all complex fedora types:
31import fedora.server.types.gen.MIMETypedStream;
32import fedora.server.types.gen.RepositoryInfo;
33import fedora.server.types.gen.FieldSearchResult;
34import fedora.server.types.gen.FieldSearchQuery;
35import fedora.server.types.gen.DatastreamDef;
36import fedora.server.types.gen.ObjectFields;
37import fedora.server.types.gen.Condition;
38import fedora.server.types.gen.ComparisonOperator;
39//import fedora.server.types.gen.*;
40
41import javax.net.ssl.SSLHandshakeException;
42import java.net.ConnectException;
43import org.xml.sax.SAXException;
44import java.io.UnsupportedEncodingException;
45import java.io.IOException;
46import javax.xml.parsers.ParserConfigurationException;
47import java.net.MalformedURLException;
48import java.rmi.RemoteException;
49
50import java.io.StringReader;
51import java.io.FileInputStream;
52import java.io.File;
53import java.util.TreeSet;
54import java.util.Properties;
55import java.util.Vector;
56
57import java.awt.GridLayout;
58import javax.swing.JLabel;
59import javax.swing.JOptionPane;
60import javax.swing.JPanel;
61import javax.swing.JPasswordField;
62import javax.swing.JTextField;
63
64import org.apache.log4j.Logger;
65import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
66import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
67import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3InitFailureException;
68import org.greenstone.fedora.services.FedoraGS3Exception.FedoraVersionNotSupportedException;
69import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
70
71import javax.xml.parsers.DocumentBuilderFactory;
72import javax.xml.parsers.DocumentBuilder;
73import javax.xml.transform.*;
74
75import org.xml.sax.InputSource;
76import org.w3c.dom.Document;
77import org.w3c.dom.Element;
78import org.w3c.dom.NodeList;
79import org.w3c.dom.Node;
80
81/** Class that establishes a connection with Fedora's web services (via
82 * Java stub classes for the same) and then provides methods to retrieve
 * Greenstone-specific data, such as the TOC, EX, DC, and Section
84 * datastreams of the Greenstone documents stored in Fedora's repository.
85 * These datastreams are returned as Strings without any changes being
86 * made to them.
87 * @author ak19
88*/
89public class FedoraConnection implements FedoraGS3DL {
90 /** The logging instance for this class */
91 private static final Logger LOG = Logger.getLogger(
92 FedoraConnection.class.getName());
93
94 /** The version of fedora that is supported by class FedoraConnection */
[22368]95 protected static final String SUPPORTED_VERSION = "3.3"; //"2.2.1";
96 // 3.3 works with genericSearch version 2.2
[26286]97 // 3.6.1 works with genericSearch version 2.5
[22368]98
[15222]99 /* Some fixed strings of known literals */
[22368]100 protected static final String GET = "/get/";
[15222]101
 // DemoSOAPClient declares and uses its APIA handle as a static member,
 // probably because none of the APIA (web service) methods remembers state.
 // In this class the handle is declared as an instance member instead.
105 /** The object used to access the Fedora API-A web service methods */
[22368]106 protected FedoraAPIA APIA;
[15222]107
108 /** Version of the running fedora server */
109 protected String fedoraVersion;
[22368]110 /** The location of the fedora server, usually of the form: protocol://host:port/fedora
111 * e.g. (and default) http://localhost:8080/fedora */
[15222]112 protected String baseURL;
113
114 /** The user-specified portAddressSuffix of the Fedora Access web services
115 * (endpoint URL in the WSDL), usually of the form
116 * http://localhost:8080/fedora/services/access
117 * Users can tell FedoraGS3 to try accessing that first by setting
118 * the "port.address.suffix" property in the properties file.
119 * FedoraGS3 itself will not write the portAddressSuffix currently used in
120 * the file for next time, but leave whatever value was entered in the
121 * properties file. The portAddress--not just suffix--currently in use (once
122 * the FedoraAPIA handle has been instantiated) can be obtained through
123 * getPortAddressURL() method. */
124 protected String portAddressSuffix;
125
[22368]126 /** The part of the portAddress that comes after the baseURL. By default and
127 * usually this is: "/services/access" */
[15222]128 protected static final String defaultPortAddressSuffix = "/services/access";
129
[22368]130 /** The preferred language of the display content */
[15222]131 protected String lang;
132 /** The maximum number of collections to retrieve */
133 protected int maxresults;
134 /** DocumentBuilder used to create and parse XML documents */
135 protected DocumentBuilder builder;
136
137 /** Static method that returns the version of Fedora supported by this
138 * class FedoraConnection. */
139 public static String getSupportedVersion() { return SUPPORTED_VERSION; }
140 /** The version of the running Fedora server, which may or may not
141 * match the supported version. */
142 public String getFedoraVersion() { return fedoraVersion; }
143
144 /** @return the default language used to query for titles (and anything else
145 * where there are multiple language options). Upon initialisation, this
146 * defaults to English. */
147 public String getLanguage() { return lang; }
148
149 /** Sets the the default language used to query for titles (and anything else
150 * where there are multiple language options). If the default language for any
151 * query is not available, then English ("en") is used. If that's not available
152 * then the first other available language is used.
153 * @param lang - the two-letter language code to set the default language to.
154 */
155 public void setLanguage(String lang) { this.lang = lang; }
156
157 /** The default maximum number of search results returned for a search. Upon
158 * initialisation, this defaults to Java's Integer.MAX_VALUE. */
159 public int getMaxResults() { return maxresults; }
160
161 /** Set the default maximum number of search results returned for a search.
162 * @param maxresults - the new default maximum number of search results to
163 * be returned. */
164 public void setMaxResults(int maxresults) { this.maxresults = maxresults; }
165
166 /** Code for this constructor is from DemoSOAPClient.java.
167 * Instantiates the APIA handle using the protocol, host, port, fedora
168 * server repository username and password.
169 * @param host - the fedora server host (may be prefixed with http:// or
170 * https:// if parameter protocol is empty). If there's no protocol, and
171 * no protocol prefixed to the host, then the protocol defaults to http.
172 * @param protocol - either http or https (or empty "")
173 * @param port - the port on which fedora is running.
174 * @param fedoraServerUsername - the administrator username required to
175 * access the fedora server's repository. ("fedoraAdmin" unless changed).
176 * @param fedoraServerPassword - the fedora server repository's
177 * administrator password. If none was set on fedora installation, this
178 * can be empty (""). */
179 public FedoraConnection(String protocol, String host, int port,
180 String fedoraServerUsername, String fedoraServerPassword)
181 throws ParserConfigurationException, MalformedURLException,
182 SSLHandshakeException, RemoteException, AuthenticationFailedException,
183 NotAFedoraServerException, ConnectException, Exception
184 {
185 try {
[21855]186 this.portAddressSuffix = "";
[15222]187 init(protocol, host, Integer.toString(port),
188 fedoraServerUsername, fedoraServerPassword);
189 } /*catch(RemoteException re) { //subclass of IOException
190 throw re;
191 } catch(SSLHandshakeException ssle) { //subclass of IOException
192 // this is also of type IOException
193 throw ssle;
194 }*/ catch(IOException ioe) { // connected to the wrong server
195 String exceptMsg = ioe.getMessage().toLowerCase();
196 if(exceptMsg.indexOf("request failed") != -1
197 || exceptMsg.indexOf("404") != -1)
198 throw new NotAFedoraServerException();
199 else // the IOException is not due the cause we thought it was, so
200 throw ioe; // rethrow whatever other IOException was caught (which
201 // could have been RemoteException or SSLHandshakeException
202 // or some other cause)
203 }
204 }
205
/** Default constructor, which asks the user for the host, port, fedora
 * username and password through the authentication popup.
 * It starts from an empty set of properties and delegates to
 * setInitialisationProperties(Properties), which keeps redisplaying the
 * popup while the connection attempt fails for a reason it handles:
 * (a) an authentication failure;
 * (b) a RemoteException (e.g. caused by a refused connection or an unknown
 * host), whose reason is shown in the popup;
 * (c) a ConnectException, i.e. the connection was refused because the host
 * or port is wrong or the fedora server is not running;
 * (d) an SSLHandshakeException, e.g. when 'https' was prefixed to the host
 * although it should have been 'http'.
 * NOTE: if a fedora server at https:// is not accessible, it takes a long
 * time for the popup to reappear.
 * An IOException whose message is not one of those recognised by
 * setInitialisationProperties is rethrown. (IOException is not in the throws
 * clause: it is covered by the more generic Exception.)
 * @throws CancelledException if the user cancels the popup. */
public FedoraConnection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // There are no stored values to preload, so the dialog loop starts from
    // an empty Properties object that is discarded afterwards.
    setInitialisationProperties(new Properties());
}
250
251 /** Single argument constructor that takes the name of the properties file
252 * defining the values of the initialisation parameters required to
253 * instantiate a FedoraConnection. These are fedora server username, password,
254 * host and port. If these values are not present in the file, they are set
255 * to "" before showing the initialisation input dialog.
256 * @param propertyFile is the name of the properties file specifying the
257 * values for Fedora server username, password, host and port. */
258 public FedoraConnection(File propertyFile)
259 throws ParserConfigurationException, MalformedURLException,
260 CancelledException, ConnectException, RemoteException,
261 SSLHandshakeException, Exception
262 {
263 Properties properties = new Properties();
264 // Load the properties from the given file
265 try{
266 if(propertyFile.exists()) {
267 properties.load(new FileInputStream(propertyFile));
268 }
269 } catch(Exception e) {
270 // If the file didn't exist or could not be located,
271 // then we just continue by creating empty properties
272 LOG.warn("Exception loading from propertyFile "
273 + propertyFile + ": " + e);
274 }
275
276 // Go through the process of showing the initialisation dialog
277 setInitialisationProperties(properties);
278
279 // Now let's save whatever values the user may have entered into the
280 // input dialog as the default values for next time the dialog shows
281 try {
282 java.io.FileOutputStream out = new java.io.FileOutputStream(
283 propertyFile); // same file as properties loading file
284 // First make sure errormessage gets stored as "" and doesn't
285 // cause problems next time.
286 properties.setProperty("errormessage", "");
287 // Don't save passwords
288 properties.setProperty("password", "");
289 // If the portAddressSuffix is in the file already, then it's
290 // user-specified and we shouldn't change it. But if there is no
291 // such property in the file, then create it and write it to the file
292 // with an empty string value:
293 String portSuffix = properties.getProperty("port.address.suffix");
294 if(portSuffix == null) {
295 properties.setProperty("port.address.suffix", "");
296 }
297
298 properties.store(out, "fedoraGS3 properties"); // write properties
299 // Javadoc states that "The output stream remains open after this
300 // method (Properties.store) returns." So we close it here
301 out.close();
302 } catch(Exception e) {
303 LOG.warn("Exception writing to propertyFile "
304 + propertyFile + ": " + e);
305 }
306 properties = null; // finished
307 }
308
309 /** Method that loops to display the dialog that retrieves the
310 * fedora server initialisation properties from the user. If there
311 * is a property file with values set already, it will display
312 * the previously entered values by loading them from that file.
313 * Otherwise, input fields in the dialog are empty.
314 * @param properties the Properties Hashmap storing values for
315 * username, password, host and port (and any errormessage). */
316 protected void setInitialisationProperties(Properties properties)
317 throws ParserConfigurationException, MalformedURLException,
318 CancelledException, ConnectException, RemoteException,
319 SSLHandshakeException, Exception
320 {
321 // keep looping to display authentication popup, until valid values are
322 // entered (except when a ConnectionRefused Exception is caught - this
323 // needs to be rethrown):
324 boolean authenticated = true;
325 // reset any error messages that may have been stored (should not be
326 // the case, but if there had been any difficulty during storing, it
327 // may not have written out an empty errorMessage)
328 properties.setProperty("errormessage", "");
329 do{
330 // show the Authentication-popup:
331 // By passing the HashMap Properties, user-updated values will
332 // be persistent in the authentication-popup fields (rather than
333 // reset to the default initial values).
334 properties = showAuthenticationPopup(properties);
335 String fedoraServerUsername = properties.getProperty("username", "");
336 String fedoraServerPassword = properties.getProperty("password", "");
337 String host = properties.getProperty("host", "");
338 String port = properties.getProperty("port", "");
339 //String protocol = host.startsWith("http") ? "" : "http://";
340 String protocol = "http://";
341 if(host.startsWith("http") || host.startsWith("https"))
342 protocol = "";
343 // NOTE THAT: if a fedora server at https:// is not accessible,
344 // it takes a long time for the authentication popup to reappear.
345
346 try{
347 this.portAddressSuffix
348 = properties.getProperty("port.address.suffix", "");
349 // Use the FedoraClient utility to get the SOAP stub for APIA.
350 // This SOAP stub enables the client to connect to a Fedora
351 // repository via the API-A web service interface.
352 init(protocol, host, port,
353 fedoraServerUsername, fedoraServerPassword);
354 // will throw Exception if it can't instantiate APIA
[22368]355
[15222]356 // if no exception thrown in the initialisation statement above,
357 // then we have been authenticated:
358 authenticated = true;
359 } catch(AuthenticationFailedException afe) {
360 authenticated = false;
361 properties.setProperty("errormessage", afe.getMessage());
362 } catch(RemoteException e) { // causes could be various
363 String reason = e.getMessage();
364 if(e.getCause() != null) {
365 // For instance, if a ConnectException indicating
366 // 'Connection Refused' or a java.net.UnknownHostException
367 // caused the RemoteException
368
369 // Strip out prefix "Nested exception is..." from the
370 // encapsulating Exception's message, by using the Cause's
371 // message. Keep Exception classname to give it some context:
372 reason = e.getCause().getClass().getName() + ": "
373 + e.getCause().getMessage();
374 // Give some more information if the connection was refused.
375 // (This can also happen when the Fedora server is not running)
376 if(e.getCause().getClass().equals(ConnectException.class)) {
377 reason += FedoraGS3Exception.connectionRefusedMessage;
378 }
379 }
380 // if the message indicates that a server was running there,
381 // then we tell the user it was not a Fedora server
382 if(reason.toLowerCase().contains("404")
383 || reason.toLowerCase().contains("request failed"))
384 {
385 reason = NotAFedoraServerException.MESSAGE + "\n("+reason+")";
386 }
387 authenticated = false;
388 properties.setProperty("errormessage", reason);
389 } catch(ConnectException e) {
390 properties.setProperty("errormessage",
391 FedoraGS3Exception.connectionRefusedMessage);
392 authenticated = false;
393 } catch(SSLHandshakeException ssle) { // SSLHandshakeException should
394 // be handled before IOException, as it's an IOException subclass.
395 authenticated = false;
396 properties.setProperty("errormessage",
397 FedoraGS3Exception.sslHandshakeExceptionMessage);
398 // we won't prefix the host with http for the user, as https
399 // might be right after all, and something else might have gone
400 // during the connection attempt instead.
401 //host = host.replace("https", "http"); //setting it for them
402 //properties.setProperty("host", host);
403 } catch(IOException ioe) { // occurs when we try to connect to a
404 // host/port where some server other than Fedora's is listening
405 // (e.g. if we end up connecting to GS3's host and port).
406 // In that case, we can get exception messages like a 404:
407 // "Unable to instantiate FedoraConnection
408 // java.io.IOException: Request failed [404 /fedora/describe]"
409 // Test this by trying to connect to localhost at 9090 where GS3 is
410 String exceptMsg = ioe.getMessage().toLowerCase();
411 if(exceptMsg.indexOf("request failed") != -1
412 || exceptMsg.indexOf("404") != -1)
413 {
414 properties.setProperty("errormessage",
415 NotAFedoraServerException.MESSAGE
416 + "\n(" + ioe.getMessage() + ")");
417 } else if(exceptMsg.indexOf("401") != -1
418 || exceptMsg.indexOf("500") != -1)
419 {
420 authenticated = false;
421 properties.setProperty("errormessage", ioe.getMessage());
422 } else { // the exception occurred for some other reason, rethrow it
423 throw ioe;
424 }
425 }
426 } while(!authenticated); // will keep showing popup until auhentication
427 // and connection input values are valid
428 }
429
430 /**
431 * Static method that displays a popup to allow the user to provide Fedora
432 * authentication (username, pwd) and connection (protocol+host, port) details.
433 * @param properties is a Properties HashMap where the property Keys which must
434 * have been put in here in advance (even with "" Values if appropriate) are:
435 * <pre>
436 * - username
437 * - password
438 * - host (may - but need not - be prefixed with either of the protocols
439 * "http://" and "https://"
440 * - port
441 * - errorMessage (displayed near the top of the popup dialog). Can be "".
442 * </pre>
443 * The values stored in the properties HashMap for the above property are
444 * initially displayed in the fields and the user can overwrite them.
445 * This is useful in such cases where invalid values were entered and this
446 * popup must be redisplayed to allow the user to correct their previous input.
447 * @return the same HashMap Properties which was passed as parameter. */
448 protected static Properties showAuthenticationPopup(Properties properties)
449 throws CancelledException
450 {
451 // Retrieve all the properties -- defaults to "" if any are null
452 JTextField usernameField = new JTextField(
453 properties.getProperty("username", "fedoraAdmin"));
454 JTextField passwordField = new JPasswordField(
455 properties.getProperty("password", ""));
456 JTextField hostField = new JTextField(
457 properties.getProperty("host", "localhost"));
458 JTextField portField = new JTextField(
459 properties.getProperty("port", "8080"));
460
461 JPanel panel = new JPanel(new GridLayout(4,2));
462 panel.add(new JLabel("User Name"));
463 panel.add(usernameField);
464 panel.add(new JLabel("Password"));
465 panel.add(passwordField);
466 panel.add(new JLabel("Host"));
467 panel.add(hostField);
468 panel.add(new JLabel("Port"));
469 panel.add(portField);
470
471 String heading = "Fedora Server Admin Authentication:";
472 String errorMessage = properties.getProperty("errormessage", "");
473 if(!errorMessage.equals("")) {
474 heading = "=> " + errorMessage + "\n\n" + heading;
475 }
476 int option = JOptionPane.showConfirmDialog(null, new Object[] {
477 heading, panel},
478 "Enter Network Password",
479 JOptionPane.OK_CANCEL_OPTION, JOptionPane.PLAIN_MESSAGE);
480
481 if (option == JOptionPane.OK_OPTION) {
482 String fedoraServerUsername = usernameField.getText();
483 String fedoraServerPassword = passwordField.getText();
484 String host = hostField.getText();
485 String port = portField.getText();
486 properties.setProperty("username", fedoraServerUsername);
487 properties.setProperty("password", fedoraServerPassword);
488 properties.setProperty("host", host);
489 properties.setProperty("port", port);
490 } else { // Cancel option
491 throw new CancelledException();
492 }
493 return properties;
494 }
495
496 /** Init method that is called by the constructor to set some
497 * important member variables including instantiating the APIA object
498 * used to invoke the Fedora APIA web service operations.
499 * @param protocol can be http or https
500 * @param host is the name of the Fedora server host
501 * @param port is the port number (String form) of the Fedora server
502 * @param fedoraServerUsername is the user name to access the Fedora
503 * Server
504 * @param fedoraServerPassword is the password needed to access the
505 * Fedora Server
506 */
507 protected void init(String protocol, String host, String port,
508 String fedoraServerUsername, String fedoraServerPassword)
509 throws ParserConfigurationException, MalformedURLException,
510 AuthenticationFailedException, RemoteException, Exception
511 {
512 // initialise member variables
513 lang = ENGLISH;
514 maxresults = Integer.MAX_VALUE;
515 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
516 builder = factory.newDocumentBuilder();
517
518 // (protocol is "" if host already contains protocol)
519 if(!protocol.equals("") && !protocol.endsWith("://"))
520 protocol += "://";
521 // now create baseURL = protocol://host:port/fedora
522 this.baseURL = protocol + host + ":" + port + "/fedora";
523
524 // Get the FedoraAPIA handle to/stub of the Fedora web services
525 // New way of instantiating connection to Fedora is dependent on
526 // fewer files of FedoraClient.jar
527 FedoraAPIAServiceLocator serviceLocator
[21835]528 = new FedoraAPIAServiceLocator(fedoraServerUsername,
529 fedoraServerPassword);
[15222]530
531 APIA = null;
532 boolean isUserSpecifiedPortAddressSuffix = false;
533 // try any portAddressSuffix specified by the user
534 if(!this.portAddressSuffix.equals("")) {
535 isUserSpecifiedPortAddressSuffix = true;
536 this.createAPIA(serviceLocator, this.portAddressSuffix,
537 "user-specified", isUserSpecifiedPortAddressSuffix);
538 }
539
540 // If the user-specified portAddressSuffix failed or if there was none
541 // given, then APIA will be null, so we will try with the default
542 // portAddressSuffix. This time all exceptions will be passed on.
543 if(APIA == null) {
544 isUserSpecifiedPortAddressSuffix = false;
545 this.createAPIA(serviceLocator, defaultPortAddressSuffix,
546 "default", isUserSpecifiedPortAddressSuffix);
547 }
548 }
549
550 /** Tries to create the FedoraAPIA instance using the serviceLocator
551 * and the given portSuffix. The APIA instance is obtained for the
552 * baseURL+portSuffix. Any exceptions are (processed and) rethrown
553 * or, if the flag isUserSpecifiedPortAddressSuffix is true, then the
554 * Remote Exception from AXIS that it can't find the target service to
555 * invoke is ignored so that the caller can retry with the default port-
556 * address suffix first before giving up. */
557 protected void createAPIA(FedoraAPIAServiceLocator serviceLocator,
558 String portSuffix, String messageInsert,
559 boolean isUserSpecifiedPortAddressSuffix)
560 throws Exception
561 {
562 //String portSuffix = (isUserSpecifiedPortAddressSuffix) ?
563 // this.portAddressSuffix : defaultPortAddressSuffix;
564
565 try {
566 LOG.debug( "Trying to connect to Fedora using the given"
567 + " baseURL and the " + messageInsert + " portAddress suffix:\n"
568 + baseURL + portSuffix);
569 APIA = serviceLocator.getFedoraAPIAPortSOAPHTTP(
570 new java.net.URL(baseURL+portSuffix));
571 // let's test whether we're authenticated (otherwise a
572 // RemoteException will be thrown to indicate that the
573 // password was incorrect.)
574 RepositoryInfo repositoryInfo = APIA.describeRepository();
575 // throws RemoteException if pwd wrong or for other reasons
576 // in which case describeRepository() service is unavailable
577 this.fedoraVersion = repositoryInfo.getRepositoryVersion();
578 // If we come all the way here, no exceptions were thrown:
579 this.portAddressSuffix = portSuffix; // store the one currently in use
580 } catch(RemoteException re) {
581 // if we're here, then APIA was unable to call the web service
582 // If this was because the fedora authentication failed, then
583 // let's throw a custom exception
584 String message = re.getMessage().toLowerCase();
585 // Looking for something Unauthorized(401)
586 if(message.indexOf("unauthorized") != -1
587 || message.indexOf("401") != -1)
588 {
589 throw new AuthenticationFailedException();
590 } else if(isUserSpecifiedPortAddressSuffix
591 && re.getMessage().contains(
592 FedoraGS3Exception.missingTargetService))
593 {
594 LOG.warn("Failed to connect to Fedora APIA services at given"
595 + " port address:\n" + portSuffix
596 + "\nException: " + re.getMessage());
597 // APIA.describeRepository can throw a remote exception
598 // whereby AXIS says the target service is missing and can't
599 // be invoked (FedoraGS3Exception.missingTargetService)
600 // Don't rethrow this, if AXIS can't find the user-specified
601 // portAddressSuffix, we will try with the default suffix next
602 APIA = null;
603 } else { // if trying default portAddressSuffix or if any other
604 // RemoteException was generated (whose cause is something
605 // other than an authentication failure) rethrow it.
606 throw re;
607 }
608 } catch(Exception e) { // Other Exceptions
609 // Could possibly be a ServiceException when using ServiceLocator
610 if(isUserSpecifiedPortAddressSuffix) {
611 APIA = null; // we won't throw other exceptions yet until
612 // we have tried the default PortAddressSuffix for the baseURL
613 } else {
614 throw new FedoraGS3InitFailureException(e);
615 }
616 }
617 }
618
619 /** Gets all greenstone collections. Searches for greenstone:*-collection.
620 * Method getCollections() defaults to getting only those objects in fedora's
621 * repository whose pids are of the format greenstone:*-collection.
622 * The use of AutoFinder and findObjects is shown in
623 * fedora-2.2.1-src/src/java/fedora/client/search/ResultFrame.java
624 * The Fedora-APIA's method definition of findObjects is:
625 * <pre>
626 * fedora-types:FieldSearchResult findObjects(
627 * fedora-types:ArrayOfString resultFields,
628 * xsd:nonNegativeInteger maxResults,
629 * fedora-types:FieldSearchQuery query )
630 * </pre>
631 * @see <a href="http://localhost:8080/fedora/search">The local fedora search page for how the search works</a>
632 * @see <a href="http://www.fedora.info/definitions/1/0/api/Fedora-API-A.html">Fedora access API, API-A for method findObjects</a>
633 * @see <a href="http://www.fedora.info/definitions/1/0/types/&#035;complexType_FieldSearchQuery_Link031D7D80">XML type definition of FieldSearchQuery</a>
[22300]634 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/FieldSearchQuery.html">Type definition of 2.2.1 FieldSearchQuery</a>
[15222]635 * @see <a href="http://www.fedora.info/download/2.1.1/userdocs/server/serverdocs/fedora/server/search/FieldSearchQuery.html">does not apply: type definition of 2.1.1 FieldSearchQuery</a>
636 * @see <a href="http://john.drc-dev.ohiolink.edu/browser/drc-core/trunk/src/java/edu/ohiolink/drc/drcdl/BrowseController.java?rev=462">BrowseController.java for an example</a>
637 *
638 * @return an array of Strings containing the pids of all collections
639 * matching the format greenstone:*-collection.
640 */
641 public String[] getCollections() throws RemoteException
642 {
643 // Available constructors:
644 // FieldSearchQuery(java.util.List conditions)
645 // FieldSearchQuery(java.lang.String terms)
646 final String queryStr = GREENSTONE_+WILDCARD+_COLLECTION;
647 FieldSearchQuery query = new FieldSearchQuery();
648 query.setTerms(queryStr);
649 query.setConditions(null);
650 // we'd like pid and title returned for each object
651 // we pass maxResults=null to get all objects that match
652 // (i.e. all collections)
653 String[] pids = null;
654
655 FieldSearchResult collection = AutoFinder.findObjects(
656 APIA, new String[]{"pid", "title"}, maxresults, query);
657 ObjectFields[] results = collection.getResultList();
658 pids = new String[results.length];
659 for(int i = 0; i < results.length; i++) {
660 pids[i] = results[i].getPid();
661 }
662 return pids;
663 }
664
665 /** All objects (incl "greenstone:*" objects) in fedora - be they collections,
666 * top-level documents or document sections - have a DC datastream. This
667 * method returns the content (XML) of the DC datastream as it is stored in
668 * fedora's repository.
669 * (The pid/DC call is one of the default fedora-system 3 disseminations.)
670 * Try an example of the form: http://localhost:8080/fedora/get/&lt;pid&gt;/DC
671 * To obtain the DC/any datastream, we use method getDatastreamDissemination()
672 * of the interface FedoraAPIA. This method returns a MIMETypedStream.
673 * The method signature is:
674 * MIMETypedStream getDatastreamDissemination(String pid, String dsID, String asOfDateTime)
675 * where dsID = itemID (look at datastreams page of running fedora instance)
676 * To access the XML content of the MIMETypedObject returned, we use its method
677 * bytes[] getStream(), but when instantiating a String from this, we have to
678 * use the String() contructor where we can specify the charset encoding (in
679 * this case, it must be UTF-8). Else getStream() returns gobbledygook.
680 * @return a String version of the XML in the DC datastream for the fedora
681 * object denoted by pid.
682 * @param pid - the fedora persistent identifier for an item in the fedora
683 * repository.
684 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java, the API-A web service stub class</a>
685 * @see <a href="http://drc-dev.ohiolink.edu/browser/fedora-core/tags/upstream/src/test/junit/fedora/test/integration/TestAPIA.java?rev=575">TestAPIA.java, which contains an example of MIMETypedObject.getStream() usage.</a>
686 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html#String(byte[],%20java.lang.String)">constructor String(byte[], java.lang.String)</a>
687 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Charset.java, for character sets and encoding</a>
688 */
689 public String getDC(String pid)
690 throws RemoteException, UnsupportedEncodingException
691 {
692 // an example at http://expanse.cs.waikato.ac.nz:9080/fedora/get/greenstone:demo-HASH23d1019b589e2ef6a680e3/DC
693 // datastream ID, dsID = itemID, look at a running fedora
694 MIMETypedStream dcStream
695 = APIA.getDatastreamDissemination(pid, DC, null);
696 //asOfDateTime = null to get the current version of the dataStream
697
698 // need to set the charset encoding to UTF8
699 return new String(dcStream.getStream(), UTF8);
700 }
701
702 /** All "greenstone:*" objects in fedora (be they collections be they
703 * collections, top-level documents or document sections) have an EX
704 * datastream. This method returns the content (XML) of the EX datastream as
705 * is. (It calls the default fedora-system 3 dissemination &lt;pid&gt;/EX.)
706 * @return a String version of the XML in the EX datastream for the fedora
707 * object denoted by pid.
708 * @param pid - the fedora persistent identifier for an item in the fedora
709 * repository.
710 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
711 * asOfDateTime).
712 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
713 * @see String getDC(String pid) throws Exception
714 * */
715 public String getEX(String pid)
716 throws RemoteException, UnsupportedEncodingException
717 {
718 MIMETypedStream exStream = APIA.getDatastreamDissemination(pid, EX, null);
719 //asOfDateTime = null to get the current version of the dataStream
720
721 // need to set the charset encoding to UTF8
722 return new String(exStream.getStream(), UTF8);
723 }
724
725 /** Some "greenstone:*" top-level documents in the fedora repository (but not
726 * greenstone collections or document sections) have a DLS metadata datastream.
727 * This method returns the content (XML) of the DLS datastream as is. (It calls
728 * the default fedora-system 3 dissemination &lt;pid&gt;/DLS.)
729 * @return a String version of the XML in the DLS datastream for the fedora
730 * object denoted by pid, or "" if the document given by pid has no DLS datastream.
731 * @param pid - the fedora persistent identifier for an item in the fedora
732 * repository.
733 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
734 * asOfDateTime).
735 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream().
736 * @see String getDC(String pid) throws Exception
737 * */
738 public String getDLS(String pid)
739 throws RemoteException, UnsupportedEncodingException
740 {
741 MIMETypedStream dlsStream = null;
742 // If there is no DLS datastream, it throws an exception (whose class
743 // fedora.server.errors.DatastreamNotFoundException can't be imported
744 // here (it's not in the client side fedora.server.* package, but on
745 // the server side package of that name):
746 try{
747 dlsStream = APIA.getDatastreamDissemination(pid, DLS, null);
748 //asOfDateTime=null to get the current version of the dataStream
749 } catch(RemoteException e) {
750 //These two don't work:
751 //if(e.getCause().getClass().getName().equals("fedora.server.errors.DatastreamNotFoundException"))
752 //if(e.getCause().getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
753
[26259]754 if(e.getMessage().contains("No datastream could be returned.") || e.getMessage().contains("fedora.server.errors.DatastreamNotFoundException"))
[15222]755 { // there is no DLS data stream for this document
756 return "";
757 }
758 else { // different problem, exception due to different cause
759 throw(e);
760 }
761 }
762 if(dlsStream == null)
763 return "";
764 // need to set the charset encoding to UTF8
765 return new String(dlsStream.getStream(), UTF8);
766 }
767
768 /** All "greenstone:*" objects in fedora (be they collections or documents)
[21775]769 * have a TOC datastream, unless they have only 1 section (SECTION1).
770 * This method returns the content (XML) of the TOC datastream as is.
771 * (Calls default fedora-system 3 dissemination &lt;pid&gt;/TOC.)
[15222]772 * @return a String version of the XML in the TOC datastream for the fedora
773 * object denoted by pid.
774 * @param pid - the fedora persistent identifier for an item in the fedora
775 * repository.
776 * Use MIMETypedStream APIA.getDatastreamDissemination(pid, itemID,
777 * asOfDateTime)
778 * Use String(bytes[], charset="UTF-8") to convert MIMETypedStream.getStream()
779 * @see String getDC(String pid) throws Exception
780 * */
781 public String getTOC(String pid)
782 throws RemoteException, UnsupportedEncodingException
783 {
[21775]784 try {
[15222]785 MIMETypedStream tocStream = APIA.getDatastreamDissemination(pid, TOC, null);
786 //asOfDateTime = null to get the current version of the dataStream
787 // need to set the charset encoding to UTF8
788 return new String(tocStream.getStream(), UTF8);
[21775]789 } catch(RemoteException re) {
790 // if TOC does not exist, then it means there is only 1 section, dsID: SECTION1
791 return new String("<Section id=\"1\"></Section>".getBytes(), UTF8); //set charset
792 }
[15222]793 }
794
795 /** @return the &lt;name&gt;s (in greenstone:&lt;name&gt;-collection)
796 * for the collections indicated by collPIDs.
797 * @param collPIDs - an array of Strings denoting the pids for greenstone
798 * collections stored in the fedora repositoryl. These should be of the
799 * format "greenstone:&lt;collectionName&gt;-collection". */
800 public String[] getCollectionNames(String[] collPIDs) {
801 String[] collNames = new String[collPIDs.length];
802 for(int i = 0; i < collPIDs.length; i++)
803 collNames[i] = getCollectionName(collPIDs[i]);
804 return collNames;
805 }
806
807 /** @return "greenstone:&lt;name&gt;-collection" for all &lt;name&gt;s
808 * in the parameter collNames.
809 * @param collNames - a list of names of greenstone collections
810 * stored in the fedora repository. */
811 public String[] getCollectionPIDs(String[] collNames) {
812 String[] collPIDs = new String[collNames.length];
813 for(int i = 0; i < collNames.length; i++)
814 collPIDs[i] = getCollectionName(collNames[i]);
815 return collPIDs;
816 }
817
818 /** @return greenstone:&lt;name&gt;-collection for the&lt;name&gt;
819 * denoted by parameter collName.
820 * @param collName - the name of a greenstone collection stored
821 * stored in the fedora repository. */
822 public String getCollectionPID(String collName) {
823 return GREENSTONE_+collName+_COLLECTION;
824 }
825
826 /**
827 * Gets the title of the collection denoted by the given collection's pid by
828 * retrieving the title metadata for it from the collection's EX datastream.
829 * @return the title (in the default language, else English, else the
830 * first title found) for the particular collection denoted by its PID.
831 * @param collPID is the pid of a greenstone collection in the fedora
832 * repository. */
833 public String getCollectionTitle(String collPID)
834 throws RemoteException, UnsupportedEncodingException,
835 SAXException, IOException
836 {
837 String title = null; // has to be null initially, we do a check on it
838 // Parse the EX datastream (XML), and in its DOM, find the
839 // <ex:metadata name="collectionname" qualifier="en">title</ex:metadata>
840 // There might be one OR several of those with attribute
841 // name="collectionname". If there's only one, then get that.
842 // If there are several, there would possibly a be qualifier attribute,
843 // in which case get qualifier=lang (where lang is the member variable)
844 // If there is no qualifier with the requested language, then get the
845 // english one which is likely to be there, else return the title for
846 // the first collectionname .
847
848 MIMETypedStream exdata
849 = APIA.getDatastreamDissemination(collPID, EX, null);
850 String exStream = new String(exdata.getStream(), UTF8);
851
852 InputSource source = new InputSource(new StringReader(exStream));
853 Document doc = builder.parse(source);
854 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
855 NodeList children = docEl.getChildNodes();
856
857 String firstName = "";
858 String englishName = "";
859 for(int i = 0; i < children.getLength(); i++ ) {
860 Node n = children.item(i);
861 if(n.getNodeType() == Node.ELEMENT_NODE) {
862 Element e = (Element)n;
863 if(e.hasAttribute(NAME)
864 && e.getAttribute(NAME).equals(COLLECTIONNAME)) {
865 firstName = FedoraCommons.getValue(e);
866 if(!e.hasAttribute(QUALIFIER)) {
867 title = FedoraCommons.getValue(e);
868 break;
869 }
870 else if(e.getAttribute(QUALIFIER).equals(lang)) {
871 title = FedoraCommons.getValue(e);
872 break;
873 } else if(e.getAttribute(QUALIFIER).equals(ENGLISH)) {
874 englishName = FedoraCommons.getValue(e);
875 }
876 }
877 }
878 }
879
880 // if the title is still not set to that of the requested language,
881 // then try setting it to the collection name in English. If English
882 // isn't available, then set it to the first collection name provided
883 // (in whichever language).
884 if(title == null) {
885 title = englishName.equals("") ? firstName : englishName;
886 }
887 doc = null;
888 return title;
889 }
890
891 /** @return the collection titles for all the collections indicated by
892 * collPIDs.
893 * @param collPIDs - a list of pids identifying greenstone collections
894 * stored in the fedora repository. */
895 public String[] getCollectionTitles(String[] collPIDs)
896 throws RemoteException, UnsupportedEncodingException,
897 SAXException, IOException
898 {
899 String[] titles = new String[collPIDs.length];
900
901 // parse each EX datastream (XML) which contains the gs3-extracted meta.
902 for(int i = 0; i < collPIDs.length; i++) {
903 titles[i] = getCollectionTitle(collPIDs[i]);
904 }
905 return titles;
906 }
907
908 /** @return the title metadata for the given doc objects of a collection.
909 * These titles are returned in the same order as the given docIDs.
910 * (The docPIDs already contain the collection name anyway.)
911 * @param docPIDs - a list of pids identifying documents stored in the
912 * fedora repository. */
913 public String[] getDocTitles(String[] docPIDs)
914 throws RemoteException, UnsupportedEncodingException,
915 SAXException, IOException
916 {
917 String[] titles = new String[docPIDs.length];
918 for(int i = 0; i < docPIDs.length; i++) {
919 titles[i] = getDocTitle(docPIDs[i]);
920 }
921 return titles;
922 }
923
924 /** Gets the title metadata for a particular doc object in a collection
925 * denoted by docPID. The docPID already contains the collection name.
926 * @return the title for the fedora document item denoted by docPID
927 * @param docPID is the pid of the document in the fedora repository
928 * (docPID is of the form greenstone:&lt;colName&gt;-&lt;doc-identifier&gt; */
929 public String getDocTitle(String docPID)
930 throws RemoteException, UnsupportedEncodingException,
931 SAXException, IOException
932 {
933 // We need the extracted metadata file, and find its
934 // documentElement's child
935 // <ex:metadata name="Title">sometitle</ex:metadata>
936 // where the title we return is sometitle
937
938 String title = "";
939 MIMETypedStream exdata
940 = APIA.getDatastreamDissemination(docPID, EX, null);
941 String exStream = new String(exdata.getStream(), UTF8);
942 return getTitle(exStream);
943 }
944
945 /** Given a string representation of a document's or document section's
946 * EX datastream -- which is a greenstone extracted metadata XML file --
947 * of the form:
948 * &lt;ex&gt;
949 * &lt;ex:metadata name="Title"&gt;sometitle&lt;/ex:metadata&gt;
950 * &lt;ex:metadata name="..."&gt;....&lt;/ex:metadata&gt;
951 * ...
952 * &lt;/ex&gt;
953 * This method finds the &lt;ex:metadata&gt; where the name="Title" and
954 * returns the value embedded in that element ('sometitle' in
955 * the example above).
956 * @return the title metadata of the document/document section whose EX
957 * datastream is passed as parameter
958 * @param exStream the EX datastream in String form of the document or
959 * document section. */
960 protected String getTitle(String exStream)
961 throws SAXException, IOException
962 {
963 String title = "";
964 InputSource source = new InputSource(new StringReader(exStream));
965 Document doc = builder.parse(source);
966 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
967 NodeList children = docEl.getChildNodes();
968
969 // Cycle through all the *element* children of <ex:ex></ex:ex>
970 // which are all of the form:
971 // <ex:metadata name="somename">somevalue</ex:metadata>
972 // Find the one where name="Title", its value is the title
973 for(int i = 0; i < children.getLength(); i++ ) {
974 Node n = children.item(i);
975 if(n.getNodeType() == Node.ELEMENT_NODE) {
976 Element e = (Element)n;
977 if(e.hasAttribute(NAME)
978 && e.getAttribute(NAME).equals(TITLE)) {
979 title = FedoraCommons.getValue(e);
980 break;
981 }
982 }
983 }
984 return title;
985 }
986
[26171]987
988 /** Used to obtain the dc:title value (hashID) of the DC stream of a digital
989 * object whose fedoraID is of a special sort: greenstone-http:<colname>-id. */
990 protected String getDCTitle(String fedoraPID)
991 throws RemoteException, UnsupportedEncodingException,
992 SAXException, IOException
993 {
994 String title = "";
995 MIMETypedStream dcdata = APIA.getDatastreamDissemination(fedoraPID, DC, null);
996 if(dcdata == null || dcdata.equals("")) {
997 return title;
998 }
999 String dcStream = new String(dcdata.getStream(), UTF8);
1000
1001 InputSource source = new InputSource(new StringReader(dcStream));
1002 Document doc = builder.parse(source);
1003 Element docEl = doc.getDocumentElement(); // docEl=<oai_dc:dc></oai_dc:dc>
1004 NodeList children = docEl.getElementsByTagName("dc:title");
1005 if(children != null && children.getLength() > 0) {
1006 Node n = children.item(0); // <dc:title>
1007 Element e = (Element)n;
1008 title = FedoraCommons.getValue(e);
1009 }
1010 return title;
1011 }
1012
[15222]1013 /** @return the title metadata for the given document sections.
1014 * These titles are returned in the same order as the given docPIDs
1015 * and associated sectionIDs.
1016 * (The docPIDs already contain the collection name anyway.)
1017 * @param docPIDs - a list of pids identifying documents stored in the
1018 * fedora repository.
1019 * @param sectionIDs - a list of sectionIDs identifying individual sections
1020 * of documents stored in the fedora repository whose titles are requested. */
1021 public String[] getSectionTitles(String[] docPIDs, String[] sectionIDs)
1022 throws RemoteException, UnsupportedEncodingException,
1023 SAXException, IOException
1024 {
1025 String[] titles = new String[docPIDs.length];
1026 for(int i = 0; i < docPIDs.length; i++) {
1027 titles[i] = getSectionTitle(docPIDs[i], sectionIDs[i]);
1028 }
1029 return titles;
1030 }
1031
1032 /** @return the title metadata for the given document section.
1033 * (The docPID already contain the collection name anyway.)
1034 * @param docPID - a pid identifying a document in the fedora repository.
1035 * @param sectionID - the sectionID of the section of the
1036 * document whose title is requested. */
1037 public String getSectionTitle(String docPID, String sectionID)
1038 throws UnsupportedEncodingException, RemoteException,
1039 SAXException, IOException
1040 {
1041 String ex = this.getSectionEXMetadata(docPID, sectionID);
1042 return getTitle(ex);
1043 }
1044
1045 /** Searches the fedora repository for all greenstone:&lt;colPID&gt;* and
1046 * returns the PIDs of the data objects found, with the exception of
1047 * greenstone:&lt;colPID&gt;-collection, which is not a document but a
1048 * collection PID.
1049 * That is, pids of objects whose pid is greenstone:&lt;colName&gt;*
1050 * (but not greenstone:&lt;colName&gt;-collection itself, because that represents
1051 * the collection and not an object of the same collection) are returned.
1052 * All pids that do not map to a collection are assumed to be documents!
1053 * @return a list of the pids of all the (doc) objects in a collection.
1054 * @param colPID is the pid of the greenstone collection stored in
1055 * the fedora repository. */
1056 public String[] getCollectionDocs(String colPID)
1057 throws RemoteException
1058 {
1059 String colName = getCollectionName(colPID);
1060 //LOG.debug("colName: " + colName);
1061
1062 // Search fedora objects for pid=greenstone:<colName>-*
1063 final String queryStr = GREENSTONE_+colName+HYPHEN+WILDCARD;
1064 // searches for "greenstone:"+colName+"-*";
1065 FieldSearchQuery query = new FieldSearchQuery();
1066 query.setTerms(queryStr);
1067 query.setConditions(null);
1068 String[] pids = null;
1069
1070 FieldSearchResult objects = AutoFinder.findObjects(
1071 APIA, new String[]{"pid", "title"}, maxresults, query);
1072 ObjectFields[] results = objects.getResultList();
1073
1074 // NEED TO SKIP ANYTHING THAT ENDS IN "-collection" because
1075 // that's not a document object:
1076 pids = new String[results.length-1]; // not storing collection object
1077 int index = 0; // keeps track of docPid index
1078 for(int i = 0; i < results.length; i++) {
1079 // check it's not a collection object
1080 if(!results[i].getPid().endsWith(_COLLECTION)) {
1081 pids[index] = results[i].getPid();
1082 index++;
1083 }
1084 }
1085
1086 return pids;
1087 }
1088
1089 /** Given the pid of a document fedora data object, this method will return
1090 * all itemIDs that are part of that data object and are Sections. For further
1091 * information see interface Comparable (implemented by String), SortedSet
1092 * and TreeSet.
1093 * @return an array of itemIDs of the Sections of the document,
1094 * indicated by docPID, in ascending order. These are of the form: "SECTION1.*"
1095 * @param docPID is a fedora pid identifying a greenstone document object.
1096 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1097 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1098 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1099 */
1100 public String[] getSectionNames(String docPID) throws RemoteException {
1101 // DatastreamDef[] listDatastreams(
1102 // java.lang.String pid, java.lang.String asOfDateTime)
1103
1104 // listDatastreams returns information on each item (including itemID=dsID)
1105 // in the document object indicated by docPID
1106
1107 // Need to give an object version number, because null for asOfDateTime
1108 // does not return any datastreams!
1109 String[] times = APIA.getObjectHistory(docPID);
1110
1111 DatastreamDef[] datastreams = APIA.listDatastreams(
1112 docPID, times[times.length-1]);
1113
1114 // TreeSet is a SortedSet. We're going to put Strings into it,
1115 // and Strings implement interface Comparable already.
1116 TreeSet orderedList = new TreeSet(); //TreeSet(new RankComparator())
1117 for(int i = 0; i < datastreams.length; i++) {
1118 String itemID = datastreams[i].getID();
1119 if (itemID.startsWith("SECTION"))
1120 orderedList.add(itemID);
1121 }
1122
1123 String[] sectionNames = new String[orderedList.size()];
1124 orderedList.toArray(sectionNames);
1125 orderedList = null;
1126 return sectionNames;
1127 }
1128
1129 /** Given the pid of a document fedora data object, this method will return all
1130 * itemIDs that are part of that data object and are Sections, but just the
1131 * Section numbers are returned. For further information see interface Comparable
1132 * (implemented by String), SortedSet and TreeSet.
1133 * @return an array of itemIDs of the Section numbers of the document
1134 * indicated by docPID, in ascending order. Return values are of form: "1.*".
1135 * @param docPID is a fedora pid identifying a greenstone document object.
1136 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/access/FedoraAPIABindingSOAPHTTPSkeleton.html">FedoraAPIABindingSOAPHTTPSkeleton.java stub class for the API-A web services</a>
1137 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/DatastreamDef.html">DatastreamDef.java</a>
1138 * @see <a href="http://dltj.org/2006/12/fedora-batch-processing/">Fedora batch processing</a>
1139 */
1140 public String[] getSectionNumbers(String docPID) throws RemoteException {
1141 String[] times = APIA.getObjectHistory(docPID);
1142
1143 DatastreamDef[] datastreams
1144 = APIA.listDatastreams(docPID, times[times.length-1]);
1145 //Vector v = new Vector(datastreams.length);
1146 TreeSet orderedList = new TreeSet();
1147
1148 for(int i = 0; i < datastreams.length; i++) {
1149 String itemID = datastreams[i].getID();
1150 if (itemID.startsWith("SECTION")) {
1151 //int index = SECTION.length();
1152 //itemID = itemID.substring(index);
1153 itemID = removePrefix(itemID, SECTION);
1154 orderedList.add(itemID);
1155 }
1156 }
1157
1158 String[] sectionNumbers = new String[orderedList.size()];
1159 orderedList.toArray(sectionNumbers);
1160 orderedList = null;
1161
1162 return sectionNumbers;
1163 }
1164
1165 /** @return the titles for the document sections denoted by the parameters.
1166 * @param docPID is a fedora pid identifying a greenstone document object.
1167 * @param sectionIDs is a list of identifiers identifying sections in the
1168 * document denoted by docPID, whose titles need to be returned. Each
1169 * sectionID may sectionID may be either a section name (e.g. SECTION1.5.1)
1170 * or a section number (eg. 1.5.1). */
1171 public String[] getTitles(String docPID, String[] sectionIDs)
1172 throws RemoteException, UnsupportedEncodingException,
1173 SAXException, IOException
1174 {
1175 String[] titles = new String[sectionIDs.length];
1176 for(int i = 0; i < titles.length; i++)
1177 titles[i] = getTitle(docPID, sectionIDs[i]);
1178 return titles;
1179 }
1180
1181 /** @return the title for the document section denoted by the parameters.
1182 * @param docPID is a fedora pid identifying a greenstone document object.
1183 * @param sectionID identifies the particular section in the document denoted
1184 * by docPID, whose title needs to be returned. The sectionID may be either a
1185 * section name (e.g. SECTION1.5.1) or a section number (eg. 1.5.1). */
1186 public String getTitle(String docPID, String sectionID)
1187 throws RemoteException, UnsupportedEncodingException,
1188 SAXException, IOException
1189 {
1190 // Compose the itemID for the EX data stream from the number in the
1191 // sectionID:
1192 String exID = removePrefix(sectionID, SECTION);
1193 exID = EX+convertToMetaNumber(exID);
1194
1195 // Retrieve the extracted metadata stream (EX, in XML) for the given
1196 // section
1197 String exStream = getItem(docPID, exID);
1198
1199 // Extract the title from the XML, look for:
1200 // <ex:ex><ex:metadata name="Title">title</ex:metadata></ex:ex>
1201 InputSource source = new InputSource(new StringReader(exStream));
1202 Document doc = builder.parse(source);
1203 Element docEl = doc.getDocumentElement(); // docEl=<ex:ex></ex:ex>
1204 NodeList children = docEl.getElementsByTagName(
1205 EX.toLowerCase()+COLON+METADATA); // <ex:metadata>
1206 for(int i = 0; i < children.getLength(); i++) {
1207 Element e = (Element)children.item(i);
1208 if(e.hasAttribute(NAME) && e.getAttribute(NAME).equals(TITLE))
1209 return FedoraCommons.getValue(e); // extract and return the title
1210 }
1211 return ""; // if we got here, then we couldn't find a title
1212 }
1213
1214 /** @return the section's XML (as a String) as it is stored in fedora.
1215 * Works out if sectionID is a sectionName or sectionNumber.
1216 * @param docPID - a fedora pid identifying a greenstone document object.
1217 * @param sectionID - identifyies the particular section in the
1218 * document denoted by docPID, may be a section name or number. */
1219 public String getSection(String docPID, String sectionID)
1220 throws RemoteException, UnsupportedEncodingException
1221 {
1222 if(!sectionID.startsWith(SECTION)) // then it has only section number
1223 sectionID = SECTION+sectionID;
1224
1225 String sectionXML = this.getItem(docPID, sectionID);
1226 return sectionXML;
1227 }
1228
1229 /** @return the required section's DC metadata XML datastream.
1230 * @param docPID - a fedora pid identifying a greenstone document object.
1231 * @param sectionID - identifyies the particular section in the
1232 * document denoted by docPID, may be a section name or number. */
1233 public String getSectionDCMetadata(String docPID, String sectionID)
1234 throws RemoteException, UnsupportedEncodingException
1235 {
1236 String dcID = removePrefix(sectionID, SECTION);
1237 // ensure we have just the section number
1238 dcID = DC+convertToMetaNumber(dcID); // itemID of DC = DC + number
1239
1240 // now get the DC datastream for that number
1241 String dcXML = this.getItem(docPID, dcID);
1242 return dcXML;
1243 }
1244
1245 /** Returns the section EX metadata XML datastream for SectionID which may be
1246 * a section name or number. Currently a few EX files are named awkwardly:
1247 * the EX file for section 1.* is actually associated with datastream EX.*.
1248 * But subsequent EX datastreams are named appropriately: for instance,
1249 * EX2.1.1 matches with section 2.1.1
1250 * @return the required section's EX metadata XML datastream.
1251 * @param docPID - a fedora pid identifying a greenstone document object.
1252 * @param sectionID - identifyies the particular section in the
1253 * document denoted by docPID, may be a section name or number. */
1254 public String getSectionEXMetadata(String docPID, String sectionID)
1255 throws RemoteException, UnsupportedEncodingException
1256 {
1257 String exID = removePrefix(sectionID, SECTION);
1258 exID = EX+convertToMetaNumber(exID); // make it a proper EX metadata number
1259
1260 // now get the EX datastream for that for number
1261 String exXML = this.getItem(docPID, exID);
1262 return exXML;
1263 }
1264
[22300]1265 /** Given a documentNode element, adds the nodetype attribute to all of its
1266 * docNode descendants. The nodetype is either Root, Internal or Leaf to indicate
1267 * whether the docnode is a toplevel document Node, or has children or has none.
1268 * @param e - the documentNode element whose descendants' nodetypes will be set
1269 * at method's end. */
1270 protected void addNodeTypeToDescendants(Element e) {
1271 NodeList sections = e.getElementsByTagName(SECTION_ELEMENT);
1272 for(int i = 0; i < sections.getLength(); i++) {
1273 Element section = (Element)sections.item(i);
1274 NodeList descendants = section.getElementsByTagName(SECTION_ELEMENT);
1275 if(descendants.getLength() > 0) {
1276 // if there are any descendants (which includes children) that are SECTIONS
1277 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
1278 } else {
1279 section.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
1280 }
[15222]1281 }
[22300]1282 }
1283
1284
1285 /** @return the part of the TOC XML file (which outlines doc structure)
1286 * relating to the given section. This includes the section denoted by
1287 * sectionID as well as all descendent subsections thereof.
1288 * @param docPID - a fedora pid identifying a greenstone document object.
1289 * @param sectionID - identifyies the particular section in the
1290 * document denoted by docPID, may be a section name or number.
1291 * @param structure can contain any combination of: ancestors, parent,
1292 * siblings, children, descendants, entire, specifying the portion of
1293 * the structure to retrieve.
1294 * @param info can contain any combination of: siblingPosition, numSiblings,
[26270]1295 * numChildren, documentType, requesting additional information about the structure. */
[22300]1296 public Element getSectionStructureXML(String docPID, String sectionID, String structure, String info)
1297 throws RemoteException, UnsupportedEncodingException, SAXException, IOException
1298 {
1299 // get the TableOfContents (TOC) XML datastream as a String
1300 String xmlTOC = getTOC(docPID);
[15222]1301
[22300]1302 // convert it into a DOM document
1303 InputSource source = new InputSource(new StringReader(xmlTOC));
1304 Document doc = builder.parse(source);
1305 // toplevel element docEl = <Section id="1"></Section>
1306 Element docEl = doc.getDocumentElement();
1307 addNodeTypeToDescendants(docEl);
1308 docEl.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
1309
1310 if(structure.indexOf("entire") != -1) { // don't need to find the specific section, doc root is what's required
1311 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1312 return docEl;
[15222]1313 }
[22300]1314
1315 if(sectionID.equals("")) {
1316 sectionID = "1";
1317 }
[15222]1318
[22300]1319 // Store just the number
1320 String sectionNumber = removePrefix(sectionID, SECTION);
1321 // Check whether we're requested to return the toplevel element itself
1322 // If sectionNumber=1, then the top-level element/document element
1323 // of the TOC XML is requested, so return the TOC as is.
1324 if(sectionNumber.equals("1") && structure.indexOf("descendants") != -1) {
1325 docEl = getStructureInfo(docEl.getOwnerDocument(), docEl, info);
1326 return docEl;
[15222]1327 }
1328
[22300]1329 // if the root is the section required, return that
1330 if(docEl.getTagName().equals(SECTION_ELEMENT)
1331 && docEl.getAttribute(ID).equals(sectionNumber)) {
1332 Element substructure = getSubstructure(docEl, structure);
1333 return getStructureInfo(substructure.getOwnerDocument(), docEl, info);
1334 //return docEl;
[15222]1335 }
1336
[22300]1337
1338 // Else, get all <Section> elements and find the
1339 // <Section id="sectionNumber"></Section> and return that
1340 NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT);
1341 for(int i = 0; i < sections.getLength(); i++) {
1342
1343 Element e = (Element)sections.item(i);
1344 if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) {
1345 Element substructure = getSubstructure(e, structure);
1346 return getStructureInfo(substructure.getOwnerDocument(), e, info);
1347 }
1348 }
1349
1350 return null; // not found
1351 }
1352
1353
[15222]1354 /** Implements browsing document titles of a greenstone collection stored in
1355 * the fedora repository by letter.
1356 * @return the document pids whose titles start with the given letter.
[22300]1357 * @param collName - the name of the collection.
[15222]1358 * @param letter - the starting letter to browse by.
1359 */
1360 public String[] browseTitlesByLetter(final String collName, final String letter)
1361 throws RemoteException, FedoraVersionNotSupportedException
1362 {
1363 String[] pids = null;
1364
1365 // We want to do the following kind of search (assuming letter=f
1366 // and collName=demo):
1367 // pid~greenstone:demo* title~f*
1368
1369 // We don't need to normalise the letter first (to search titles starting
1370 // with both uppercase and lowercase versions of the letter), because
1371 // Fedora always searches for both.
1372 // HOWEVER, searching for title~f* returns all documents containing f (or F)
1373 // ANYWHERE in their titles!
1374 // SOLUTION: search the collection for all titles containing f as given,
1375 // retrieving pid and title fields. Then from the list of results, select
1376 // only those titles that start with the given letter.
1377 // This may seem an unnecessarily cumbersome job (when it looked like it
1378 // should have worked with just title~f*), BUT, at least the resulting
1379 // documents will be reduced to a set of titles containing f; rather than
1380 // having to search *all* documents in the collection.
1381 final String title = letter+WILDCARD;
1382
1383 FieldSearchResult objects = findObjectsWithTitlesContaining(
1384 collName, title);
1385 ObjectFields[] results = objects.getResultList();
1386 TreeSet v = new TreeSet(); // TreeSet to return the results in
1387 //alphabetical order
1388 for(int i = 0; i < results.length; i++) {
1389 // from the result list, select those titles that don't
1390 // just *contain* the letter, but actually start with it:
1391 String resultTitle = results[i].getTitle(0);
[15659]1392 if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) {
1393 String pid = results[i].getPid();
1394 // skip the collection object itself
1395 if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
1396 v.add(pid);
1397 //LOG.debug(resultTitle);
1398 }
[15222]1399 }
1400 }
1401 pids = new String[v.size()];
1402 v.toArray(pids);
1403 return pids;
1404 }
1405
1406 /** Implements querying document DC titles of a greenstone collection stored in
1407 * the fedora repository for a term that may occur anywhere in their titles.
1408 * @return the document pids whose DC titles contain the parameter term.
1409 * @param titleContents - the word or phrase to search the collection's
1410 * document titles for. Only one word, and this method finds Greenstone
1411 * DOCUMENT titles CONTAINING that word (if any).
1412 * @param startsWith - if true, searches for titles that start with
1413 * titleContents. Else it searches for titles that contain titleContents. */
1414 public String[] searchDocumentTitles(String collName, String titleContents,
1415 boolean startsWith)
1416 throws RemoteException, FedoraVersionNotSupportedException
1417 {
1418 String[] pids = null;
1419
1420 // We want to do the following kind of search (when written in Fedora's
1421 // REST format - see http://localhost:8080/fedora/search):
[22300]1422 // pid~greenstone:<colname>-* title~<1st word of titleContents>
[15222]1423
1424 // We don't need to normalise the word first (to search titles starting
1425 // with both uppercase and lowercase versions of it), because
1426 // Fedora always searches for the normalised word.
1427
1428 // 2 difficulties:
1429 // - We can only search for single words with Fedora's Conditional Search.
1430 // Obtain pids and titles of documents containing the first word and then
1431 // we filter the titles to those containing the entire phrase of
1432 // titleContents.
1433 // - Searching for title~FirstWord returns all documents containing
1434 // this word ANYWHERE in their titles. If parameter startsWith is false,
1435 // then this is fine. But if parameter startsWith is true, then go
1436 // through all the resulting titles found (containing FirstWord), select
1437 // only pids of those titles that contain the entire phrase titleContents
1438
[22300]1439 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
[15222]1440
1441 int indexOfFirstSpace = titleContents.indexOf(' '); // check for space
1442 // if titleContents is a phrase (contains space), then it's not
1443 // a single word, in which case search for just the first word
1444 String title = titleContents; // assume it's a single word
1445 if(indexOfFirstSpace != -1) // if not single word but a phrase, store
1446 title = titleContents.substring(0, indexOfFirstSpace); // 1st word
1447
1448 FieldSearchResult objects = findObjectsWithTitlesContaining(
1449 collName, title);
1450 if(objects == null) {
1451 final String[] empty = {};
1452 return empty;
1453 }
1454
1455 // Go through all the titles found and for those that match the criteria*,
1456 // store their pid. *Criteria: titles that start with OR contain the
1457 // word OR phrase of titleContents.
1458 ObjectFields[] results = objects.getResultList();
1459 Vector v = new Vector(); // return pids in the order found
1460 for(int i = 0; i < results.length; i++) {
1461 // from the result list, select those titles that don't
1462 // just *contain* the first word, but the entire phrase of
1463 // words in titleContents:
1464 String resultTitle = results[i].getTitle(0);
1465 boolean accepted = false; // accept the resultTitle found
1466
[15659]1467 String resultPID = results[i].getPid();
1468 // skip the collection object itself, since it's not a document
1469 if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) {
[15658]1470 accepted = false;
1471 }
[15222]1472 // if titleContents is a single word and we are checking
[15658]1473 // whether resultTitle contains titleContents:
1474 else if(indexOfFirstSpace == -1) { // titleContents is a single word
[15222]1475 if(!startsWith) // titles that *contain* the word titleContents
1476 accepted = true; //accept all titles found
1477 // else startWith: accept titles starting with word titleContents
1478 else if (resultTitle.toLowerCase().startsWith(
1479 titleContents.toLowerCase()))
1480 accepted = true;
1481
[15659]1482 }
[15658]1483 else { // otherwise, titleContents is a phrase of >1 word, need
[15222]1484 // to check that the result title contains the entire phrase
1485 if(startsWith && resultTitle.toLowerCase().startsWith(
1486 titleContents.toLowerCase()))
1487 accepted = true;
1488 else if(!startsWith && resultTitle.toLowerCase().contains(
1489 titleContents.toLowerCase()))
1490 accepted = true;
1491 }
[15658]1492
[15222]1493 // if the resultTitle fit the criteria, store its pid
1494 if(accepted) {
[15659]1495 v.add(resultPID);
[15222]1496 //System.out.println(resultTitle);
1497 }
1498
1499 }
1500 pids = new String[v.size()];
1501 v.toArray(pids);
1502 return pids;
1503 }
1504
1505
1506 /**
1507 * @param collName - the collection of documents we'll be searching in.
1508 * @param titleWord - the word we'll be searching the document titles for.
1509 * (Fedora's search returns all objects whose title contains that word).
1510 *
1511 * Two kinds of search are provided by Fedora as stated in FedoraAccess.java
1512 * (see link):
1513 * <pre>
1514 * "There are two search methods: a search on all fields or a search on
1515 * specific fields. To search all fields the setTerms function of the
1516 * FieldSearchQuery must be used, with the paramater being the desired string.
1517 *
1518 * To search by specific fields, you must create an array of Condition
1519 * objects. Each condition consists of three parts:
1520 * the field to be searched (.setProperty()),
1521 * the operation to be used (.setOperator(ComparisonOperator. &lt;operator&gt;)),
1522 * and the search string (.setValue())"
1523 * </pre>
1524 * We want to use the second search method above when browsing and searching,
1525 * and search for: pid~greenstone:&lt;collName&gt;* title~&lt;letter&gt;*
1526 * or pid~greenstone:&lt;collName&gt;* title~&lt;first word of search phrase&gt;
1527 * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java.
1528 *
1529 * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based
1530 * web services are defined. (The web.xml defines the "Servlets for REST-based
1531 * interfaces to the Fedora Repository Server").
1532 * Do a search on the word "search":
1533 * fedora.server.access.FieldSearchServlet is the class we need to look at
1534 * It accesses a different Condition.java class: fedora.server.search.Condition.java
1535 * The above is what is used by the REST-based interface in FieldSearchServlet.java
1536 * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java
1537 * is what's used in the fedora client application that makes use of
1538 * the SOAP-based interface.
1539 *
1540 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/tags/upstream/src/org/acs/elated/fed/FedoraAccess.java?rev=76&format=txt">FedoraAccess.java</a>
1541 * @see <a href="http://drc-dev.ohiolink.edu/browser/elated-core/trunk/WEB-INF/src/org/acs/elated/fed/MaAPI.java?rev=275">MaAPI</a>
1542 * @see <a href="http://www.fedora.info/download/2.2.1/javadocs/fedora/server/types/gen/Condition.html">Fedora server type Condition.java</a>
1543 * @see <a href="http://source.uhi.ac.uk/fisheye/browse/svn/ctrep/trunk/fedora-ws-clients/testapp/testsrc/fedora/webservices/client/api/a/test/Search.java?r1=133&r2=165">Fedora client test Search.java</a>
1544 */
1545 protected FieldSearchResult findObjectsWithTitlesContaining(
1546 String collName, final String titleWord)
1547 throws RemoteException, FedoraVersionNotSupportedException
1548 {
[22300]1549 // Searching for pids of the form "greenstone:gs2mgdemo-*";
1550 final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD;
[15222]1551
1552 Condition[] conditions = new Condition[2];
1553 conditions[0] = new Condition("pid", ComparisonOperator.has, pid);
1554 conditions[1] = new Condition("title", ComparisonOperator.has, titleWord);
1555
1556 FieldSearchQuery query = new FieldSearchQuery();
1557 query.setConditions(conditions);
1558
1559 // We'd like pid and title returned for each object, because we'll make
1560 // use of title. We pass maxResults=null to get all objects that match
1561 // (i.e. all collections).
1562 FieldSearchResult objects = null;
[15658]1563 final String[] retrieveFields = {"pid", "title"};
[15222]1564 try {
1565 objects = AutoFinder.findObjects(
[15658]1566 APIA, retrieveFields, maxresults, query);
[15222]1567 // collection = APIA.findObjects(new String[]{"pid", "title"},
1568 // new NonNegativeInteger(Integer.toString(maxresults)), query);
1569 } catch(RemoteException ex) {
1570 if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) {
1571 // fedoraVersion is too low, searching/browsing is not possible
1572 // (because class Condition has changed after 2.0, from 2.1.1
1573 // onwards)
1574 throw new FedoraVersionNotSupportedException(fedoraVersion);
1575 } else {
1576 LOG.error(
1577 "Remote exception when calling web service operation " +
1578 "findObject() to execute search:\n" + ex.getMessage());
1579 ex.printStackTrace();
1580 throw ex;
1581 }
1582 }
1583 return objects; // return the FieldSearchResult objects found
1584 }
1585
1586 /** @return the &lt;docName&gt; in the parameter docPID (which is of the form:
1587 * greenstone:&lt;colname&gt;-&lt;docName&gt;)
1588 * @param docPID - pid of a greenstone document in the fedora repository. */
1589 public String getDocName(String docPID) {
1590 return docPID.substring(docPID.indexOf('-')+1);
1591 }
1592
1593 /** @return the &lt;name&gt; in the parameter collPID
1594 * (greenstone:&lt;name&gt;-collection)
1595 * If collPID is a docPID, this method does the same: return the &lt;name&gt;
1596 * in the docPID (greenstone:&lt;name&gt;-docID).
1597 * @param collPID - pid of a greenstone collection in the fedora repository. */
1598 public String getCollectionName(String collPID) {
1599 return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-'));
1600 }
[22300]1601
1602
1603 /** Return the TOC substructure requested
1604 * @return an element containing a copy if element e with either only its child
1605 * elements or with all its descendants and/or its ancestors or only its parent
1606 * and/or its siblings (depending on what the parameter structure specifies).
1607 * @param e - the element to start copying from and whose structure is requested.
1608 * @param structure - a string containing any combination of the values:
1609 * ancestors, parent, siblings, children, descendants,
1610 * specifying the portion of the structure to retrieve.
1611 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1612 */
1613 protected Element getSubstructure(Element original, String structure)
1614 {
1615 Document doc = builder.newDocument();
1616
1617 boolean descendants = (structure.indexOf("descendants") != -1) ? true : false;
1618 Node current = doc.importNode(original, descendants);
1619
1620 // descendants=true: import/copy descendants.
1621 // Else, copy just current node original (later copy its direct children)
1622
1623 Node parentOfCurrent = null;
1624 Node parentOfOriginal = original.getParentNode();
1625 if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot)
1626 parentOfOriginal = null;
1627 }
1628
1629 if(parentOfOriginal == null) { // no parentNode, so current is the root node.
1630 // can't get ancestors/parent/siblings, since all these need parentNode
1631 doc.appendChild(current);
1632 } else { // siblings, ancestors and parent requests all require parent node to exist
1633 // First check if we need to get ancestors, else for whether parent is required
1634 if(structure.indexOf("ancestors") != -1) {
1635 parentOfCurrent = doc.importNode(parentOfOriginal, false);
[15222]1636
[22300]1637 Node child = null;
1638 Node parent = parentOfCurrent; // the copy
1639 Node n = parentOfOriginal.getParentNode(); // the doc to copy from
1640
1641 while(n != null && n != original.getOwnerDocument()) {
1642 child = parent;
1643 parent = doc.importNode(n, false); // no descendants
1644 parent.appendChild(child);
1645 n = n.getParentNode();
1646 }
1647
1648 doc.appendChild(parent); // need to put the copied node into a document
1649 // else it won't have a parent doc (DOMSource can't work with it
1650 // without it having a document parent).
1651
1652 } else if(structure.indexOf("parent") != -1) {
1653 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1654 //parentOfCurrent.appendChild(current);
1655 doc.appendChild(parentOfCurrent);
1656 }
1657
1658 // a request for siblings is independently tested for
1659 if(structure.indexOf("siblings") != -1) {
1660 // only import parent if we didn't already import
1661 // it for a request for ancestors or parent
1662 if(parentOfCurrent == null) {
1663 parentOfCurrent = doc.importNode(parentOfOriginal, false);
1664 doc.appendChild(parentOfCurrent); // this becomes the root
1665 }
1666 // now the siblings of current (children of parentOfCurrent)
1667 NodeList children = parentOfOriginal.getChildNodes();
1668 for(int i = 0; i < children.getLength(); i++) {
1669 Node n = children.item(i);
1670
1671 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1672 if((Element)n != original) { // skip original which was already imported
1673 Node child = doc.importNode(n, false); // no descendants
1674 parentOfCurrent.appendChild(child);
1675 } else { // already imported Current element, insert at this position
1676 parentOfCurrent.appendChild(current);
[22368]1677 }
[22300]1678
1679 }
1680 }
1681 } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests
1682 // (sibling request adds the current node into a particular position)
1683 parentOfCurrent.appendChild(current);
1684 // need to put the copied node into a document
1685 // else it won't have a parent doc (DOMSource can't work with it
1686 // without it having a document parent).
1687 } else { // when only children or descendants were requested, current becomes root document
1688 doc.appendChild(current);
1689 }
1690 }
1691
1692 // if we are not recursively copying all descendants, then copy just
1693 // the childnodes of current:
1694 if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children
1695
1696 // get e's children and copy them into the new document
1697 NodeList children = original.getChildNodes();
1698 for(int i = 0; i < children.getLength(); i++) {
1699 // create copy
1700 Node n = doc.importNode(children.item(i), false);
1701 // attach it to parent
1702 current.appendChild(n);
1703
1704 // Now we need to indicate whether this new node (child) is a leaf
1705 // or not. (This is necessary for getChildrenOfSection(), else
1706 // it's hard to know if the children are leaves or have further
1707 // subsections.
1708 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1709 // we're dealing only with section children
1710
1711 // Check if the matching original had children:
1712 Element originalsChild = (Element)children.item(i);
1713 NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT);
1714 if(grandchildren.getLength() > 0) {
1715 // original's child has children, so indicate this
1716 // in the copied child:
1717 Element child = (Element)current;
1718 // child.setAttribute(TYPE, INTERNAL_NODE);
1719
1720 }
1721 }
1722 }
1723 }
1724
1725 return doc.getDocumentElement();
1726 }
1727
1728
1729 /** Return the TOC substructure with the requested structural info.
1730 * @return an element containing a copy if element e with either only its child
1731 * elements or with all its descendants and/or its ancestors or only its parent
1732 * and/or its siblings (depending on what the parameter structure specifies).
1733 * Returns null if the element, e, passed in is null.
1734 * @param doc - the new document into whose root element the structural information
1735 * will be inserted as attributes.
1736 * @param e - the element to start copying from and whose structure is requested.
1737 * @param info - a string containing any combination of the values: numChildren,
1738 * numSiblings, siblingPosition. The requested info gets added as attributes to
1739 * the returned root element.
1740 * @see <a href="http://forum.java.sun.com/thread.jspa?threadID=678472&tstart=30">Sun java thread on transforming a DOM XML to a String</a>
1741 */
1742 protected Element getStructureInfo(Document doc, Element e, String info)
1743 {
1744 if(e == null) {
1745 return null;
1746 }
[15222]1747
[22300]1748 Element root = doc.getDocumentElement();
1749
1750 if(!info.equals("")) {
[26270]1751 if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1
1752 || info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) {
[22300]1753 //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength();
1754 int numChildren = 0;
1755
1756 NodeList children = e.getChildNodes();
1757 for(int i = 0; i < children.getLength(); i++) {
1758 Node n = children.item(i);
1759 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1760 numChildren++;
1761 }
1762 }
1763
[26270]1764 if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1) {
1765 root.setAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN, Integer.toString(numChildren));
1766 }
1767 if(info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) {
1768 //String docType = (numChildren > 0) ? "hierarchy" : "simple";
1769 String docType = "hierarchy";
1770 root.setAttribute(AbstractBasicDocument.INFO_DOC_TYPE, docType);
1771 }
[22300]1772 }
1773
1774 if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings
1775 int numSiblings = 0;
1776 int siblingPosition = 0;
1777
1778 Node parent = e.getParentNode();
1779 if(parent == null) {
1780 numSiblings = 0;
1781 siblingPosition = 1;
1782 } else {
1783 //numSiblings = parent.getChildNodes().getLength();
1784 NodeList siblings = parent.getChildNodes();
1785
1786 for(int i = 0; i < siblings.getLength(); i++) {
1787 Node n = siblings.item(i);
1788 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1789 if(e == (Element)n) {
1790 siblingPosition = numSiblings+1;
1791 } else { // count every sibling section element, except e itself
1792 numSiblings++;
1793 }
[15222]1794 }
[22300]1795 }
1796 }
1797
[26270]1798 if(info.indexOf(AbstractBasicDocument.INFO_NUM_SIBS) != -1) {
1799 root.setAttribute(AbstractBasicDocument.INFO_NUM_SIBS, Integer.toString(numSiblings));
[15222]1800 }
[22300]1801
[26270]1802 if(info.indexOf(AbstractBasicDocument.INFO_SIB_POS) != -1) {
1803 root.setAttribute(AbstractBasicDocument.INFO_SIB_POS, Integer.toString(siblingPosition));
[22300]1804 }
1805 }
[15222]1806 }
1807
[22300]1808 return root;
1809 }
1810
1811
[15222]1812 /**
1813 * Return a datastream of a document, given the document's id
1814 * and the item id of the datastream which is to be retrieved.
1815 * @return the XML (in String form) of the item denoted by itemID
1816 * that's part of the fedora data object denoted by docPID.
1817 * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3
1818 * Can't retrieve images denoted by itemID using this method, only items
1819 * that are of XML format.
1820 * @param docPID - pid of a greenstone document in the fedora repository.
1821 * @param itemID - the itemID of a datastream of the fedora object
1822 * identified by docPID.
1823 */
1824 protected String getItem(String docPID, String itemID)
1825 throws RemoteException, UnsupportedEncodingException
1826 {
1827 // MIMETypedStream getDatastreamDissemination(
1828 // String pid, String dsID, asOfDateTime)
1829 MIMETypedStream datastream
1830 = APIA.getDatastreamDissemination(docPID, itemID, null);
1831 return new String(datastream.getStream(), UTF8);
1832 }
1833
1834 /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method
1835 * returns "1.2.1".
1836 * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1,
1837 * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1".
1838 * However, the string str is returned unchanged if the prefix does not occur
1839 * at the start of str.
1840 * @return the String parameter str without the prefix.
1841 * It can be used to return the number of an itemID of a greenstone document
1842 * stored in the fedora repository without the given prefix.
1843 * @param prefix - the prefix which ought to be removed from the itemID.
1844 * @param str - the value of the itemID.
1845 */
1846 protected String removePrefix(String str, String prefix) {
1847 // do nothing in those cases where the prefix is not in param str
1848 if(!str.startsWith(prefix))
1849 return str;
1850 // otherwise:
1851 if(prefix.equals(EX+".") || prefix.equals(DC+".")) {
1852 return "1" + str.substring(prefix.length());
1853 } else {
1854 return str.substring(prefix.length());
1855 }
1856 }
1857
1858 /** Given a number of the form x(.y.z), this method returns this number
1859 * as is, except when x = 1, in which case, it would return .y.z
1860 * That is, given number=3.2.1, this method would return 3.2.1
1861 * But, given number=1.2.3, this method would return .2.3.
1862 * When number=1, it is NOT a special case: "" is returned as explained.
1863 * @param number - a proper (fedora-greenstone document) section number
1864 * @return the same number as it ought to be for the associated EX, DC datastreama.
1865 */
1866 protected String convertToMetaNumber(String number) {
1867 if(number.startsWith("1.") || number.equals("1"))
1868 return number.substring(1); // remove the first char: the initial '1'
1869 else return number;
1870 }
1871
1872 /** @return fedora's baseURL. It's of the form
1873 * "http://localhost:8080/fedora" */
1874 public String getBaseURL() { return baseURL; }
1875
1876 /** @return the portAddressURL (in use) of the Fedora APIA
1877 * web service (should be the endpoint location in the APIA's
1878 * WSDL file).
1879 * It's usually of the form baseURL+"/services/access" */
1880 public String getPortAddressURL() {
1881 return this.baseURL + this.portAddressSuffix;
1882 }
1883
1884 /** @return the baseURL for gsdlAssocFiles */
1885 public String getAssocFileBaseURL() { return baseURL + "/get/"; }
1886
1887 public static void main(String args[]) {
1888 try {
1889 FedoraConnection fedoraCon
1890 = new FedoraConnection(new File("fedoraGS3.properties"));
1891
1892 String[] pids = null;
1893 pids = fedoraCon.getCollections();
1894 String[] titles = fedoraCon.getCollectionTitles(pids);
1895 for(int i = 0; i < pids.length; i++) {
1896 System.out.println("extracted title:" + titles[i]);
1897 String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
1898 String[] docTitles = fedoraCon.getDocTitles(docPIDs);
1899 for(int j = 0; j < docPIDs.length; j++) {
1900 System.out.println("\tExtr doc title: " + docTitles[j]);
1901 }
1902 }
1903
1904 String PID = "greenstone:gs2mgdemo-collection";
1905 String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505"; //HASHbf4b4675045599fbc3e2b1";
1906 String dcXML = fedoraCon.getDC(PID);
1907 String exXML = fedoraCon.getEX(PID);
1908 String tocXML = fedoraCon.getTOC(docPID);
1909 System.out.println("Dublin Core Metadata for " + PID
1910 + " is:\n" + dcXML);
1911 System.out.println("GS3 extracted metadata for " + PID
1912 + " is:\n" + exXML);
1913 System.out.println("Table of Contents for " + docPID
1914 + " is:\n" + tocXML);
1915
1916
1917 String[] sectionNames = fedoraCon.getSectionNames(docPID);
1918 System.out.println("\nSection names for " + docPID + " are:");
1919 for(int i = 0; i < sectionNames.length; i++)
1920 System.out.println(sectionNames[i]);
1921
1922 String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
1923 //String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNumbers);
1924 String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
1925 System.out.println("\nSection numbers for " + docPID + " are:");
1926 for(int i = 0; i < sectionNumbers.length; i++) {
1927 //System.out.println(sectionNumbers[i] + " " + sectionTitles[i]);
1928 System.out.println(sectionNames[i] + " " + sectionTitles[i]);
1929 }
1930
1931 String sectionID = "SECTION1"; //SECTION1.5
1932 System.out.println("\n");
1933
1934 System.out.println(
1935 "browsing greenstone's gs2mgdemo collection by (first) letter F:");
1936 pids = fedoraCon.browseTitlesByLetter("gs2mgdemo", "f");
1937 for(int i = 0; i < pids.length; i++)
1938 System.out.println(pids[i]);
1939
1940 System.out.println(
1941 "\nsearching greenstone's gs2mgppdemo collection for Gender Equality:");
1942 pids = fedoraCon.searchDocumentTitles("gs2mgdemo", "Gender Equality", false);
1943 for(int i = 0; i < pids.length; i++)
1944 System.out.println(pids[i]);
1945
1946 System.out.println("\nDone - exiting.");
1947 System.exit(0);
1948 } catch(RemoteException re) {
1949 System.out.println("Remote Exception when calling web service operation\n" + re.getMessage());
1950 re.printStackTrace();
1951 } catch(Exception e) {
1952 System.out.println("Unable to instantiate FedoraConnection\n" + e);
1953 e.printStackTrace();
[22300]1954 //LOG.error("Unable to instantiate FedoraConnection\n" + e, e);
[15222]1955 }
1956 }
1957}
Note: See TracBrowser for help on using the repository browser.