/**
*#########################################################################
* FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
* of the Greenstone digital library suite from the New Zealand Digital
* Library Project at the * University of Waikato, New Zealand.
*
* Copyright (C) 2008 New Zealand Digital Library Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*########################################################################
*/
package org.greenstone.fedora.services;
import java.io.StringReader;
import org.apache.log4j.Logger;
import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
import org.greenstone.gsdl3.util.GSXML;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Attr;
import org.w3c.dom.Text;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import java.io.File;
import java.util.HashMap;
import java.util.Properties;
import java.util.Map;
import javax.swing.JOptionPane;
import org.xml.sax.SAXException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import javax.net.ssl.SSLHandshakeException;
import java.net.Authenticator;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.PasswordAuthentication;
import java.rmi.RemoteException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
/**
* Class that extends FedoraConnection in order to be able to use
* Fedora's web services to retrieve the specific datastreams of
* Greenstone documents stored in Fedora's repository. This class
* provides methods that convert those datastreams into Greenstone3
* XML response messages which are returned.
* @author ak19
*/
public class FedoraGS3Connection
extends FedoraConnection implements FedoraToGS3Interface,
FedoraToGS3Interface.Constants
{
/** Logger for this class; used below to report a failed connection to FedoraGSearch. */
private static final Logger LOG = Logger.getLogger(
FedoraGS3Connection.class.getName());
/** Index name used when neither a properties file nor a caller supplies
 * one (see setInitialisationProperties() and init()). */
private static final String DEFAULT_FEDORA_INDEX = "FgsIndex"; // older versions of GSearch used "BasicIndex"
/** Complete list of services that FedoraGS3 offers when everything goes
 * well. The services actually on offer are held in {@link #serviceNames}:
 * if no connection to FedoraGSearch can be established, every entry whose
 * name contains "query" (case-insensitively) is left out of that list. */
protected static final String[] SERVICES = {
"DocumentContentRetrieve", "DocumentMetadataRetrieve",
"DocumentStructureRetrieve",
"TextQuery", "FieldQuery",
"ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
};
/** Services actually supported by this instance. Assigned by
 * initSearchFunctionality(): either the {@link #SERVICES} array itself
 * (the same array object, not a copy) or a new array holding only the
 * non-query services when FedoraGSearch could not be reached. */
protected String[] serviceNames;
/** Connection to FedoraGenericSearch, used for full-text searching.
 * Reset to null at the start of every initSearchFunctionality() call and
 * left null when the connection attempt fails. */
protected GSearchConnection fedoraGSearch;
/** URL of the WSDL file of FedoraGSearch's web services. By default this
 * is the Fedora server's base URL concatenated with
 * {@link #gSearchWSDLSuffix}; setGSearchWSDLURL() can replace it. */
protected String gSearchWSDLURL;
/** The last part of the gSearchWSDL URL (default
 * "gsearch/services/FgsOperations?wsdl"). The first part is the Fedora
 * server's base URL. */
protected String gSearchWSDLSuffix;
/** Name of the index FedoraGSearch searches for the GS3 documents.
 * Defaults to {@link #DEFAULT_FEDORA_INDEX} ("FgsIndex") when no
 * "gsearch.indexName" property is given. */
protected String gSearchIndexName;
/**
 * Five-argument constructor mirroring the one of superclass
 * FedoraConnection; all arguments are handed to it unchanged.
 * FedoraGSearch is looked for at the default WSDL location
 * (Fedora's base URL + "gsearch/services/FgsOperations?wsdl"). If another
 * URL is to be used, call setGSearchWSDLURL(url) after construction.
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for administrative
 * authentication required to access the fedora server.
 * @param fedoraServerPassword is the password for administrative
 * authentication required to access the fedora server. If no password was
 * set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
String fedoraServerUsername, String fedoraServerPassword)
throws ParserConfigurationException, MalformedURLException,
SSLHandshakeException, RemoteException, AuthenticationFailedException,
NotAFedoraServerException, ConnectException, Exception
{
super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
// NOTE(review): the Javadoc of this class's init(...) override says the
// 5-argument path goes through init(...) rather than
// setInitialisationProperties(properties); either way the superclass is
// expected to call back into this class, which then tries to instantiate
// the GSearchConnection - confirm against FedoraConnection.
}
/**
 * No-argument constructor mirroring the one of superclass FedoraConnection,
 * to which it simply delegates. According to the original documentation the
 * superclass displays a small dialog requesting the host, port,
 * administrative username and password of the fedora server; if no password
 * was set on the fedora repository when installing it, the password field
 * can be left blank.
 */
public FedoraGS3Connection()
throws ParserConfigurationException, MalformedURLException,
CancelledException, ConnectException, RemoteException,
SSLHandshakeException, Exception
{
super();
// super() is expected to call the overridden
// setInitialisationProperties(properties), which in turn tries to
// instantiate the GSearchConnection.
}
/**
 * Single-argument constructor mirroring the one of superclass
 * FedoraConnection, to which it simply delegates. According to the original
 * documentation the superclass reads connection initialisation values from
 * the given properties file and then displays a small dialog requesting the
 * host, port, administrative username and password of the fedora server,
 * showing the values from the file as defaults. Values missing from the file
 * leave the corresponding dialog fields blank. If no password was set on the
 * fedora repository when installing it, the password field can be left blank.
 * @param propertiesFilename the properties file with the preset
 * initialisation values
 */
public FedoraGS3Connection(File propertiesFilename)
throws ParserConfigurationException, MalformedURLException,
CancelledException, ConnectException, RemoteException,
SSLHandshakeException, Exception
{
super(propertiesFilename);
// super(...) is expected to call the overridden
// setInitialisationProperties(properties), which in turn tries to
// instantiate the GSearchConnection.
}
/**
 * Hook overridden here so that the GSearch settings are read from the
 * preset properties and a connection to FedoraGSearch is attempted.
 * (Per the original documentation it is the superclass constructor that
 * calls this method with the properties loaded from a properties file.)
 * <p>
 * Two optional properties are consulted after the superclass has processed
 * its own:
 * <ul>
 * <li>"gsearch.wsdlURL.suffix" - appended to the Fedora base URL to form the
 * GSearch WSDL URL; defaults to "gsearch/services/FgsOperations?wsdl".</li>
 * <li>"gsearch.indexName" - the index FedoraGenericSearch should search;
 * defaults to "FgsIndex" (no longer BasicIndex or FedoraIndex).</li>
 * </ul>
 * The values finally used are written back into {@code properties} under
 * the same two keys.
 * @param properties the Properties map of preset initialisation values;
 * updated in place as described above
 */
protected void setInitialisationProperties(Properties properties)
        throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    super.setInitialisationProperties(properties);

    final String suffixKey = "gsearch.wsdlURL.suffix";
    final String indexKey = "gsearch.indexName";

    // Resolve both settings, falling back to the defaults when absent
    String wsdlSuffix = properties.getProperty(
            suffixKey, "gsearch/services/FgsOperations?wsdl");
    String indexName = properties.getProperty(indexKey, DEFAULT_FEDORA_INDEX);

    // The WSDL URL is always the Fedora base URL followed by the suffix
    this.gSearchWSDLSuffix = wsdlSuffix;
    this.gSearchWSDLURL = this.baseURL + wsdlSuffix;
    this.gSearchIndexName = indexName;

    // Store the effective values so they are available to whoever
    // persists the properties afterwards
    properties.setProperty(suffixKey, wsdlSuffix);
    properties.setProperty(indexKey, indexName);

    // Create a connection to FedoraGSearch's web services
    initSearchFunctionality();
}
/**
 * Overridden init method that works with the 5 argument constructor, so
 * that setInitialisationProperties() (which needs a Properties map) is
 * bypassed. After the superclass initialisation it sets the GSearch WSDL
 * suffix, WSDL URL and index name to their defaults, installs the Fedora
 * credentials as the default {@link Authenticator} and then tries to
 * connect to FedoraGSearch.
 * <p>
 * Note that {@link Authenticator#setDefault} is a JVM-wide setting: it
 * replaces whatever default authenticator was installed before.
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername the administrative username of the fedora
 * server, also used when GSearch asks for HTTP authentication
 * @param fedoraServerPassword the matching password; null is treated like
 * the empty password ""
 */
protected void init(String protocol, String host, String port,
        final String fedoraServerUsername, final String fedoraServerPassword)
        throws ParserConfigurationException, MalformedURLException,
        AuthenticationFailedException, RemoteException, Exception
{
    super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);

    this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
    this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
    this.gSearchIndexName = DEFAULT_FEDORA_INDEX;

    // Need to set username and password for accessing the WSDL (after GSearch 2.2)
    // http://stackoverflow.com/questions/3037221/401-error-when-consuming-a-web-service-with-http-basic-authentication-using-cxf
    // The java.net.Authenticator can be used to send user credentials when needed.
    //
    // The password is converted up front: the callback below runs lazily
    // inside the networking code, where a NullPointerException caused by
    // a null password would be very hard to trace back to this method.
    // PasswordAuthentication clones the array, so sharing it is safe.
    final char[] passwordChars = (fedoraServerPassword == null)
            ? new char[0]
            : fedoraServerPassword.toCharArray();
    Authenticator.setDefault(new Authenticator() {
        @Override
        protected PasswordAuthentication getPasswordAuthentication() {
            return new PasswordAuthentication(fedoraServerUsername, passwordChars);
        }
    });

    initSearchFunctionality();
}
/**
 * (Re)creates the GSearchConnection used to talk to the separate
 * FedoraGSearch web services, using the current values of gSearchWSDLURL
 * and gSearchIndexName.
 * <p>
 * On success every service in SERVICES is offered. If the connection
 * cannot be created for any reason, the failure is logged, fedoraGSearch
 * stays null and only the services whose name does not contain "query"
 * (ignoring case) are offered.
 */
protected void initSearchFunctionality()
{
    // Drop any previous connection first, so a failed attempt leaves null
    this.fedoraGSearch = null;
    try {
        this.fedoraGSearch = new GSearchConnection(
                gSearchWSDLURL, gSearchIndexName);
        this.serviceNames = SERVICES;
        return;
    } catch (Exception e) {
        // Passing the exception as last argument makes the logger print
        // its stack trace as well
        LOG.error("Cannot connect to FedoraGSearch's web services at "
                + gSearchWSDLURL + "\nQuery services will not be available.", e);
    }

    // Without FedoraGSearch no query services can be provided.
    // Cleared first so that a failure below cannot leave a stale list.
    serviceNames = null;

    // Collect the non-query services in one pass, then trim to size
    String[] candidates = new String[SERVICES.length];
    int kept = 0;
    for (String service : SERVICES) {
        boolean isQueryService = service.toLowerCase().contains("query");
        if (!isQueryService) {
            candidates[kept] = service;
            kept++;
        }
    }
    String[] nonQueryServices = new String[kept];
    System.arraycopy(candidates, 0, nonQueryServices, 0, kept);
    serviceNames = nonQueryServices;
}
/**
 * Accessor for the location of FedoraGSearch's WSDL file.
 * @return the current value of gSearchWSDLURL
 */
public String getGSearchWSDLURL() {
    return this.gSearchWSDLURL;
}
/**
 * Changes the location of the WSDL file of FedoraGSearch's web services
 * and immediately tries to (re)connect to those web services through
 * initSearchFunctionality(). The stored suffix (gSearchWSDLSuffix) is not
 * touched by this method.
 * @param url is the new url of the GSearch web services WSDL file
 */
public void setGSearchWSDLURL(String url) {
    gSearchWSDLURL = url;
    this.initSearchFunctionality();
}
/**
 * Accessor for the name of the index Fedora Generic Search searches in
 * (the index the GS3 docs have been indexed into).
 * @return the current value of gSearchIndexName
 */
public String getGSearchIndexName() {
    return this.gSearchIndexName;
}
/**
 * Changes the name of the index containing the indexed GS3 documents and
 * immediately tries to (re)connect to the Fedora GSearch web services
 * with the new name, through initSearchFunctionality().
 * @param indexName is the new name of the index containing indexed GS3
 * docs that GSearch should search in
 */
public void setGSearchIndexName(String indexName) {
    gSearchIndexName = indexName;
    this.initSearchFunctionality();
}
/**
 * Accessor for the services actually supported by FedoraGS3.
 * @return the serviceNames array itself (not a copy)
 */
protected String[] getServiceNames() {
    return serviceNames;
}
/**
 * For finding out if the sectionNumber is given as part of the docID.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return true if the first and the last occurrence of HYPHEN in docID
 * are at different positions, i.e. docID contains HYPHEN at least twice;
 * false otherwise (including when docID contains no HYPHEN at all).
 */
protected boolean containsSectionNumber(String docID) {
    // A docPID on its own contains a single hyphen; a second hyphen means
    // a section number has been appended at the end of the docID.
    // NOTE(review): presumably docIDs look like
    // "greenstone:<colName>-<docPID>-<sectionNumber>" - confirm.
    int firstHyphen = docID.indexOf(HYPHEN);
    int lastHyphen = docID.lastIndexOf(HYPHEN);
    return firstHyphen != lastHyphen;
}
/**
 * Extracts the docPID from a docID. If a section number is suffixed to the
 * docID (as decided by containsSectionNumber()), everything before the
 * last HYPHEN is returned; otherwise the docID already is the docPID and
 * is returned unchanged.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return only the docPID portion of the docID.
 */
protected String getDocPIDFromDocID(String docID) {
    if (!containsSectionNumber(docID)) {
        // no section number, so the docID is the docPID
        return docID;
    }
    int sectionSeparator = docID.lastIndexOf(HYPHEN);
    return docID.substring(0, sectionSeparator);
}
/**
 * Extracts the section number from a docID, if one is suffixed to it (as
 * decided by containsSectionNumber()): everything after the last HYPHEN.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return only the sectionID portion of the docID - if any, else "".
 */
protected String getSectionIDFromDocID(String docID) {
    if (!containsSectionNumber(docID)) {
        return "";
    }
    int sectionStart = docID.lastIndexOf(HYPHEN) + 1;
    return docID.substring(sectionStart);
}
/**
 * Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
 * response message that gives the metadata for each collection identified.
 * All metadata fields are requested ("all").
 * @param collIDs is an array of fedora pids identifying collections in the
 * fedora repository
 * @return a GS3 DocumentMetadataRetrieve response message, as produced by
 * getMetadata(), for all the requested collections
 */
public String getCollectionMetadata(String[] collIDs) {
    String[] everyField = { "all" };
    return getMetadata(collIDs, everyField);
}
/**
 * Given a list of document identifiers, returns a GS3
 * DocumentMetadataRetrieve response message containing the requested
 * metadata for each document. Plain delegation to getMetadata().
 * @param docIDs is an array of document identifiers; a docID is either the
 * {@code <pid>} of an item (document) in the fedora repository, or
 * {@code <pid>-sectionNumber}.
 * @param metadata is the list of metadata elements to be retrieved for
 * each doc
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * requested (EX, DC, DLS) metadata for all the requested documents
 */
public String getDocumentMetadata(String[] docIDs, String[] metadata) {
    String response = getMetadata(docIDs, metadata);
    return response;
}
/**
 * Given a collectionID, returns a GS3 DocumentMetadataRetrieve response
 * message that gives the metadata for the collection identified.
 * All metadata fields are requested ("all").
 * @param collID is a fedora pid identifying a collection in its repository
 * @return a GS3 DocumentMetadataRetrieve response message, as produced by
 * getMetadata(), for the requested collection
 */
public String getCollectionMetadata(String collID) {
    String[] singleCollection = { collID };
    String[] everyField = { "all" };
    return getMetadata(singleCollection, everyField);
}
/**
 * Given a document identifier, returns a GS3 DocumentMetadataRetrieve
 * response message containing the requested metadata for the document.
 * @param docID is a document identifier; it is either the {@code <pid>}
 * of an item (document) in the fedora repository, or
 * {@code <pid>-sectionNumber}.
 * @param metadata is the list of metadata elements to be retrieved for
 * the doc
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * requested (EX, DC, DLS) metadata for the requested document
 */
public String getDocumentMetadata(String docID, String[] metadata) {
    String[] singleDocument = { docID };
    return getMetadata(singleDocument, metadata);
}
/**
 * Builds a greenstone DocumentMetadataRetrieve response for the documents
 * or collections indicated by docIDsOrCollIDs.
 * <p>
 * Any exception raised while collecting the metadata stops the loop; the
 * documentNodes gathered up to that point are kept and the exception is
 * handed to createResponseMessage() wrapped in a FedoraGS3RunException.
 * @param docIDsOrCollIDs is an array of identifiers, each of which is
 * either the fedora pid of a collection or a document identifier. A
 * document identifier is either "documentPID-sectionNumber" or just the
 * fedora documentPID.
 * @param metadata is the list of metadata elements to be retrieved for
 * each doc
 * @return the response message as a String; if the message cannot be
 * converted to a String, a failure message followed by the exception
 */
public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
{
    Document doc = builder.newDocument();
    Element docNodeList = doc.createElement(
            GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        // one <documentNode> with its metadata per requested identifier
        for (String id : docIDsOrCollIDs) {
            docNodeList.appendChild(getMetadata(doc, id, metadata));
        }
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("EX (and/or DC, DLS) metadata datastream");
    }

    Element responseMsg = createResponseMessage(doc, docNodeList, failure,
            GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
                + " " + e;
    }
    return xml;
}
/**
 * Creates the documentNode element holding the metadata of one collection
 * or document:
 * <pre>{@code
 * <documentNode nodeID="id"><metadataList>
 *   <metadata name="">value</metadata>
 *   ...
 * </metadataList></documentNode>
 * }</pre>
 * For an id ending in _COLLECTION only the EX datastream is used. For a
 * document without section number, or with section number "1", the EX, DC
 * and DLS datastreams of the top-level document are used. For any other
 * section the section's EX and DC metadata are used. Metadata is added in
 * the order DC, DLS, EX.
 * @param doc the DOM document used to create the new elements
 * @param id denotes a collection pid, a document pid or a docID of the
 * form "documentpid-sectionNumber"
 * @param metadata is the list of metadata elements to be retrieved
 * @return documentNode containing the metadata for the collection or
 * document given by parameter id
 */
protected Element getMetadata(Document doc, String id, String[] metadata)
        throws RemoteException, UnsupportedEncodingException,
        SAXException, IOException
{
    // <documentNode nodeID="id"> - the node the retrieve is performed on
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    Attr nodeIdAttr = doc.createAttribute(GSXML.NODE_ID_ATT);
    nodeIdAttr.setValue(id);
    docNode.setAttributeNode(nodeIdAttr);

    // <metadataList>
    Element metadataList = doc.createElement(
            GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);

    // Raw datastreams; "" means "nothing to add for this set"
    String exStream = "";
    String dcStream = "";
    String dlsStream = "";

    if (!id.endsWith(_COLLECTION)) {
        // id refers to a document: split it into fedora PID and section.
        // EX/DC for section 1 is the same as for the document itself,
        // i.e. both refer to the top-level document, which may also
        // carry DLS metadata.
        String sectionID = getSectionIDFromDocID(id);
        String docPID = getDocPIDFromDocID(id);
        boolean topLevelRequested =
                sectionID.equals("") || sectionID.equals("1");
        if (topLevelRequested) {
            // slightly faster than getSectionEXMetadata(docPID, "1")
            exStream = this.getEX(docPID);
            dcStream = this.getDC(docPID);
            dlsStream = this.getDLS(docPID);
        } else {
            exStream = getSectionEXMetadata(docPID, sectionID);
            dcStream = getSectionDCMetadata(docPID, sectionID);
        }
    } else {
        // id refers to a collection: only extracted (EX) metadata
        exStream = this.getEX(id);
    }

    // Requested field names joined as "name1|name2|...|"
    StringBuilder joined = new StringBuilder();
    for (String field : metadata) {
        joined.append(field).append("|");
    }
    String metafields = joined.toString();

    // Metadata sets are added in alphabetical order: DC, DLS, EX.
    // DC differs from EX and DLS: only the namespace prefix of its
    // elements is fixed, the rest of each tag name varies.
    if (dcStream.length() != 0) {
        addMetadataWithNamespacedTagNames(doc, metadataList,
                dcStream, DC, metafields);
    }
    // DLS is only ever fetched for top-level documents
    if (dlsStream.length() != 0) {
        addMetadataWithFixedTagName(doc, metadataList,
                dlsStream, DLS, metafields);
    }
    // EX is processed unconditionally: collections, top-level documents
    // and document sections are all expected to have it
    addMetadataWithFixedTagName(doc, metadataList, exStream, EX, metafields);

    docNode.appendChild(metadataList);
    return docNode;
}
/**
 * Converts the child elements of the root of a metadata datastream whose
 * tag names have the form {@code <metadataSetNS:field>value</...>} into
 * Greenstone elements of the form
 * {@code <metadata name="metadataSetNS.field">value</metadata>} and appends
 * them to metadataList. For the DC set the name is normalised further to
 * "dc." followed by the capitalised field name (dc:title becomes dc.Title).
 * <p>
 * Only element nodes whose tag name starts (ignoring case) with the
 * lower-cased metadataSet are considered. The DC capitalisation is applied
 * only to names that really start with the "dc" + COLON prefix followed by
 * at least one more character, so that names such as "dcterms:title" or a
 * bare "dc" are passed on with just the colon replaced instead of being
 * mangled or causing a StringIndexOutOfBoundsException.
 * @param doc is the Document object using which the new metadata Elements
 * are to be constructed
 * @param metadataList is the {@code <metadataList>} Element to which the
 * new metadata Elements are to be appended as children.
 * @param metaDatastream the metadata datastream in string form (e.g. the
 * Dublin Core metadata stored in the Fedora repository).
 * @param metadataSet is the constant datastream identifier, e.g. "DC".
 * At present this method applies to the DC metadata and any others like it
 * where each tagname is different except for the constant namespace prefix.
 * @param metafields is a | separated string containing the metadatafields
 * to extract or "all" if all fields are requested
 * @throws SAXException if metaDatastream cannot be parsed as XML
 * @throws IOException if reading metaDatastream fails
 */
protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
        String metaDatastream, String metadataSet, String metafields)
        throws SAXException, IOException
{
    Document src = builder.parse(
            new InputSource(new StringReader(metaDatastream)));

    String setPrefix = metadataSet.toLowerCase();
    String qualifiedPrefix = setPrefix + COLON; // e.g. "dc:"
    int localNameStart = qualifiedPrefix.length();

    // getElementsByTagNameNS(prefix, "*") did not work here (according to
    // the original author), so walk the children of the root element.
    NodeList children = src.getDocumentElement().getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        // Skip text, comments, processing instructions etc. explicitly
        // rather than relying on their node names not matching
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        String tagName = child.getNodeName();
        String lowerTagName = tagName.toLowerCase();
        if (!lowerTagName.startsWith(setPrefix)) {
            continue;
        }

        // need to have a period for Greenstone instead of Fedora's colon
        String gsName = tagName.replace(COLON, PERIOD);
        boolean hasLocalName = lowerTagName.startsWith(qualifiedPrefix)
                && tagName.length() > localNameStart;
        if (metadataSet.equals(DC) && hasLocalName) { // dc:title -> dc.Title
            gsName = "dc" + PERIOD
                    + Character.toUpperCase(tagName.charAt(localNameStart))
                    + tagName.substring(localNameStart + 1);
        }

        // NOTE(review): this is a substring test, so a requested field
        // also selects every metadata name contained in it (and any
        // request containing the letters "all" selects everything).
        // Left unchanged because callers may rely on it - confirm.
        if (metafields.indexOf("all") != -1 || metafields.indexOf(gsName) != -1) {
            String value = FedoraCommons.getValue((Element) child);
            // <metadata name="gsName">value</metadata> in the target DOM
            Element metadata = doc.createElement(GSXML.METADATA_ELEM);
            metadata.setAttribute(GSXML.NAME_ATT, gsName);
            metadata.appendChild(doc.createTextNode(value));
            metadataList.appendChild(metadata);
        }
    }
}
/**
 * Converts the metadata elements of a datastream whose tags all share one
 * name, {@code <set:metadata name="metadataName">value</set:metadata>}
 * (where "set" is the lower-cased metadataSet), into Greenstone elements
 * of the form {@code <metadata name="...">value</metadata>} and appends
 * them to metadataList.
 * <p>
 * The Greenstone name is the value of the name attribute, followed by
 * HYPHEN and the qualifier attribute when one is present. For every set
 * other than EX it is prefixed with the lower-cased set name and PERIOD
 * (e.g. "dls."); EX names get no prefix.
 * @param doc is the Document object using which the new metadata Elements
 * are to be constructed
 * @param metadataList is the {@code <metadataList>} Element to which the
 * new metadata Elements are to be appended as children.
 * @param metaDatastream the metadata datastream in string form (e.g. the
 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
 * repository).
 * @param metadataSet is the constant datastream identifier,
 * e.g. "DLS" or "EX".
 * @param metafields is a | separated string containing the metadatafields
 * to extract or "all" if all fields are requested.
 */
protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
        String metaDatastream, String metadataSet, String metafields)
        throws SAXException, IOException
{
    String setPrefix = metadataSet.toLowerCase();
    // Greenstone wants a period where Fedora has a colon; EX metadata
    // names carry no prefix at all
    String namespacePrefix = metadataSet.equals(EX) ? "" : setPrefix + PERIOD;

    Document src = builder.parse(
            new InputSource(new StringReader(metaDatastream)));
    // Looking for tagnames <ex:metadata> or <dls:metadata>
    NodeList metaTags = src.getElementsByTagName(setPrefix + COLON + METADATA);

    for (int i = 0; i < metaTags.getLength(); i++) {
        Element metatag = (Element) metaTags.item(i);

        // (name, value) pair from the source DOM
        String name = "";
        if (metatag.hasAttribute(NAME)) {
            name = metatag.getAttribute(NAME);
        }
        // several metadata may share one name; a qualifier, when present,
        // is appended to tell them apart
        if (metatag.hasAttribute(QUALIFIER)) {
            name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
        }
        name = namespacePrefix + name;

        // NOTE(review): substring test - a requested field also selects
        // every metadata name contained in it; confirm this is intended.
        boolean requested = metafields.indexOf("all") != -1
                || metafields.indexOf(name) != -1;
        if (!requested) {
            continue;
        }

        String value = FedoraCommons.getValue(metatag);
        // <metadata name="name">value</metadata> in the target DOM
        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
        metadata.setAttribute(GSXML.NAME_ATT, name);
        metadata.appendChild(doc.createTextNode(value));
        metadataList.appendChild(metadata);
    }
}
/**
 * Given a document identifier, returns a GS3 DocumentMetadataRetrieve
 * response message containing ONLY the Title metadata for the document.
 * @param docID is a document identifier; it is either the {@code <pid>}
 * of an item (document) in the fedora repository, or
 * {@code <pid>-sectionNumber}.
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * Title metadata for the requested document
 */
public String getTitleMetadata(String docID) {
    String[] singleDocument = { docID };
    return getTitleMetadata(singleDocument);
}
/**
 * Given a list of document identifiers, returns a GS3
 * DocumentMetadataRetrieve response message containing ONLY the Title
 * metadata for those documents:
 * <pre>{@code
 * <documentNode nodeID="docID"><metadataList>
 *   <metadata name="Title">sometitle</metadata>
 * </metadataList></documentNode>
 * }</pre>
 * Any exception raised while collecting the titles stops the loop; the
 * documentNodes gathered so far are kept and the exception is handed to
 * createResponseMessage() wrapped in a FedoraGS3RunException.
 * @param docIDs is a list of document identifiers, where a docID is either
 * the {@code <pid>} of an item (document) in the fedora repository, or
 * {@code <pid>-sectionNumber}.
 * @return the response message as a String; if the message cannot be
 * converted to a String, a failure message followed by the exception
 */
public String getTitleMetadata(String[] docIDs) {
    Document doc = builder.newDocument();
    Element docNodeList = doc.createElement(
            GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        for (String docID : docIDs) {
            docNodeList.appendChild(getTitleMetadata(doc, docID));
        }
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("EX metadata datastream");
    }

    Element responseMsg = createResponseMessage(doc, docNodeList, failure,
            GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
                + " " + e;
    }
    return xml;
}
/**
 * Creates the documentNode element containing the title metadata of one
 * document or document section:
 * <pre>{@code
 * <documentNode nodeID="docID"><metadataList>
 *   <metadata name="Title">sometitle</metadata>
 * </metadataList></documentNode>
 * }</pre>
 * The document title is used when docID carries no section number;
 * otherwise the title of the given section is used (section "1" included).
 * @param doc the DOM document used to create the new elements
 * @param docID denotes the id of a document or a document section, so it
 * is either a document-pid or of the form documentpid-sectionNumber
 * @return documentNode containing the title metadata for the document or
 * section given by parameter docID
 */
protected Element getTitleMetadata(Document doc, String docID)
        throws RemoteException, UnsupportedEncodingException,
        SAXException, IOException
{
    // <documentNode nodeID="docID">
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, docID);

    // <metadataList><metadata name="Title"/></metadataList>
    Element metaList = doc.createElement(
            GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
    Element metadata = doc.createElement(GSXML.METADATA_ELEM);
    metadata.setAttribute(GSXML.NAME_ATT, TITLE);
    metaList.appendChild(metadata);
    docNode.appendChild(metaList);

    // Look the title up: whole document or a single section
    String sectionID = getSectionIDFromDocID(docID);
    String docPID = getDocPIDFromDocID(docID);
    String title = sectionID.equals("")
            ? this.getDocTitle(docPID)
            : this.getSectionTitle(docPID, sectionID);

    metadata.appendChild(doc.createTextNode(title));
    return docNode;
}
/**
 * Retrieves the requested portion of the structure of a single document by
 * delegating to getStructure() with a one-element array.
 * @param docID is the document identifier of the document whose
 * hierarchical structure is requested. The name of the collection is
 * already included in the docID for a Fedora DL.
 * @param structure - strings specifying the required structure of the
 * document. It can be a combination of: ancestors, parent, siblings,
 * children, descendants, entire.
 * @param info - strings specifying the required structural info of the
 * document. It can be any combination of: siblingPosition, numSiblings,
 * numChildren.
 * @return the Greenstone3 XML response String produced by getStructure()
 * (NOTE(review): presumably a DocumentStructureRetrieve response, although
 * the previous comment named DocumentMetadataRetrieve - confirm)
 */
public String getDocumentStructure(String docID, String[] structure, String[] info) {
    String[] singleDocument = { docID };
    return getStructure(singleDocument, structure, info);
}
/**
 * Retrieves the requested portion of the document structure of several
 * documents by delegating to getStructure(String[], String[], String[]).
 * @param docIDs is an array of document identifiers of documents whose
 * hierarchical structures are requested. The name of the collection is
 * already included in the docID for a Fedora DL.
 * @param structure - strings specifying the required structure of each
 * document. It can be a combination of: ancestors, parent, siblings,
 * children, descendants, entire.
 * @param info - strings specifying the required structural info of each
 * document. It can be any combination of: siblingPosition, numSiblings,
 * numChildren.
 * @return a String representing the Greenstone3 DocumentStructureRetrieve
 * XML response for the given docIDs.
 */
public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
    final String response = this.getStructure(docIDs, structure, info);
    return response;
}
/**
 * Builds a greenstone3 DocumentStructureRetrieve XML response message
 * holding the document structures for the given docIDs.
 * Similar to FedoraConnection.getTOC(), but greenstone formatted XML is
 * returned instead of fedora formatted XML: the requested part of each
 * document's table of contents (TOC) is converted into the greenstone3
 * format used for DocumentStructureRetrieve responses.
 * Any exception raised while collecting the structures is wrapped in a
 * FedoraGS3RunException and handed to createResponseMessage() along with
 * whatever content was built up to that point.
 * @param docIDs the documentIDs for which the section's structure is
 * returned; a docID is either a fedora pid &lt;docPID&gt; or
 * &lt;docPID&gt;-&lt;sectionNumber&gt;.
 * @param structure - the structure of the sections to return. Can be any
 * combination of: ancestors, parent, siblings, children, descendants, entire.
 * @param infos - strings containing any combination of the values:
 * numChildren, numSiblings, siblingPosition.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
protected String getStructure(String[] docIDs, String[] structure, String[] infos)
{
    Document responseDoc = builder.newDocument();
    // the list element that will hold one documentNode per requested docID
    Element nodeList = responseDoc.createElement(
        GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        // fill the list with the structure of every requested docID
        getStructureElement(nodeList, docIDs, structure, infos);
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("(requested portion of) TOC datastream");
    }

    // wrap the list (and any failure) into a GS3 response message
    Element responseMsg = createResponseMessage(responseDoc, nodeList, failure,
        GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return xml;
}
/**
 * Populates the given documentNodeList element of a greenstone3
 * DocumentStructureRetrieve response with one documentNode per docID,
 * each representing the requested structure of that docID.
 * When at least one info value was requested, a child element named
 * GSXML.NODE_STRUCTURE_ELEM + "Info" is added to each documentNode; it
 * receives one info element for each of the numSiblings, siblingPosition,
 * numChildren and document-type attributes that is present on the root
 * element of the fetched fedora section structure.
 * @param docNodeList is the list element to which the documentNodes of the
 * doc structures are appended.
 * @param docIDs the documentIDs for which the section's structure is
 * returned; a docID is either a fedora pid &lt;docPID&gt; or
 * &lt;docPID&gt;-&lt;sectionNumber&gt;. A docID without a section part is
 * treated as section "1".
 * @param structures - the structure of the sections to return. Can be any
 * combination of: ancestors, parent, siblings, children, descendants, entire.
 * @param infos - strings containing any combination of the values:
 * numChildren, numSiblings, siblingPosition.
 */
protected void getStructureElement(Element docNodeList, String[] docIDs,
    String[] structures, String[] infos)
    throws RemoteException, UnsupportedEncodingException, SAXException,
    IOException
{
    // Flatten the requested structure and info values into two
    // "|"-terminated strings (e.g. "parent|children|").
    StringBuilder structureBuf = new StringBuilder();
    for (String requestedStructure : structures) {
        structureBuf.append(requestedStructure).append("|");
    }
    StringBuilder infoBuf = new StringBuilder();
    for (String requestedInfo : infos) {
        infoBuf.append(requestedInfo).append("|");
    }
    String structure = structureBuf.toString();
    String info = infoBuf.toString();

    // Attributes of the source root element that are copied into the
    // structure-info element, in the order they are emitted.
    final String[] infoAttributeNames = {
        AbstractBasicDocument.INFO_NUM_SIBS,
        AbstractBasicDocument.INFO_SIB_POS,
        AbstractBasicDocument.INFO_NUM_CHILDREN,
        AbstractBasicDocument.INFO_DOC_TYPE
    };

    for (String requestedID : docIDs) {
        // work out the document's fedora PID and section ID
        String sectionID = getSectionIDFromDocID(requestedID);
        String docPID = getDocPIDFromDocID(requestedID);
        if (sectionID.equals("")) {
            sectionID = "1"; // no section given: use the top-level section
        }

        // fetch the required section in fedora's format ...
        Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
        Document doc = docNodeList.getOwnerDocument();
        // ... and copy-and-convert it into the GS3 structure format
        Element docNode = getStructure(doc, requestedID, docPID, srcDocElement);

        if (!info.equals("")) {
            Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM + "Info");
            Element root = srcDocElement.getOwnerDocument().getDocumentElement();
            for (String attName : infoAttributeNames) {
                if (!root.hasAttribute(attName)) {
                    continue;
                }
                Element infoEl = doc.createElement(GSXML.INFO_ATT);
                infoEl.setAttribute(GSXML.NAME_ATT, attName);
                infoEl.setAttribute(GSXML.VALUE_ATT, root.getAttribute(attName));
                nodeStructureInfo.appendChild(infoEl);
            }
            docNode.appendChild(nodeStructureInfo);
        }

        // add it to our list of documentNodes
        docNodeList.appendChild(docNode);
    }
}
/**
 * Converts the portion of a fedora TOC that outlines the structure of a
 * document (section) into the Greenstone 3 DOM format used for
 * DocumentStructureRetrieve responses.
 * @param doc is the document used to create the returned elements.
 * @param requestingDocID is the identifier of the document for which the
 * structure was requested; it becomes the node id of the returned
 * documentNode. Note that this is not always the same as parameter docID.
 * @param docID is the prefix used for the node ids of the mirrored
 * sections; it is passed on unchanged to createDocNodeFromSubsection()
 * and createDocStructure().
 * @param section - the fedora section XML that is being mirrored in
 * greenstone3 format.
 * @return a documentNode element containing a node-structure element,
 * which in turn contains the greenstone3 mirror of parameter section and
 * of all its descendant sections.
 */
protected Element getStructure(Document doc, String requestingDocID,
    String docID, Element section)
{
    // outermost element: the documentNode on which the structure
    // retrieve is being performed
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, requestingDocID);

    // container for the mirrored section tree
    Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);

    // mirror the root section first, then recursively fill in its subtree
    Element rootNode = createDocNodeFromSubsection(doc, section, docID);
    createDocStructure(doc, section, rootNode, docID);

    // connect the pieces: docNode > nodeStructure > rootNode
    docNode.appendChild(nodeStructure);
    nodeStructure.appendChild(rootNode);
    return docNode;
}
/**
 * Recursively mirrors the Section children of parameter section as
 * greenstone documentNodes underneath parameter parent, all the way down
 * to the deepest descendants.
 * @param doc is the document containing the parent; it is used to create
 * the new documentNodes.
 * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
 * whose substructure is to be mirrored.
 * @param parent is the greenstone documentNode that receives the mirrored
 * children; its descendants created here correspond to the descendants of
 * parameter section.
 * @param docPID is the prefix of all nodeIDs in the parent's structure.
 */
protected void createDocStructure(
    Document doc, Element section, Element parent, String docPID)
{
    // walk the direct children of the section via the sibling chain
    for (Node n = section.getFirstChild(); n != null; n = n.getNextSibling()) {
        if (!n.getNodeName().equals(SECTION_ELEMENT)) {
            continue; // only child nodes named "Section" are mirrored
        }
        Element subsection = (Element) n;
        Element mirrored = createDocNodeFromSubsection(doc, subsection, docPID);
        parent.appendChild(mirrored);
        // descend into the subsection to mirror its own children
        createDocStructure(doc, subsection, mirrored, docPID);
    }
}
/**
 * Creates the Greenstone3 documentNode element that mirrors one fedora
 * TOC &lt;Section&gt; element. The created node gets three attributes: the
 * document type (always GSXML.DOC_TYPE_HIERARCHY), the node id
 * (docID + HYPHEN + the section's ID attribute, or an empty section id
 * when that attribute is absent) and the node type (copied from the
 * section, or empty when the section has none).
 * @param doc is the document that will contain the created documentNode
 * @param subSection is the XML &lt;Section&gt; in the fedora repository's
 * TOC which will be mirrored in the returned greenstone documentNode.
 * @param docID is the prefix of all nodeIDs in the parent's structure
 * @return a greenstone documentNode that represents the fedora TOC's
 * &lt;Section&gt; element passed as parameter subSection.
 */
protected Element createDocNodeFromSubsection(
    Document doc, Element subSection, String docID)
{
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.DOC_TYPE_ATT, GSXML.DOC_TYPE_HIERARCHY);

    String sectionID = subSection.hasAttribute(ID)
        ? subSection.getAttribute(ID) : "";
    // The node id is always docID-sectionID. The former special case for
    // the root, non-leaf section assigned exactly the same value in both
    // branches, so the test (and its scan of all descendant Section
    // elements) has been removed.
    docNode.setAttribute(GSXML.NODE_ID_ATT, docID + HYPHEN + sectionID);

    // The node type attribute is always present on the result; it stays
    // empty when the source section does not declare one.
    String nodeType = subSection.hasAttribute(GSXML.NODE_TYPE_ATT)
        ? subSection.getAttribute(GSXML.NODE_TYPE_ATT) : "";
    docNode.setAttribute(GSXML.NODE_TYPE_ATT, nodeType);
    return docNode;
}
/**
 * Retrieves the content of a single document (section) by delegating to
 * getContent(String[]) with a one-element array.
 * @param docID is expected to be of the form
 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;".
 * If no section number is given, the content of the section with id "1"
 * is returned.
 * @return the response String produced by getContent(String[]).
 */
public String getContent(String docID) {
    String[] singleDocID = { docID };
    return this.getContent(singleDocID);
}
/**
 * Builds a DocumentContentRetrieve response message containing, for each
 * docID, a documentNode whose node-content element holds the trimmed text
 * of the requested section. For every docID the fedora docPID and
 * sectionID are worked out and getContentBody(docPID, sectionID) is
 * called with those.
 * Any exception raised while collecting the content is wrapped in a
 * FedoraGS3RunException and handed to createResponseMessage() together
 * with the documentNodes built before the failure.
 * @param docIDs is an array of document identifiers of the form
 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;".
 * If an identifier carries no section number, the content of the section
 * with id "1" is returned for it.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String getContent(String[] docIDs) {
    Document responseDoc = builder.newDocument();
    // the list element that will hold one documentNode per requested docID
    Element nodeList = responseDoc.createElement(
        GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        for (String requestedID : docIDs) {
            // split the docID into its section id (prefix removed) and docPID
            String sectionID = this.removePrefix(
                getSectionIDFromDocID(requestedID), SECTION);
            String docPID = getDocPIDFromDocID(requestedID);
            if (sectionID.equals("")) {
                sectionID = "1"; // no section specified: use Section id="1"
            }

            // fetch the contents of the requested section of document docPID
            String body = this.getContentBody(docPID, sectionID);

            // the node id is the docID exactly as it was requested, so it
            // contains the docPID (and the sectionID if one was given)
            Element docNode = responseDoc.createElement(GSXML.DOC_NODE_ELEM);
            docNode.setAttribute(GSXML.NODE_ID_ATT, requestedID);

            // the retrieved text becomes the node content
            Element nodeContent = responseDoc.createElement(GSXML.NODE_CONTENT_ELEM);
            nodeContent.appendChild(responseDoc.createTextNode(body.trim()));
            docNode.appendChild(nodeContent);

            nodeList.appendChild(docNode);
        }
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("requested doc Section datastream");
    }

    Element responseMsg = createResponseMessage(responseDoc, nodeList, failure,
        GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return xml;
}
/**
 * Gets the text content of a section of a document.
 * The section is fetched with getSection(), parsed as XML, and the value
 * of its document element is extracted with FedoraCommons.getValue().
 * Every occurrence of the GS3 file path macro followed by "/" is then
 * removed from that text.
 * @param docPID the pid of the document from which a section's text is to
 * be retrieved.
 * @param sectionID is the section identifier of the document denoted by
 * docPID whose text is to be returned.
 * @return the text content of the section, with the file path macro
 * prefixes stripped.
 */
protected String getContentBody(String docPID, String sectionID)
    throws RemoteException, UnsupportedEncodingException,
    SAXException, IOException
{
    String section = this.getSection(docPID, sectionID);

    // the content is nested inside an XML element, so parse the section
    // and take the value of the document element
    InputSource source = new InputSource(new StringReader(section));
    Document doc = builder.parse(source);
    section = FedoraCommons.getValue(doc.getDocumentElement());

    // Remove all occurrences of the macro (e.g. "_httpdocimg_/") that
    // precede associated filenames: it is a GS3 defined macro for resolving
    // relative urls and won't help with documents stored in fedora.
    // String.replace() is used because the macro is a literal string, not a
    // regular expression as replaceAll() would interpret it.
    return section.replace(GS3FilePathMacro + "/", "");
}
/**
 * Creates greenstone's response message element:
 * &lt;message&gt;&lt;response&gt;content&lt;/response&gt;&lt;/message&gt;.
 * The message element is also appended to doc as a child of the document.
 * @param doc - the Document object which should be used to create the
 * &lt;message&gt; and &lt;response&gt; elements
 * @param content - the element that is to be nested inside
 * &lt;response&gt;; it is skipped when null
 * @param ex - any exception that occurred when trying to create the
 * content parameter, or null. When given, an error element with the
 * exception's message is appended to the response after the content. If
 * the exception has no message, its toString() text is used instead.
 * @param responseType - the value for the type attribute of
 * &lt;response&gt;, such as "describe", "retrieve", "browse", "query"...
 * @param originator - indicates the collectionName or service (like
 * DocumentContentRetrieve) from where this response message originates;
 * it becomes the value of the response's from attribute
 * @return a greenstone response-message element.
 */
protected Element createResponseMessage(Document doc, Element content,
    Exception ex, String responseType, String originator)
{
    Element response = doc.createElement(GSXML.RESPONSE_ELEM);
    // from = the originating collection or service
    response.setAttribute(GSXML.FROM_ATT, originator);
    // type = "describe" or "process" - whatever's given in responseType
    response.setAttribute(GSXML.TYPE_ATT, responseType);

    if (content != null) {
        response.appendChild(content);
    }

    if (ex != null) {
        // Exception.getMessage() may legitimately be null; a text node
        // needs actual character data, so fall back to the exception's
        // string form in that case.
        String errorText = ex.getMessage();
        if (errorText == null) {
            errorText = ex.toString();
        }
        Element error = doc.createElement(GSXML.ERROR_ELEM);
        error.appendChild(doc.createTextNode(errorText));
        // the error goes after the content element, whatever that was
        response.appendChild(error);
    }

    Element message = doc.createElement(GSXML.MESSAGE_ELEM);
    message.appendChild(response);
    doc.appendChild(message);
    return message;
}
/**
 * Creates a service list element as defined by GS3, containing one service
 * element for every name in member variable serviceNames. Each service
 * element carries a name attribute and a type attribute: the browse type
 * for "ClassifierBrowse", the query type for names containing "Query",
 * and the retrieve type for everything else.
 * @param doc - the Document object which should be used to create the
 * service list element
 * @return the service list element listing all services supported by
 * FedoraGS3.
 */
protected Element createServiceList(Document doc)
{
    Element serviceList = doc.createElement(
        GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);

    for (String serviceName : serviceNames) {
        Element service = doc.createElement(GSXML.SERVICE_ELEM);
        service.setAttribute(GSXML.NAME_ATT, serviceName);

        // classify the service by its name
        String serviceType;
        if (serviceName.equals("ClassifierBrowse")) { // browseTitlesByLetter
            serviceType = GSXML.SERVICE_TYPE_BROWSE;
        } else if (serviceName.contains("Query")) {   // search services
            serviceType = GSXML.SERVICE_TYPE_QUERY;
        } else {
            serviceType = GSXML.SERVICE_TYPE_RETRIEVE;
        }
        service.setAttribute(GSXML.TYPE_ATT, serviceType);

        serviceList.appendChild(service);
    }
    return serviceList;
}
/**
 * Answers a describe-services request: wraps the service list built by
 * createServiceList() (one entry per name in member variable serviceNames)
 * in a GS3 describe response message with an empty originator.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String getServiceList()
{
    Document responseDoc = builder.newDocument();
    // the service list forms the body of the response message
    Element responseMsg = createResponseMessage(responseDoc,
        createServiceList(responseDoc), null,
        GSXML.REQUEST_TYPE_DESCRIBE, "");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return xml;
}
/**
 * Builds a GS3 describe response message listing the collections stored in
 * the Fedora-Greenstone repository: a collection list element with one
 * collection element (carrying a name attribute) per collection name.
 * A RemoteException raised while obtaining the collection names is wrapped
 * in a FedoraGS3RunException and reported through the response message.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String getCollectionList()
{
    Document responseDoc = builder.newDocument();
    Element collectionList = responseDoc.createElement(
        GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        // fetching the collections is the step that may throw RemoteException
        String[] collectionNames = this.getCollectionNames(
            this.getCollections());
        for (String collectionName : collectionNames) {
            Element collection = responseDoc.createElement(GSXML.COLLECTION_ELEM);
            collection.setAttribute(GSXML.NAME_ATT, collectionName);
            collectionList.appendChild(collection);
        }
    } catch (RemoteException e) {
        // perhaps no Greenstone collections could be found in the
        // fedora repository?
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics(
            "greenstone collections in fedora repository");
    }

    // the collection list forms the body of the response message
    Element responseMsg = createResponseMessage(responseDoc, collectionList,
        failure, GSXML.REQUEST_TYPE_DESCRIBE, "");

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return xml;
}
/**
 * Builds a GS3 describe response message for one collection in the
 * Fedora-Greenstone repository. The collection element carries the
 * collection's name, a display item holding the collection title (in the
 * language of member variable lang) and the list of services created by
 * createServiceList().
 * If the title cannot be obtained, the display item is left without text
 * and the failure is reported through the response message.
 * @param collectionName - the name of the collection that is to be
 * described. It is converted to a fedora collection pid with
 * getCollectionPID() in order to look up the collection's title.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String describeCollection(String collectionName)
{
    Document doc = builder.newDocument();
    FedoraGS3RunException ex = null;

    Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
    collection.setAttribute(GSXML.NAME_ATT, collectionName);

    // display item with lang and name attributes; its text is the
    // collection's display name
    Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
    displayItem.setAttribute(GSXML.LANG_ATT, this.lang);
    displayItem.setAttribute(GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_NAME);
    try {
        Text textNode = doc.createTextNode(
            this.getCollectionTitle(getCollectionPID(collectionName)));
        displayItem.appendChild(textNode);
    } catch (Exception e) {
        // can't find Greenstone collections in fedora repository or problem
        // getting their titles from their metadata datastream?
        ex = new FedoraGS3RunException(e);
        // note the leading space of the second literal: without it the two
        // halves ran together as "metadatain the fedora repository"
        ex.setSpecifics("greenstone collections or their metadata"
            + " in the fedora repository");
    }
    collection.appendChild(displayItem);

    // Services for all collections in the FedoraGS3 repository are the
    // same, so the common service list is added to the description.
    Element serviceList = createServiceList(doc);
    collection.appendChild(serviceList);

    Element responseMsg = createResponseMessage(doc, collection, ex,
        GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
    try {
        return FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Builds a GS3 describe response message for the services of a collection
 * in the Fedora-Greenstone repository. The services are the same for all
 * fedora collections: they are the ones named in member variable
 * serviceNames, as produced by createServiceList(). The only
 * collection-specific parts of the response are the collection element's
 * name attribute and the originator of the message.
 * @param collectionName - the name of the collection whose services are to
 * be described.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String describeCollectionServices(String collectionName)
{
    Document responseDoc = builder.newDocument();

    // collection element, named after the collection, holding the services
    Element collection = responseDoc.createElement(GSXML.COLLECTION_ELEM);
    collection.setAttribute(GSXML.NAME_ATT, collectionName);
    collection.appendChild(createServiceList(responseDoc));

    Element responseMsg = createResponseMessage(responseDoc, collection, null,
        GSXML.REQUEST_TYPE_DESCRIBE, collectionName);

    String xml;
    try {
        xml = FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        xml = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return xml;
}
/**
 * Builds a GS3 describe response message for the requested service.
 * All collections in this Digital Library (Fedora Repository) share the
 * same services, so the description does not depend on a collection.
 * The kind of service is derived from its name, ignoring case:
 * names ending in "retrieve" (including ClassifierBrowseMetadataRetrieve)
 * yield just a service element with name and type; names containing
 * "browse" additionally get display items and a classifier list; names
 * containing "query" get the TextQuery or FieldQuery description when the
 * name is exactly "TextQuery" or "FieldQuery". For any other name the
 * type attribute is left without a value.
 * @param serviceName - the name of the service which is to be described.
 * @return the response message as a String; if the message cannot be
 * serialized, FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
 * followed by the TransformerException is returned instead.
 */
public String describeService(String serviceName)
{
    Document doc = this.builder.newDocument();
    Element service = doc.createElement(GSXML.SERVICE_ELEM);
    Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
    attribute.setValue(serviceName);
    service.setAttributeNode(attribute);

    attribute = doc.createAttribute(GSXML.TYPE_ATT);
    // Lower-case once, and independently of the default locale, so that
    // the classification of a service name is the same on every platform.
    final String lowerName = serviceName.toLowerCase(java.util.Locale.ROOT);
    if (lowerName.endsWith("retrieve")) {
        attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
    }
    else if (lowerName.contains("browse")) {
        attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
        // we need name and description elements
        Element displayItem
            = createNameValuePairElement(doc,
                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
        service.appendChild(displayItem);
        displayItem = createNameValuePairElement(doc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
            "Browse pre-defined classification hierarchies");
        service.appendChild(displayItem);

        // now need a classifierList, holding the single classifier for
        // browsing titles by their starting letter
        Element classifierList = doc.createElement(
            GSXML.CLASSIFIER_ELEM + GSXML.LIST_MODIFIER);
        int classifierNum = 1;
        Element classifier = createClassifierElement(doc, "TitleByLetter",
            classifierNum++, "titles by letter", "Browse titles by letter");
        classifierList.appendChild(classifier);
        // ANY MORE CLASSIFIERS? ADD THEM HERE
        service.appendChild(classifierList);
    }
    else if (lowerName.contains("query")) {
        attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
        if (serviceName.equals("TextQuery")) {
            describeTextQueryService(service);
        } else if (serviceName.equals("FieldQuery")) {
            describeFieldQueryService(service);
        }
    }
    // don't forget to add the type attribute to the service!
    service.setAttributeNode(attribute);

    // the service itself is the originator of the response
    String from = serviceName;
    Element responseMsg = createResponseMessage(doc, service, null,
        GSXML.REQUEST_TYPE_DESCRIBE, from);
    try {
        return FedoraCommons.elementToString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Fills out the given service Element so that it becomes the describe
 * response XML for FedoraGS3's TextQuery service: three display items
 * (name, submit button text, description) followed by a parameter list
 * with a maximum-hits parameter and a query-string parameter.
 * @param service is the service Element that is being filled out.
 */
protected void describeTextQueryService(Element service) {
    Document doc = service.getOwnerDocument();

    // display items: name, submit (button) and description
    service.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Text Search"));
    service.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search"));
    service.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
        "Title and full-text search service"));

    // The parameter list. Not offered as parameters:
    // - granularity to search at: always document and section level
    // - casefolding: always on (that is, case is irrelevant)
    // - document display order: always ranked
    Element paramList = doc.createElement(
        GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);

    // integer parameter MAXDOCS with default "100",
    // displayed as "Maximum hits to return"
    Element maxDocsParam = doc.createElement(GSXML.PARAM_ELEM);
    maxDocsParam.setAttribute(GSXML.NAME_ATT, MAXDOCS);
    maxDocsParam.setAttribute(GSXML.DEFAULT_ATT, "100");
    maxDocsParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_INTEGER);
    maxDocsParam.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Maximum hits to return"));
    paramList.appendChild(maxDocsParam);

    // string parameter QUERY, displayed as "Query string"
    Element queryParam = doc.createElement(GSXML.PARAM_ELEM);
    queryParam.setAttribute(GSXML.NAME_ATT, QUERY);
    queryParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_STRING);
    queryParam.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Query string"));
    paramList.appendChild(queryParam);

    service.appendChild(paramList);
}
/** Appends children to the parameter service Element that make the
* final service Element into a describe response XML for FedoraGS3's
* FieldQuery service.
* @param service is the service Element that is being filled out. */
protected void describeFieldQueryService(Element service) {
Document doc = service.getOwnerDocument();
// we need name, submit (button) and description elements
Element displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
"Form Search");
service.appendChild(displayItem);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
service.appendChild(displayItem);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
"Simple fielded search");
service.appendChild(displayItem);
//create the
Element paramList = doc.createElement(
GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
// we ignore granularity to search at: it will always be
// document and section level
// we ignore casefolding: always on (that is, case is irrelevant)
// we ignore document display order: always ranked
// Constructing the following:
//
// Maximum hits to return
//
Element param = doc.createElement(GSXML.PARAM_ELEM);
Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
attribute.setValue(MAXDOCS);
param.setAttributeNode(attribute);
attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
attribute.setValue("100");
param.setAttributeNode(attribute);
attribute = doc.createAttribute(GSXML.TYPE_ATT);
attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
param.setAttributeNode(attribute);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
"Maximum hits to return");
param.appendChild(displayItem);
paramList.appendChild(param);
// Constructing the following:
//
//
//
//
// Word or phrase
//
//
//
// in field
//
//
//
//
//
//
//
//
Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
attribute = doc.createAttribute(GSXML.NAME_ATT);
attribute.setValue(SIMPLEFIELD_ATT);
rowOfParams.setAttributeNode(attribute);
// we want the row of controls to occur multiple times
attribute = doc.createAttribute(GSXML.TYPE_ATT);
attribute.setValue(GSXML.PARAM_TYPE_MULTI);
rowOfParams.setAttributeNode(attribute);
attribute = doc.createAttribute(OCCURS_ATT);
attribute.setValue("4"); // we want this row to occur 4 times
rowOfParams.setAttributeNode(attribute);
//
// Word or phrase
//
param = doc.createElement(GSXML.PARAM_ELEM);
attribute = doc.createAttribute(GSXML.NAME_ATT);
attribute.setValue(QUERY);
param.setAttributeNode(attribute);
attribute = doc.createAttribute(GSXML.TYPE_ATT);
attribute.setValue(GSXML.PARAM_TYPE_STRING);
param.setAttributeNode(attribute);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
"Word or phrase");
param.appendChild(displayItem);
rowOfParams.appendChild(param);
//
// in field
param = doc.createElement(GSXML.PARAM_ELEM);
attribute = doc.createAttribute(GSXML.NAME_ATT);
attribute.setValue(FIELDNAME_ATT);
param.setAttributeNode(attribute);
attribute = doc.createAttribute(GSXML.TYPE_ATT);
attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
param.setAttributeNode(attribute);
attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
attribute.setValue(ALL_FIELDS);
param.setAttributeNode(attribute);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
"in field");
param.appendChild(displayItem);
String[] searchFieldNames
= {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
String[] searchFieldDisplay = {"all titles and full-text",
"document titles only", "document and section titles",
"full-text only"};
// for each fieldName create an option element and insert
// the option into the enum_single drop-down param:
//
for(int i = 0; i < searchFieldNames.length; i++) {
Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
attribute = doc.createAttribute(GSXML.NAME_ATT);
attribute.setValue(searchFieldNames[i]);
option.setAttributeNode(attribute);
displayItem = createNameValuePairElement(doc,
GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
searchFieldDisplay[i]);
option.appendChild(displayItem);
param.appendChild(option); // add option to the drop-down box
}
rowOfParams.appendChild(param);
paramList.appendChild(rowOfParams);
service.appendChild(paramList);
}
/**
 * Describes a single service of a collection. Every collection in this
 * Digital Library (Fedora Repository) shares the same FedoraGS3 services,
 * so the collection plays no part in the answer: the call is forwarded
 * unchanged to describeService(serviceName).
 * @param collectionName - the name of the collection whose service is to
 * be described. It is accepted for interface reasons only and is not used.
 * @param serviceName - the name of the service which is to be described.
 * @return the GS3 describe response message that describeService(serviceName)
 * produces for the requested service. */
public String describeCollectionService(String collectionName,
        String serviceName)
{
    // Services belong to FedoraGS3 as a whole, not to any particular
    // (greenstone) collection, hence collectionName is deliberately ignored.
    final String description = describeService(serviceName);
    return description;
}
/** Performs the implemented browse operation: browsing the titles of the
 * documents in the given collection by their starting letter.
 * Only classifier IDs that start with "CL1" are processed; every other ID
 * is skipped and contributes nothing to the response.
 * @param collectionName is the name of the collection whose documents are
 * to be browsed.
 * @param classifierIDs are the ids of the classifier nodes on which to
 * browse, for instance CL1 or CL1.x.
 * @param structures - the requested browse substructure. Can be any combination
 * of ancestors, parent, siblings, children, descendants.
 * @param infos - the requested structural info. Can be numSiblings,
 * siblingPosition, numChildren.
 * @return a GS3 ClassifierBrowse response message containing the classifier
 * node list built for the requested ids, or the XML-to-String conversion
 * failure message (followed by the exception) if the response element could
 * not be converted to a String.
 */
public String browse(String collectionName, String[] classifierIDs,
        String[] structures, String[] infos)
{
    // Flatten each request array into one string in which every entry is
    // followed by a '|'
    StringBuilder structureSpec = new StringBuilder();
    for(int s = 0; s < structures.length; s++) {
        structureSpec.append(structures[s]).append("|");
    }
    StringBuilder infoSpec = new StringBuilder();
    for(int n = 0; n < infos.length; n++) {
        infoSpec.append(infos[n]).append("|");
    }
    final String structure = structureSpec.toString();
    final String info = infoSpec.toString();

    Document doc = builder.newDocument();
    Element classifierNodeList
        = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);

    for(int c = 0; c < classifierIDs.length; c++) {
        String classifierID = classifierIDs[c];
        if(!classifierID.startsWith("CL1")) {
            continue; // only browsing by titles (CL1) is implemented
        }
        browseTitlesByLetterClassifier(doc, classifierNodeList,
                collectionName, classifierID, structure, info);
    }

    // Nothing in this method records a failure, so the response is always
    // created with a null exception.
    // NOTE(review): the response is tagged GSXML.REQUEST_TYPE_DESCRIBE while
    // browseMetadataRetrieve() uses GSXML.REQUEST_TYPE_PROCESS - confirm
    // that this is intended.
    FedoraGS3RunException noError = null;
    Element responseMsg = createResponseMessage(doc, classifierNodeList, noError,
            GSXML.REQUEST_TYPE_DESCRIBE, "ClassifierBrowse");

    String response;
    try {
        response = FedoraCommons.elementToString(responseMsg);
    } catch(TransformerException e) {
        response = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return response;
}
/** CL1 browsing classifier: browsing titles by starting letter.
 * Builds the requested part of the browse structure for one classifier
 * node and appends it to classifierNodeList.
 * <ul>
 * <li>CL1 (no '.'): the node is appended to the list with a nodeStructure
 * holding a copy of it; when descendants or children are requested, the
 * copy is filled by getTitlesByLetterStructure().</li>
 * <li>CL1.x (one '.'): when parent, ancestors or siblings are requested the
 * top-level node is appended to the list and the requested node is placed
 * under a copy of it inside the nodeStructure; otherwise a copy of the
 * requested node is appended with the requested node in its nodeStructure.
 * When descendants or children are requested, the documents for the letter
 * are appended to the requested node by getTitlesForLetter().</li>
 * <li>An ID with more than one '.', or a CL1.x whose x is not a number, is
 * logged as an error and nothing is appended.</li>
 * </ul>
 * @param doc - the document object that will contain the CL1 browsing structure.
 * @param classifierNodeList - the classifiers will be added to this nodeList.
 * @param collectionName - name of the collection through which we are browsing CL1.
 * @param classifierID - the ID of the (sub)classifier. Can be CL1, or CL1.x
 * where x is the 1-based position of a letter in the alphabet (1 = A).
 * @param structure - the requested browse substructure. Can be any combination of
 * ancestors, parent, siblings, children, descendants; "entire" is treated as
 * ancestors plus descendants.
 * @param info - the requested structural info. Can be numSiblings, siblingPosition,
 * numChildren. Currently not used by this method.
 * @return the classifierNodeList with the CL1 classifier browse structure.
 */
public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
        String collectionName, String classifierID,
        String structure, String info)
{
    if(structure.indexOf("entire") != -1) {
        structure = structure + "ancestors|descendants";
    }

    // Structure of ancestors and children only at this stage
    int firstLevel = classifierID.indexOf('.');
    int secondLevel = classifierID.lastIndexOf('.');

    if(firstLevel != secondLevel) {
        // more than one '.': ought to be a doc structure retrieve request,
        // not a classifierbrowse structure retrieve
        LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
        return classifierNodeList;
    }

    Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
    // requested classifier node
    Element classNode = newVListClassifierNode(doc, classifierID);

    if(firstLevel == -1) { // CL1 - toplevel node
        // clone the node before appending children
        Element root = (Element)classNode.cloneNode(true);
        classifierNodeList.appendChild(classNode);
        classNode.appendChild(nodeStructure);
        nodeStructure.appendChild(root);

        if(structure.indexOf("descendants") != -1) {
            getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
        } else if(structure.indexOf("children") != -1) {
            getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
        }
        // nothing to be done for siblings
        return classifierNodeList;
    }

    // CL1.x, where x is a number. Decode x before anything is attached, so
    // that a malformed ID leaves classifierNodeList untouched instead of
    // failing with an unchecked exception halfway through.
    char ch;
    try {
        int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
        ch = (char)(num - 1 + 'A');
    } catch(NumberFormatException e) {
        LOG.error("ClassifierID: " + classifierID
                  + ". Subclassifier is not a number: " + e);
        return classifierNodeList;
    }

    if(structure.indexOf("parent") != -1
       || structure.indexOf("ancestors") != -1
       || structure.indexOf("siblings") != -1) {
        String toplevelID = classifierID.substring(0, firstLevel);
        Element toplevelNode = newVListClassifierNode(doc, toplevelID);
        // clone nodes before appending children
        Element node = (Element)toplevelNode.cloneNode(true);
        classifierNodeList.appendChild(toplevelNode);
        toplevelNode.appendChild(nodeStructure);
        nodeStructure.appendChild(node);

        if(structure.indexOf("siblings") != -1) { // get the children of the parents too
            // pass the requested node (classNode) so that it is attached in the
            // correct location among its siblings, and to ensure that it is not
            // recreated. getTitlesByLetterStructure() will append classNode to node
            getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
        } else {
            node.appendChild(classNode);
        }
    } else {
        Element node = (Element)classNode.cloneNode(true);
        classifierNodeList.appendChild(node);
        node.appendChild(nodeStructure);
        nodeStructure.appendChild(classNode);
    }

    if(structure.indexOf("descendants") != -1) {
        getTitlesForLetter(ch, collectionName, classNode, "descendants");
    } else if(structure.indexOf("children") != -1) {
        getTitlesForLetter(ch, collectionName, classNode, "children");
    }
    return classifierNodeList;
}

/** Creates a classifier node element that is not yet attached to any parent.
 * @param doc - the document used to create the element.
 * @param nodeID - the value for the node ID attribute.
 * @return a new GSXML.CLASS_NODE_ELEM element whose node ID attribute is
 * nodeID and whose child type attribute is GSXML.VLIST.
 */
private Element newVListClassifierNode(Document doc, String nodeID) {
    Element node = doc.createElement(GSXML.CLASS_NODE_ELEM);
    node.setAttribute(GSXML.NODE_ID_ATT, nodeID);
    node.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
    return node;
}
/** Fills a (CL1) subclassifier element with the docs whose titles start with
 * the given letter. For every document ID returned by browseTitlesByLetter()
 * a docNode with its section structure is created and appended.
 * @param ch - the starting letter of the document titles to retrieve.
 * @param collectionName - name of the collection through which we are browsing CL1.
 * @param classifierNode - the docNodes found will be appended to this node.
 * @param depthStructure - can be descendants or children. It is handed on to
 * getSectionStructureXML() and so decides how much of each document's
 * structure is retrieved.
 * @return the given classifierNode which will have the child (or descendant) documents
 * appended to it. If anything goes wrong, the node is returned with whatever
 * was appended up to that point.
 */
public Element getTitlesForLetter(char ch, String collectionName,
        Element classifierNode, String depthStructure)
{
    final Document ownerDoc = classifierNode.getOwnerDocument();
    FedoraGS3RunException failure = null; // any RemoteException or UnsupportedEncodingException

    try {
        // all the documents whose title begins with the letter
        final String[] matches
            = this.browseTitlesByLetter(collectionName, String.valueOf(ch));

        // a letter without any matching documents simply skips the loop
        for(int d = 0; d < matches.length; d++) {
            final String docID = matches[d];
            // split the ID into the document's fedora PID and section ID
            final String sectionID = getSectionIDFromDocID(docID);
            final String docPID = getDocPIDFromDocID(docID);

            // the required section, along with children or descendants
            final Element section
                = getSectionStructureXML(docPID, sectionID, depthStructure, "");
            final Element docRootNode
                = createDocNodeFromSubsection(ownerDoc, section, docPID);
            // fill in the subtree below docRootNode, where section
            // represents the root section
            createDocStructure(ownerDoc, section, docRootNode, docPID);
            classifierNode.appendChild(docRootNode);
        }
    } catch(Exception e) {
        // NOTE(review): the wrapped exception is never logged or handed to
        // the caller, so a failure here goes unnoticed - confirm whether it
        // should be reported.
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("requested portion of TOC file or trouble with fielded search ");
    }
    return classifierNode;
}
/** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
 * starting letter of the alphabet. x is the 1-based position in the alphabet
 * of each letter for which there are matching document titles.
 * @param collectionName - name of the collection through which we are browsing CL1.
 * @param classifierNode - the subclassifier nodes will be appended to this node.
 * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
 * the IDs for the subclassifiers (classifierID.x).
 * @param getDescendants - if true, the documents of each letter (with all their
 * descendants) are appended to the letter's subclassifier; otherwise only the
 * subclassifier nodes themselves are created.
 * @param wantedSibling - the node (already created) whose siblings are requested, or
 * null. This node is not recreated: it is appended itself at the position of
 * its letter. It is attached even when no titles are found for its letter or
 * when looking them up fails, because the caller goes on to use it as part
 * of the response.
 * @return the given classifierNode, with the subclassifiers for the letters of
 * the alphabet that are represented in the document titles.
 */
public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
        String classifierID, boolean getDescendants,
        Element wantedSibling)
{
    String wantedID = "";
    if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
        wantedID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
    }
    Document doc = classifierNode.getOwnerDocument();

    // We're going to loop to the end of the alphabet
    int count = 1;
    for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
        // Retrieve the document structure for each subClassifierID:
        // all the documents that begin with its letter.
        String letter = String.valueOf(ch);
        String subClassifierID = classifierID + "." + count;
        boolean isWantedSibling
            = wantedSibling != null && wantedID.equals(subClassifierID);
        try {
            String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
            if(docPIDs.length == 0 && !isWantedSibling) {
                continue; // skip letters that don't have any kids
            }

            Element subClassifier;
            if(isWantedSibling) {
                // already have the requested node, don't recreate it
                subClassifier = wantedSibling;
            } else {
                subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
                subClassifier.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
                subClassifier.setAttribute(GSXML.NODE_ID_ATT, subClassifierID);
            }
            classifierNode.appendChild(subClassifier); // either way, append the subClassifier node

            if(getDescendants) { // get the documents
                // append the docNodes for the docPIDs found as children
                // of subclassifier
                for(int i = 0; i < docPIDs.length; i++) {
                    // work out the document's fedora PID and section ID
                    String sectionID = getSectionIDFromDocID(docPIDs[i]);
                    String docPID = getDocPIDFromDocID(docPIDs[i]);
                    // get the required section, along with its descendants
                    Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
                    Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
                    // fills in the subtree of the rootNode, where section
                    // represents the root section
                    createDocStructure(doc, section, rootNode, docPID);
                    subClassifier.appendChild(rootNode);
                }
            }
        } catch(Exception e) {
            // Best effort: a failure for one letter must not stop the
            // remaining letters from being processed.
            // NOTE(review): the wrapped exception is never logged or handed
            // to the caller - confirm whether it should be reported.
            FedoraGS3RunException ex = new FedoraGS3RunException(e);
            ex.setSpecifics("requested portion of TOC file or "
                            + "trouble with fielded search ");
            // The requested node must still end up in the structure
            if(isWantedSibling && wantedSibling.getParentNode() == null) {
                classifierNode.appendChild(wantedSibling);
            }
        }
    }
    return classifierNode;
}
/** This method performs something equivalent to a greenstone3
 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs.
 * The only metadata produced is a Title per classifier node, derived from
 * the node ID: the number after the first '.' is mapped to a letter
 * (1 = A, 2 = B, ...), while 0 gives "A-Z". An ID without any '.' (the
 * top-level classifier, e.g. CL1) has no such number and is given the same
 * "A-Z" title instead of failing on number parsing.
 * @param classNodeIDs array of classifierNode IDs for which the metadata
 * needs to be returned.
 * @param metafields are the classifier metadata fields that are to be returned.
 * At present this method ignores them/pretends the requested metafields are
 * "all" and always returns the Title meta for the requested classifier nodes
 * (because that is all the metadata this Fedora classifier has at present).
 * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
 * lists the metadata for all the classifierNodes passed as parameter, or
 * the XML-to-String conversion failure message if the response could not
 * be converted to a String.
 * @throws NumberFormatException if the part of an ID that follows its first
 * '.' (up to the next '.', if any) is not an integer. */
public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
{
    Document doc = this.builder.newDocument();
    Element classifierNodeList = doc.createElement(
            GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);

    // create a classifier node with its metadata list for each requested ID
    for(int i = 0; i < classNodeIDs.length; i++) {
        String classNodeID = classNodeIDs[i];
        String classifierName;

        int index = classNodeID.indexOf('.');
        if(index == -1) {
            // top-level classifier: there is no subclassifier number to
            // decode, so use the label that covers the whole alphabet
            classifierName = "A-Z";
        } else {
            // strip ID of everything before the first '.' (i.e. remove "CL#.")
            String subClassifierNumber = classNodeID.substring(index+1);
            index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
            if(index != -1) {
                subClassifierNumber = subClassifierNumber.substring(0, index);
            }
            int subClassifierNum = Integer.parseInt(subClassifierNumber);
            if(subClassifierNum == 0) {
                classifierName = "A-Z";
            } else {
                char letter = (char)('A' + subClassifierNum - 1); // A = 1
                classifierName = String.valueOf(letter);
            }
        }

        Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
        classifierNode.setAttribute(GSXML.NODE_ID_ATT, classNodeID);

        Element metadataList = doc.createElement(
                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
        // at least one metadata element: that of the title of this
        // classifierNode
        Element metadata = this.createNameValuePairElement(doc,
                GSXML.METADATA_ELEM, "Title", classifierName);

        // now connect up everything
        metadataList.appendChild(metadata);
        classifierNode.appendChild(metadataList);
        classifierNodeList.appendChild(classifierNode);
    }

    Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
            GSXML.REQUEST_TYPE_PROCESS,
            "ClassifierBrowseMetadataRetrieve");
    try {
        return FedoraCommons.elementToString(responseMsg);
    } catch(TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/** Builds a classifier element with two displayItem children.
 * @return a newly created element of the following format:
 * <pre>
 * &lt;classifier content="somecontent" name="CL+num"&gt;
 *   &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
 *   &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
 * &lt;/classifier&gt;
 * </pre>
 * @param doc - the document used to create the element
 * @param content - value of the content attribute
 * @param classifierNum - the number suffixed to the CL, together forming
 * the classifier Node's ID
 * @param displayNameVal is the bodytext of a named displayItem element
 * @param displayDescrVal is the bodytext of a displayItem element with
 * description */
protected Element createClassifierElement(Document doc, String content,
        int classifierNum, String displayNameVal, String displayDescrVal)
{
    Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);

    // the two attributes: content first, then the name "CL<classifierNum>"
    classifier.setAttribute(GSXML.CLASSIFIER_CONTENT_ATT, content);
    classifier.setAttribute(GSXML.NAME_ATT, "CL" + classifierNum);

    // the displayItem children: first the name, then the description
    Element nameItem = createNameValuePairElement(doc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
    classifier.appendChild(nameItem);

    Element descriptionItem = createNameValuePairElement(doc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
    classifier.appendChild(descriptionItem);

    return classifier;
}
/** Builds an element that has a name attribute and a text body.
 * @return a newly created element of the following format:
 * <pre>
 * &lt;elementName name="somename"&gt;some display value&lt;/elementName&gt;
 * </pre>
 * @param doc - the document used to create the element
 * @param elementName - the tag name
 * @param name - value of attribute name
 * @param value - the body text of the element */
protected Element createNameValuePairElement(Document doc, String elementName,
        String name, String value) {
    final Element pair = doc.createElement(elementName);
    pair.setAttribute(GSXML.NAME_ATT, name);

    // the value becomes the one and only child: a text node
    final Text body = doc.createTextNode(value);
    pair.appendChild(body);
    return pair;
}
/**
 * Performs a plain text query using FedoraGSearch.
 * @param collection is the collection to search in
 * @param query is the query term to search for. It won't specify the
 * indexed field to search in, which will mean that GSearch will
 * search all default indexed fields. A null, empty or all-whitespace
 * query means there is nothing to search for.
 * @param maxDocs is the maximum number of results to return (which
 * at present we consider equivalent to FedoraGSearch's hitpageSize).
 * @return the pids that FedoraGSearch extracted from the search result for
 * the given collection; an empty array if there was no query.
 * @throws Exception whatever the FedoraGSearch calls throw.
 */
public String[] textQuery(String collection, String query,
        int maxDocs)
    throws Exception
{
    // no need to search if there is no query or the query is empty spaces
    if(query == null || query.trim().equals("")) {
        return new String[]{};
    }

    // QUERY value won't specify indexed field to search, Fedora
    // Gsearch will take that as meaning all default indexed fields.
    // A PID clause is appended to the user's terms - presumably to
    // restrict the hits to greenstone objects; verify against GSearch.
    String fieldedQuery = query + " " + "PID" + COLON + GREENSTONE;

    // Params to search(): the query string, then 1, maxDocs and 0.
    // NOTE(review): the original comment names these hitpageStart,
    // hitpageEnd and snippetsMax - confirm against fedoraGSearch.search().
    String searchResult = this.fedoraGSearch.search(fieldedQuery, 1, maxDocs, 0);

    // now we have the XML returned by FedoraGSearch, get the pids
    // of the documents returned (if any)
    return this.fedoraGSearch.getPIDsFromSearchResult(
            collection, searchResult);
}
/**
 * This method performs a fieldquery, searching for x number of phrases
 * in each of the 4 indexed fields.
 * Document titles are searched one phrase at a time with
 * searchDocumentTitles(); the complete map is then handed to FedoraGSearch.
 * The pids of both searches are merged without duplicates, in the order in
 * which they were found (document-title hits first).
 * @param collection is the collection to search in
 * @param nameValParamsMap is a Map of several(key, value) entries,
 * 4 of which we're concerned with here:
 * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
 * - the values are a comma separated list of terms (phrases or single
 * words) to search that field in. There may be more than 1 or
 * there may be none (in which case there may be N empty values or
 * spaces separated by commas). Empty and all-whitespace document-title
 * phrases are skipped.
 * @param maxDocs is the maximum number of results to return (which
 * at present we consider equivalent to FedoraGSearch's hitpageSize).
 * @return the unique pids found by the two searches.
 * @throws Exception whatever the underlying search calls throw.
 * */
public String[] fieldQuery(String collection, Map nameValParamsMap,
        int maxDocs)
    throws Exception
{
    // we're going to maintain a list of UNIQUE pids that were returned
    // in search results. A LinkedHashSet also keeps them in the order in
    // which they were found, so the result order is deterministic.
    java.util.Set uniquePids = new java.util.LinkedHashSet();

    // (1) Use Fedora's search to search document titles, if they were
    // specified:
    String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
    if(docTitleTerms != null) { // no doc titles may have been specified
        String[] phrases = docTitleTerms.split(COMMA);
        // search the individual phrases:
        for(int i = 0; i < phrases.length; i++) {
            // test the phrase itself (not the array) for emptiness
            if(phrases[i].trim().length() == 0) {
                continue; // skip when there are no terms
            }
            String[] docTitlePids = this.searchDocumentTitles(
                    collection, phrases[i], false);
            for(int j = 0; j < docTitlePids.length; j++) {
                uniquePids.add(docTitlePids[j]);
            }
        }
    }

    // (2) use FedoraGSearch to search doc AND section titles, and
    // fulltext (in case these were specified in nameValParamsMap):
    String searchResult = this.fedoraGSearch.search(
            nameValParamsMap, 1, maxDocs);
    String[] gsearchPids = this.fedoraGSearch.getPIDsFromSearchResult(
            collection, searchResult);
    for(int i = 0; i < gsearchPids.length; i++) {
        uniquePids.add(gsearchPids[i]);
    }

    return (String[])uniquePids.toArray(new String[uniquePids.size()]);
}
/** Processes a query request and builds the response message.
 * A service whose name ends with "TextQuery" is handled by textQuery();
 * every other service name is treated as a FieldQuery and handled by
 * fieldQuery(). Any failure while preparing or running the search
 * (including a FieldQuery whose field or term list is missing, or which has
 * more terms than fields) is logged and passed on to
 * createResponseMessage() as a FedoraGS3RunException rather than thrown.
 * @return a String representing Greenstone3 XML for a query process
 * response returning the results for the query denoted by parameter
 * nameValParamsMap.
 * @param nameValParamsMap is a Hashmap of name and value pairs for all the
 * query field data values. The names match the field names that
 * describeCollectionService() would have returned for the query service.
 * The entry for MAXDOCS is optional: when it is absent or not a number,
 * 100 is used.
 * @param collection is the name of the collection
 * @param service is the name of the query service
 * This method is only ever called when any of the services in the digital
 * library described themselves as type=query. Therefore any digital
 * libraries that have no query services, can just return empty message
 * strings (or even "") since this method will never be called on them
 * anyway. */
public String query(String collection, String service,
        Map nameValParamsMap)
{
    FedoraGS3RunException ex = null;

    // (1) obtain the requested number of maximum result documents
    int maxDocs = 100;
    Object maxDocsParam = nameValParamsMap.get(MAXDOCS);
    if(maxDocsParam != null) {
        try {
            maxDocs = Integer.parseInt(maxDocsParam.toString().trim());
        } catch(NumberFormatException e) {
            maxDocs = 100;
        }
    }

    String pids[] = {};
    // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
    if(service.endsWith("TextQuery")) {
        try {
            // get the Query field:
            String query = (String)nameValParamsMap.get(QUERY);
            pids = textQuery(collection, query, maxDocs);
        }
        catch(Exception e) {
            LOG.error("Error in TextQuery processing: " + e);
            ex = new FedoraGS3RunException(
                "When trying to use FedoraGenericSearch for a TextQuery", e);
        }
    } else { // (3) FieldQuery
        // The parameters are unpacked inside the try block too, so that
        // malformed input ends up in the response as an error instead of
        // escaping as an unchecked exception.
        try {
            // first get the comma-separated lists
            String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
            String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
            if(listOfFieldNames == null || listOfSearchTerms == null) {
                throw new IllegalArgumentException("FieldQuery requires both the "
                        + FIELDNAME_ATT + " and the " + QUERY + " parameter");
            }
            // both are comma separated lists, so split both on 'comma'
            String[] fieldNames = listOfFieldNames.split(COMMA);
            String[] searchTerms = listOfSearchTerms.split(COMMA);
            // there may be fewer searchTerms than fieldNames (since some
            // fieldNames may have been left empty), but never more
            if(searchTerms.length > fieldNames.length) {
                throw new IllegalArgumentException("FieldQuery received "
                        + searchTerms.length + " search terms for only "
                        + fieldNames.length + " field names");
            }

            // In the fieldNames and searchTerms lists of nameValParamsMap,
            // each searchTerm element was matched with its correspondingly
            // indexed fieldName.
            // A new map is going to reorganise this, by putting all terms
            // for a particular fieldName together in a comma separated list
            // and associating that with the fieldName. I.e. (key, value) ->
            // (fieldName, comma-separated list of all terms in that field)
            Map map = new HashMap();
            for(int i = 0; i < searchTerms.length; i++) {
                String termsList = (String)map.get(fieldNames[i]);
                if(map.containsKey(fieldNames[i])) {
                    // fieldName is already in the map, so append comma
                    // with new value
                    termsList = termsList + COMMA + searchTerms[i];
                } else {
                    // this is the first time this fieldName occurred:
                    // just put the fieldName with searchTerm as-is
                    termsList = searchTerms[i];
                }
                map.put(fieldNames[i], termsList);
            }

            // For fieldquery, we search on all the fieldNames specified
            // - if DOC_TITLES is specified then we use Fedora's search
            // - for all other fieldNames specified, we use FedoraGSearch
            pids = fieldQuery(collection, map, maxDocs);
        }
        catch(Exception e) {
            LOG.error("Error in FieldQuery processing: " + e);
            ex = new FedoraGS3RunException(
                "When trying to use FedoraGenericSearch for a FieldQuery", e);
        }
    }

    // Build Greenstone XML Query response message from
    // the pids (which should be document identifiers)
    Document doc = builder.newDocument();

    // metadataList holding the number of documents matched
    // NOTE(review): this list is built but never attached to the response
    // (only docNodeList is passed to createResponseMessage below) - confirm
    // whether it was meant to be included.
    Element metadataList = doc.createElement(
            GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
    Element metadata = doc.createElement(GSXML.METADATA_ELEM);
    metadata.setAttribute(GSXML.NAME_ATT, NUM_DOCS_MATCHED);
    metadata.setAttribute(GSXML.VALUE_ATT, Integer.toString(pids.length));
    metadataList.appendChild(metadata);

    // docNodeList with one docNode for each pid
    Element docNodeList = doc.createElement(
            GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
    for(int i = 0; i < pids.length; i++) {
        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
        docNode.setAttribute(GSXML.NODE_ID_ATT, pids[i]);
        docNode.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
        docNode.setAttribute(GSXML.NODE_TYPE_ATT, "root");
        docNodeList.appendChild(docNode);
    }

    Element responseMsg = createResponseMessage(doc, docNodeList, ex,
            GSXML.REQUEST_TYPE_PROCESS, service);
    try {
        return FedoraCommons.elementToString(responseMsg);
    } catch(TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
// FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
/** Given a URL that represents a fedoraPID, will look up the object.
 * The URL is turned into a fedoraPID by replacing every '/' with '_' and
 * every ':' with '-', and prefixing the result with
 * GREENSTONE+_HTTP+COLON+collection+HYPHEN.
 * If the object exists, the contents of the DC:Title of its datastream
 * are returned with "-1" appended.
 * If it doesn't exist (no or empty DC:Title, or the lookup failed, which is
 * logged), the URL is returned as-is.
 * @param url the URL that (after modification) represents a fedoraPID to look up.
 * @param collection the name of collection in which to search for the URL
 * representing a fedoraPID.
 * @return the string (representing a fedoraPID) stored in the DC:Title of the
 * URL-fedoraPID, followed by "-1". If the URL-fedoraPID is not an object in
 * the given collection, then the parameter url is returned.
 */
public String getPIDforURL(String url, String collection) {
    // (1) convert url to the fedorapid
    // / -> _ and : -> -
    // (literal character replacement, no regular expression needed)
    String fedoraPID = url.replace('/', '_').replace(':', '-');
    // prefix the namespace, the http marker and the collection
    fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;

    // (2) Look up the datastream for the fedorapid
    String dcTitle = "";
    try {
        dcTitle = getDCTitle(fedoraPID);
    } catch(Exception e) {
        // a failed lookup is treated like a non-existent object below
        LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
    }

    // (3) if fedorapid exists, extract the dc:title content.
    // if it doesn't exist, return url
    if(dcTitle == null || dcTitle.equals("")) {
        return url;
    }
    // The dc:title represents a fedoraPID of its own. The fedora namespace
    // is not prefixed here: that is handled in g2f-buildcol.pl.
    return dcTitle+"-1";
}
/** Manual test driver: connects to the Fedora server described by the
 * file fedoraGS3.properties and prints the responses of the describe,
 * retrieve, browse and query operations to standard output. Exits with
 * status 0 when all calls completed. Any exception is shown in an error
 * dialog and its stack trace is printed.
 * @param args command line arguments; not used. */
public static void main(String args[]) {
    try{
        // testing constructor that takes properties file to show initial
        // fedora server values
        java.io.File propertyFilename
            = new java.io.File("fedoraGS3.properties");
        FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);

        // DESCRIBE: serviceList, collectionList
        System.out.println("serviceList:\n" + con.getServiceList());
        System.out.println("collectionList:\n" + con.getCollectionList());

        // the collection pids are fetched once and reused below
        String[] colPIDs = con.getCollections();
        String[] collectionNames = con.getCollectionNames(colPIDs);
        for(int i = 0; i < collectionNames.length; i++) {
            System.out.println("Describing collections:\n");
            System.out.println(con.describeCollection(collectionNames[i]));
            System.out.println("Describing collection services:\n"
                    + con.describeCollectionServices(collectionNames[i]));
        }

        String[] serviceNames = con.getServiceNames();
        for(int i = 0; i < serviceNames.length; i++) {
            System.out.println("Describing " + serviceNames[i] + ":\n"
                    + con.describeCollectionService("demo", serviceNames[i]));
        }

        // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
        // along with EX of the top-level document:
        System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
        System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));

        // the documents of the first collection, if there is any collection
        String[] docIDs = (colPIDs.length > 0)
            ? con.getCollectionDocs(colPIDs[0]) : new String[0];

        System.out.println("\nGET CONTENT:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getContent(docIDs[i]));
        }

        System.out.println("\nGET META:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
        }

        String[] getTitlesFor = {
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
        };
        // first let's display the regular meta for top-level docs and
        // their sections
        for(int i = 0; i < getTitlesFor.length; i++) {
            System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
        }
        System.out.println("\nTitles are:");
        System.out.println(con.getTitleMetadata(getTitlesFor));

        System.out.println("\nGET STRUCTURE:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println("Descendents and numChildren:\n"
                    + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
            System.out.println("Parent and numSiblings:\n"
                    + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
        }

        // TEST ERROR CASES:
        System.out.println("\nTESTING ERROR CASES");
        System.out.println(con.getContent("greenstone:demo-pinky"));
        String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
                                "greenstone:demo-pinky" };
        System.out.println(con.getContent(errorCases));
        System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
        System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));

        System.out.println("\nCLASSIFIER BROWSE");
        System.out.println(con.browse("gs2mgdemo",
                new String[]{"CL1"}, new String[] {""}, new String[] {""}));

        System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
        // one classifier node ID per letter of the alphabet: CL1.1 to CL1.26
        String[] classNodeIDs = new String[26];
        for(int i = 0; i < classNodeIDs.length; i++) {
            int subClassifierNum = i + 1;
            classNodeIDs[i] = "CL1." + subClassifierNum;
        }
        System.out.println(con.browseMetadataRetrieve(
                classNodeIDs, new String[]{"all"}));

        System.out.println("Testing query services");
        System.out.println("TEXT QUERY:");
        Map formControlValsMap = new HashMap();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "snails");
        String searchResponse
            = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
        System.out.println(searchResponse);

        System.out.println("FIELD QUERY:");
        formControlValsMap.clear();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
        formControlValsMap.put(FIELDNAME_ATT,
                "allFields,docTitles,allFields,allFields");
        searchResponse
            = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
        System.out.println(searchResponse);

        System.exit(0);
    }catch(Exception e) {
        JOptionPane.showMessageDialog(
                null, e, "Error", JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}
}