/**
*#########################################################################
* FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
* of the Greenstone digital library suite from the New Zealand Digital
* Library Project at the University of Waikato, New Zealand.
*
* Copyright (C) 2008 New Zealand Digital Library Project
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*########################################################################
*/
package org.greenstone.fedora.services;
import java.io.StringReader;
import org.apache.log4j.Logger;
import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
import org.greenstone.gsdl3.util.GSXML;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Attr;
import org.w3c.dom.Text;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import java.io.File;
import java.util.HashMap;
import java.util.Properties;
import java.util.Map;
import javax.swing.JOptionPane;
import org.xml.sax.SAXException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import javax.net.ssl.SSLHandshakeException;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.rmi.RemoteException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
/**
* Class that extends FedoraConnection in order to be able to use
* Fedora's web services to retrieve the specific datastreams of
* Greenstone documents stored in Fedora's repository. This class
* provides methods that convert those datastreams into Greenstone3
* XML response messages which are returned.
* @author ak19
*/
public class FedoraGS3Connection
extends FedoraConnection implements FedoraToGS3Interface,
FedoraToGS3Interface.Constants
{
/** The logging instance for this class */
private static final Logger LOG = Logger.getLogger(
FedoraGS3Connection.class.getName());
/** Default name of the FedoraGSearch index ("BasicIndex"). It is used
* whenever no other index name is supplied: as the fallback for the
* "gsearch.indexName" property and by the overridden init(...) method. */
private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
/** Complete list of services that FedoraGS3 would support if everything
* goes well. If a connection to FedoraGSearch cannot be established,
* the query services (those with "query" in their name) will not be
* available. The services actually supported are given by member
* variable serviceNames. */
protected static final String[] SERVICES = {
"DocumentContentRetrieve", "DocumentMetadataRetrieve",
"DocumentStructureRetrieve",
"TextQuery", "FieldQuery",
"ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
};
/** List of services actually supported by our FedoraGS3 repository
* after construction. If FedoraGenericSearch can't be connected to,
* then query services will not be offered. Assigned by
* initSearchFunctionality(): either SERVICES itself (same array, not a
* copy) or a new array holding only the non-query entries. */
protected String[] serviceNames;
/** constant CHILDREN indicates that a DocumentStructureRetrieve is to
* return only the child nodes of a section, not any further descendants */
protected static final int CHILDREN = 0;
/** constant DESCENDANTS indicates that a DocumentStructureRetrieve is to
* return all descendants of a section */
protected static final int DESCENDANTS = 1;
/** The object used to connect to FedoraGenericSearch, which is used
* for doing full-text searching. Reset to null at the start of
* initSearchFunctionality() and stays null if the connection attempt
* fails. */
protected GSearchConnection fedoraGSearch;
/** The url for the wsdl file of FedoraGSearch's web services.
* By default this will be the Fedora server's base URL
* concatenated to "gsearch/services/FgsOperations?wsdl" */
protected String gSearchWSDLURL;
/** The last part of the gSearchWSDL URL. The first part is
* the same as the fedora server's base url.
* Note: setGSearchWSDLURL(url) replaces gSearchWSDLURL only and does not
* update this suffix. */
protected String gSearchWSDLSuffix;
/** The name of the index that FedoraGSearch will index the GS3
* documents into. If no name is specified in the properties file,
* this will default to DEFAULT_FEDORA_INDEX ("BasicIndex"). */
protected String gSearchIndexName;
/** 5 argument constructor, the same as that of superclass FedoraConnection.
* Instantiates a FedoraGS3Connection object which connects to Fedora's
* web services through stub classes and tries to connect to FedoraGSearch's
* web services through the default WSDL location for it
* (the fedora base url followed by "gsearch/services/FgsOperations?wsdl").
* If another url is to be used, call setGSearchWSDLURL(url) once the
* constructor has returned.
* @param protocol can be either http or https
* @param host is the host where the fedora server is listening
* @param port is the port where the fedora server is listening
* @param fedoraServerUsername is the username for administrative
* authentication required to access the fedora server.
* @param fedoraServerPassword is the password for administrative
* authentication required to access the fedora server. If no password was set
* when installing Fedora, pass "".
*/
public FedoraGS3Connection(String protocol, String host, int port,
String fedoraServerUsername, String fedoraServerPassword)
throws ParserConfigurationException, MalformedURLException,
SSLHandshakeException, RemoteException, AuthenticationFailedException,
NotAFedoraServerException, ConnectException, Exception
{
super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
// All work is delegated to the superclass constructor; the
// GSearchConnection is instantiated as a side effect of a method of this
// class that the superclass constructor invokes.
// NOTE(review): the original comment here named
// setInitialisationProperties(properties) as that method, but the Javadoc
// of the overridden init(...) in this class says the 5-argument path goes
// through init(...) instead - confirm against FedoraConnection.
}
/** No-argument constructor which is the same as that of superclass
* FedoraConnection: it displays a small dialog requesting input for the
* host, port, administrative password and username of the fedora server.
* If no password was set on the fedora repository when installing it,
* the user can leave the password field blank.
* This constructor adds no logic of its own beyond calling super(). */
public FedoraGS3Connection()
throws ParserConfigurationException, MalformedURLException,
CancelledException, ConnectException, RemoteException,
SSLHandshakeException, Exception
{
super();
// super() will call setInitialisationProperties(properties), which is
// overridden in this class and tries to instantiate the
// GSearchConnection.
}
/** Single-argument constructor which is the same as that of superclass
* FedoraConnection: it takes the properties file where connection
* initialisation values may already be provided and then
* displays a small dialog requesting input for the host, port,
* administrative password and username of the fedora server, showing
* the values in the properties file as defaults. If the necessary
* initialisation values are not present in the file, the corresponding
* fields in the dialog will be blank.
* If no password was set on the fedora repository when installing it,
* the user can leave the password field blank.
* @param propertiesFilename the properties file, passed unchanged to the
* superclass constructor */
public FedoraGS3Connection(File propertiesFilename)
throws ParserConfigurationException, MalformedURLException,
CancelledException, ConnectException, RemoteException,
SSLHandshakeException, Exception
{
super(propertiesFilename);
// super() will call setInitialisationProperties(properties), which is
// overridden in this class and tries to instantiate the
// GSearchConnection.
}
/** Hook that the superclass constructor invokes with whatever properties
 * were preloaded from a properties file. It is overridden here so that,
 * after the superclass has processed the properties, the FedoraGSearch
 * settings are resolved and a GSearchConnection is attempted:
 * - "gsearch.wsdlURL.suffix" is read (default
 *   "gsearch/services/FgsOperations?wsdl") and appended to the fedora
 *   baseURL to give gSearchWSDLURL;
 * - "gsearch.indexName" is read (default DEFAULT_FEDORA_INDEX, i.e.
 *   "BasicIndex") to give gSearchIndexName.
 * Both resolved values are written back into the properties object under
 * the same keys, so that they are present if the properties are saved
 * again.
 * @param properties is the Properties map loaded from a properties file
 * (if there was any); it is read and updated in place.
 */
protected void setInitialisationProperties(Properties properties)
    throws ParserConfigurationException, MalformedURLException,
    CancelledException, ConnectException, RemoteException,
    SSLHandshakeException, Exception
{
    super.setInitialisationProperties(properties);

    // WSDL location: configured suffix (or the default) appended to the
    // fedora base URL.
    final String wsdlSuffix = properties.getProperty(
        "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
    this.gSearchWSDLSuffix = wsdlSuffix;
    this.gSearchWSDLURL = this.baseURL + wsdlSuffix;
    // Record the suffix actually in use.
    properties.setProperty("gsearch.wsdlURL.suffix", wsdlSuffix);

    // Index name: configured value, or DEFAULT_FEDORA_INDEX when absent.
    final String indexName = properties.getProperty(
        "gsearch.indexName", DEFAULT_FEDORA_INDEX);
    this.gSearchIndexName = indexName;
    properties.setProperty("gsearch.indexName", indexName);

    // Finally, try to connect to FedoraGSearch's web services.
    initSearchFunctionality();
}
/** Overridden init method to work with the 5 argument constructor, so that
 * setInitialisationProperties() (which needs a Properties map) can be
 * bypassed. After the superclass initialisation, the FedoraGSearch
 * settings are set to their defaults - the WSDL suffix
 * "gsearch/services/FgsOperations?wsdl" appended to the fedora baseURL,
 * and the index name DEFAULT_FEDORA_INDEX - and a connection to
 * FedoraGSearch is attempted.
 */
protected void init(String protocol, String host, String port,
    String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
    AuthenticationFailedException, RemoteException, Exception
{
    super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);

    final String defaultSuffix = "gsearch/services/FgsOperations?wsdl";
    this.gSearchWSDLSuffix = defaultSuffix;
    this.gSearchWSDLURL = this.baseURL + defaultSuffix;
    this.gSearchIndexName = DEFAULT_FEDORA_INDEX;

    initSearchFunctionality();
}
/** Init method that instantiates a GSearchConnection object used
 * to work with the separate FedoraGSearch web services, using the current
 * values of gSearchWSDLURL and gSearchIndexName.
 * On success, serviceNames is set to SERVICES (all services offered).
 * On any failure the problem is logged together with its cause,
 * fedoraGSearch is left null, and serviceNames is set to a new array
 * holding every entry of SERVICES whose name does not contain "query"
 * (case-insensitively), since query services are provided by
 * FedoraGSearch. No exception is propagated to the caller.
 */
protected void initSearchFunctionality()
{
    try {
        // Drop any previous connection first, so that a failed attempt
        // leaves no stale connection behind.
        this.fedoraGSearch = null;
        this.fedoraGSearch = new GSearchConnection(
            gSearchWSDLURL, gSearchIndexName);
        this.serviceNames = SERVICES;
    } catch(Exception e) {
        // Pass the exception to the logger so the cause of the failed
        // connection is not lost.
        LOG.error("Cannot connect to FedoraGSearch's web services at "
            + gSearchWSDLURL + "\nQuery services will not be available.", e);

        // Something went wrong when trying to connect to FedoraGSearch's
        // web services, so the services now supported are everything
        // except the query services.
        String[] kept = new String[SERVICES.length];
        int keptCount = 0;
        for(int i = 0; i < SERVICES.length; i++) {
            if(!SERVICES[i].toLowerCase().contains("query")) {
                kept[keptCount] = SERVICES[i];
                keptCount++;
            }
        }
        // Trim to the exact number of non-query services.
        String[] nonQueryServices = new String[keptCount];
        System.arraycopy(kept, 0, nonQueryServices, 0, keptCount);
        this.serviceNames = nonQueryServices;
    }
}
/** Accessor for the location of the FedoraGSearch web services WSDL.
 * @return the current value of gSearchWSDLURL */
public String getGSearchWSDLURL() {
    return this.gSearchWSDLURL;
}
/** Replaces the stored location of the WSDL file of FedoraGSearch's web
 * services and then attempts to instantiate a connection to those web
 * services by calling initSearchFunctionality().
 * Only gSearchWSDLURL is assigned; gSearchWSDLSuffix keeps its previous
 * value.
 * @param url is the new url of the GSearch web services WSDL file */
public void setGSearchWSDLURL(String url) {
    gSearchWSDLURL = url;
    this.initSearchFunctionality();
}
/** Accessor for the name of the index Fedora Generic Search will search
 * in (the index GS3 docs have been indexed into).
 * @return the current value of gSearchIndexName */
public String getGSearchIndexName() {
    return this.gSearchIndexName;
}
/** Replaces the stored name of the index containing the indexed GS3
 * documents and then attempts to instantiate a connection to the Fedora
 * GSearch web services with this new index name, by calling
 * initSearchFunctionality().
 * @param indexName is the new name of the index containing indexed GS3
 * docs that GSearch should search in. */
public void setGSearchIndexName(String indexName) {
    gSearchIndexName = indexName;
    this.initSearchFunctionality();
}
/** Accessor for the services actually supported by FedoraGS3.
 * @return the serviceNames array itself (not a copy) */
protected String[] getServiceNames() {
    return serviceNames;
}
/**
 * For finding out if the sectionNumber is given as part of the docID.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return true if the document identifier docID contains a section
 * number, and false if it consists solely of the docPID.
 * That is, true is returned if the first and the last occurrence of
 * HYPHEN in docID are at different positions (HYPHEN occurs more than
 * once); false if it occurs once or not at all.
 */
protected boolean containsSectionNumber(String docID) {
    // A docID with a section looks like
    // greenstone:colName-docPID-sectionNumber, i.e. it has (at least) two
    // hyphens with the section number after the last one.
    final int firstHyphen = docID.indexOf(HYPHEN);
    final int lastHyphen = docID.lastIndexOf(HYPHEN);
    return firstHyphen != lastHyphen;
}
/** Extracts the docPID from docID and returns it.
 * If a sectionNumber is suffixed to the docID (as determined by
 * containsSectionNumber), everything before the last HYPHEN is returned;
 * otherwise the docID already is the docPID and is returned unchanged.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return only the docPID portion of the docID.
 */
protected String getDocPIDFromDocID(String docID) {
    return containsSectionNumber(docID)
        ? docID.substring(0, docID.lastIndexOf(HYPHEN))
        : docID;
}
/** Returns the section number suffixed to the docID, if there is one.
 * @param docID is the String that contains the docPID and may also
 * contain the section number.
 * @return the part of docID after its last HYPHEN when
 * containsSectionNumber(docID) is true, else the empty string.
 */
protected String getSectionIDFromDocID(String docID) {
    if(!containsSectionNumber(docID)) {
        return "";
    }
    final int lastHyphen = docID.lastIndexOf(HYPHEN);
    return docID.substring(lastHyphen + 1);
}
/** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
 * response message that gives the metadata for each collection identified.
 * Delegates directly to getMetadata(String[]).
 * @param collIDs is an array of fedora pids identifying collections in the
 * fedora repository
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * EX metadata for all the requested collections */
public String getCollectionMetadata(String[] collIDs) {
    final String response = getMetadata(collIDs);
    return response;
}
/** Given a list of document identifiers, returns a GS3
 * DocumentMetadataRetrieve response message containing the metadata for
 * each document. Delegates directly to getMetadata(String[]).
 * @param docIDs is an array of document identifiers (a docID can either be
 * the pid of an item (document) in the fedora repository, or
 * "pid-sectionNumber").
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * EX, DC, DLS metadata for all the requested documents */
public String getDocumentMetadata(String[] docIDs) {
    final String response = getMetadata(docIDs);
    return response;
}
/** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
 * response message that gives the metadata for the collection identified.
 * The id is wrapped in a one-element array and handed to
 * getMetadata(String[]).
 * @param collID is a fedora pid identifying a collection in its repository
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * EX metadata for the requested collection */
public String getCollectionMetadata(String collID) {
    final String[] single = { collID };
    return getMetadata(single);
}
/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
 * response message containing the metadata for the document.
 * The id is wrapped in a one-element array and handed to
 * getMetadata(String[]).
 * @param docID is a document identifier (docID can either be the pid
 * of an item (document) in the fedora repository, or it can be
 * "pid-sectionNumber").
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * EX, DC, DLS metadata for the requested document */
public String getDocumentMetadata(String docID) {
    final String[] single = { docID };
    return getMetadata(single);
}
/** Builds a greenstone DocumentMetadataRetrieve response for the
 * documents or collections indicated by the docIDsOrCollIDs.
 * One documentNode is produced per identifier, in order. If processing
 * any identifier throws, the loop stops, the documentNodes built so far
 * are kept, and the exception is wrapped in a FedoraGS3RunException that
 * is passed to createResponseMessage along with the node list.
 * @param docIDsOrCollIDs is an array of identifiers which may be either the
 * fedora pids for collections, or otherwise may be a document identifier.
 * In the last case, the document ID may consist of either
 * "documentPID-sectionNumber" or may be just the fedora documentPID
 * @return the response message as a formatted XML string; if the XML
 * cannot be converted to a string, the fixed conversion-failure message
 * followed by the TransformerException is returned instead */
public String getMetadata(String[] docIDsOrCollIDs)
{
    final Document doc = builder.newDocument();
    FedoraGS3RunException failure = null;
    final Element docNodeList = doc.createElement(
        GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);

    try {
        for(String id : docIDsOrCollIDs) {
            // one documentNode, holding the metadata, per identifier
            docNodeList.appendChild(getMetadata(doc, id));
        }
    } catch(Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("EX (and/or DC, DLS) metadata datastream");
    }

    final Element responseMsg = createResponseMessage(doc, docNodeList,
        failure, GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");

    String result;
    try {
        result = FedoraCommons.elementToFormattedString(responseMsg);
    } catch(TransformerException e) {
        result = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return result;
}
/** Method that takes a DOM document, as well as an identifier of either
 * a collection or document (which may be a fedora pid for the collection
 * or document, or may be the documentPid-sectionNumber for a document) and
 * returns a documentNode element for it. The documentNode carries the id
 * in its node-id attribute and has a single metadataList child, which
 * holds one metadata element (with a name attribute and a text value) per
 * metadata entry found.
 * Which datastreams are consulted depends on the id:
 * - an id ending in _COLLECTION: EX only;
 * - a document pid, or a docID whose section is "1": EX, DC and DLS of
 *   the top-level document;
 * - any other section: the section's EX and DC metadata.
 * Metadata is appended in the order DC, DLS, EX.
 * @param doc the document used to create the new elements
 * @param id denotes a collection pid, a document pid or a docID of the
 * form "documentpid-sectionNumber"
 * @return documentNode containing the metadata for the collection or
 * document given by parameter id */
protected Element getMetadata(Document doc, String id)
    throws RemoteException, UnsupportedEncodingException,
    SAXException, IOException
{
    // The documentNode for this id, tagged with the id itself.
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, id);

    // The metadataList that will receive all metadata elements.
    Element metadataList = doc.createElement(
        GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);

    // Raw datastreams; DC and DLS stay empty when they do not apply.
    String ex = "";
    String dc = "";
    String dls = "";

    if(id.endsWith(_COLLECTION)) {
        // A collection: only the EX (extracted metadata) datastream.
        ex = this.getEX(id);
    } else {
        // A document or document section. Work out the fedora PID and the
        // section ID. Section "1" denotes the same thing as the top-level
        // document, so both are handled by the top-level branch.
        final String sectionID = getSectionIDFromDocID(id);
        final String docPID = getDocPIDFromDocID(id);
        final boolean topLevel =
            sectionID.equals("") || sectionID.equals("1");
        if(topLevel) {
            // Fetching the document's EX directly is slightly faster than
            // getSectionEXMetadata(docPID, "1").
            ex = this.getEX(docPID);
            dc = this.getDC(docPID);
            // Only top-level documents may have DLS metadata.
            dls = this.getDLS(docPID);
        } else {
            ex = getSectionEXMetadata(docPID, sectionID);
            dc = getSectionDCMetadata(docPID, sectionID);
        }
    }

    // Metadata sets are added in alphabetical order: DC, DLS, EX.
    // DC differs from EX and DLS in that only the namespace prefix of its
    // tags ("dc") is fixed; the rest of each tag name varies.
    if(dc.length() > 0) {
        addMetadataWithNamespacedTagNames(doc, metadataList, dc, DC);
    }
    // DLS is only ever non-empty for top-level documents.
    if(dls.length() > 0) {
        addMetadataWithFixedTagName(doc, metadataList, dls, DLS);
    }
    // EX is always processed: collections, top-level documents and
    // document sections all have an EX datastream.
    addMetadataWithFixedTagName(doc, metadataList, ex, EX);

    docNode.appendChild(metadataList);
    return docNode;
}
/** Parses the metaDatastream and, for every element that is a direct
 * child of its root and whose tag name starts with the lower-cased DC
 * constant, creates a new metadata element whose name attribute is that
 * tag name with COLON replaced by PERIOD (Greenstone uses a period where
 * Fedora uses a colon) and whose text content is the value obtained from
 * FedoraCommons.getValue. Each new element is appended to the
 * metadataList parameter.
 * Children that are not elements (text, comments, processing
 * instructions, ...) are skipped regardless of their node name.
 * @param doc is the Document object using which the new metadata Elements
 * are to be constructed
 * @param metadataList is the metadataList Element to which the new
 * metadata Elements are to be appended as children.
 * @param metaDatastream the metadata datastream in string form (e.g. the
 * Dublin Core metadata stored in the Fedora repository).
 * @param metadataSet is the constant datastream identifier, e.g. "DC".
 * It is currently not consulted: the prefix test always uses DC, as DC
 * is the only set where each tagname differs except for the constant
 * namespace prefix.
 * @throws SAXException if metaDatastream cannot be parsed as XML
 * @throws IOException if reading metaDatastream fails
 */
protected void addMetadataWithNamespacedTagNames(Document doc,
    Element metadataList, String metaDatastream, String metadataSet)
    throws SAXException, IOException
{
    Document src = builder.parse(
        new InputSource(new StringReader(metaDatastream)));

    // Loop-invariant: the tag-name prefix being looked for.
    final String dcPrefix = DC.toLowerCase();

    // getElementsByTagNameNS(DC.toLowerCase(), "*") was found not to work
    // here, so walk the children of the root element instead.
    NodeList children = src.getDocumentElement().getChildNodes();
    for(int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        // Only elements can be cast to Element below; a name test alone
        // does not guarantee that (e.g. a processing instruction's node
        // name is its target).
        if(child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        String nodeName = child.getNodeName();
        if(!nodeName.startsWith(dcPrefix)) {
            continue;
        }
        // need to have a period for Greenstone instead of Fedora's colon
        nodeName = nodeName.replace(COLON, PERIOD);
        Element metatag = (Element)child;
        String value = FedoraCommons.getValue(metatag);

        // create the (name, value) metadata element in the target DOM (doc)
        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
        attribute.setValue(nodeName);
        metadata.setAttributeNode(attribute);
        Text content = doc.createTextNode(value);
        metadata.appendChild(content);
        metadataList.appendChild(metadata);
    }
}
/** Parses the metaDatastream and collects every element whose tag name is
 * the lower-cased metadataSet, then COLON, then METADATA (e.g. the ex or
 * dls metadata tags). For each such element a new metadata element is
 * created in doc:
 * - its name attribute is the source element's NAME attribute (or "" when
 *   absent), followed by HYPHEN and the QUALIFIER attribute when a
 *   qualifier is present, and prefixed with the lower-cased metadataSet
 *   plus PERIOD for every set other than EX (EX names get no prefix);
 * - its text content is the value obtained from FedoraCommons.getValue.
 * Each new element is appended to the metadataList parameter.
 * @param doc is the Document object using which the new metadata Elements
 * are to be constructed
 * @param metadataList is the metadataList Element to which the new
 * metadata Elements are to be appended as children.
 * @param metaDatastream the metadata datastream in string form (e.g. the
 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
 * repository).
 * @param metadataSet is the constant datastream identifier,
 * e.g. "DLS" or "EX". This method applies to the DLS and EX metadata as
 * they have constant tagnames throughout.
 */
protected void addMetadataWithFixedTagName(Document doc,
    Element metadataList, String metaDatastream, String metadataSet)
    throws SAXException, IOException
{
    // Prefix for the generated names: none for EX, otherwise the set name
    // followed by a period (Greenstone uses a period, not Fedora's colon).
    final String namespacePrefix = metadataSet.equals(EX)
        ? ""
        : metadataSet.toLowerCase() + PERIOD;

    final Document src = builder.parse(
        new InputSource(new StringReader(metaDatastream)));

    // All metadata tags of this set, wherever they occur in the stream.
    final String sourceTagName = metadataSet.toLowerCase()+COLON+METADATA;
    final NodeList metaTags = src.getElementsByTagName(sourceTagName);

    final int tagCount = metaTags.getLength();
    for(int index = 0; index < tagCount; index++) {
        final Element metatag = (Element)metaTags.item(index);

        // Name of this metadata entry, taken from the source element.
        String name;
        if(metatag.hasAttribute(NAME)) {
            name = metatag.getAttribute(NAME);
        } else {
            name = "";
        }
        // Several entries may share a name; a qualifier, when present, is
        // appended to tell them apart.
        if(metatag.hasAttribute(QUALIFIER)) {
            name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
        }
        final String value = FedoraCommons.getValue(metatag);

        // Build the (name, value) metadata element in the target DOM.
        final Element metadata = doc.createElement(GSXML.METADATA_ELEM);
        final Attr nameAttribute = doc.createAttribute(GSXML.NAME_ATT);
        nameAttribute.setValue(namespacePrefix + name);
        metadata.setAttributeNode(nameAttribute);
        final Text content = doc.createTextNode(value);
        metadata.appendChild(content);
        metadataList.appendChild(metadata);
    }
}
/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
 * response message containing ONLY the Title metadata for the document.
 * The id is wrapped in a one-element array and handed to
 * getTitleMetadata(String[]).
 * @param docID is a document identifier (docID can either be the pid
 * of an item (document) in the fedora repository, or it can be
 * "pid-sectionNumber").
 * @return a GS3 DocumentMetadataRetrieve response message containing the
 * Title metadata for the requested document */
public String getTitleMetadata(String docID) {
    final String[] single = { docID };
    return getTitleMetadata(single);
}
/** Given a list of document identifiers, returns a GS3
 * DocumentMetadataRetrieve response message containing ONLY the Title
 * metadata for the documents.
 * One documentNode (with a single Title metadata entry) is produced per
 * identifier, in order. If processing any identifier throws, the loop
 * stops, the documentNodes built so far are kept, and the exception is
 * wrapped in a FedoraGS3RunException that is passed to
 * createResponseMessage along with the node list.
 * @param docIDs is a list of document identifiers (where docID can either be
 * the pid of an item (document) in the fedora repository, or it can be
 * "pid-sectionNumber").
 * @return the response message as a formatted XML string; if the XML
 * cannot be converted to a string, the fixed conversion-failure message
 * followed by the TransformerException is returned instead */
public String getTitleMetadata(String[] docIDs) {
    final Document doc = builder.newDocument();
    FedoraGS3RunException failure = null;
    final Element docNodeList = doc.createElement(
        GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);

    try {
        for(String docID : docIDs) {
            docNodeList.appendChild(getTitleMetadata(doc, docID));
        }
    } catch(Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("EX metadata datastream");
    }

    final Element responseMsg = createResponseMessage(doc, docNodeList,
        failure, GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");

    String result;
    try {
        result = FedoraCommons.elementToFormattedString(responseMsg);
    } catch(TransformerException e) {
        result = FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
    return result;
}
/** Method that takes a DOM document, as well as an identifier of either
 * a document or document section, and returns a documentNode element
 * containing the title metadata for it: the documentNode carries docID in
 * its node-id attribute and has one metadataList child, which in turn has
 * one metadata child whose name attribute is TITLE and whose text is the
 * title.
 * The title comes from getDocTitle when docID carries no section number,
 * and from getSectionTitle otherwise.
 * @param doc the document used to create the new elements
 * @param docID denotes the id of a document or a document section, so it
 * is either a document-pid or of the form documentpid-sectionNumber
 * @return documentNode containing the title metadata for the document or
 * section given by parameter docID */
protected Element getTitleMetadata(Document doc, String docID)
    throws RemoteException, UnsupportedEncodingException,
    SAXException, IOException
{
    // documentNode tagged with the requested id
    final Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, docID);

    // metadataList with its single Title metadata element
    final Element metaList = doc.createElement(
        GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
    final Element metadata = doc.createElement(GSXML.METADATA_ELEM);
    metadata.setAttribute(GSXML.NAME_ATT, TITLE);
    metaList.appendChild(metadata);
    docNode.appendChild(metaList);

    // Top-level document when there is no section part, else a section.
    final String sectionID = getSectionIDFromDocID(docID);
    final String docPID = getDocPIDFromDocID(docID);
    final String title = sectionID.equals("")
        ? this.getDocTitle(docPID)
        : this.getSectionTitle(docPID, sectionID);

    metadata.appendChild(doc.createTextNode(title));
    return docNode;
}
/** Returns the structure of the document or section given by docID, in
 * the XML format of a Greenstone3 DocumentStructureRetrieve response
 * message. The request is made with DESCENDANTS, i.e. for the entire
 * subsection of the docID (all descendants included).
 * @param docID the identifier for the document whose structure is required.
 * This is of the format "greenstone:collectionName-docPID"
 * OR "greenstone:collectionName-docPID-sectionNumber"
 * where "greenstone:collectionName-docPID-1" is the same as
 * "greenstone:collectionName-docPID" and will return the
 * same response
 * @return the response message produced by getStructure */
public String getDocumentStructure(String docID) {
    final String[] single = { docID };
    return getStructure(single, DESCENDANTS);
}
/** Returns a view of the structure of the document or section given by
 * docID which contains only the section and its direct children (the
 * request is made with CHILDREN). The structure is returned in the XML
 * format of a Greenstone3 DocumentStructureRetrieve response message.
 * @param docID the identifier for the document whose structure is required.
 * This is of the format "greenstone:collectionName-docPID"
 * OR "greenstone:collectionName-docPID-sectionNumber"
 * where "greenstone:collectionName-docPID-1" is the same as
 * "greenstone:collectionName-docPID" and will return the
 * same response
 * @return the response message produced by getStructure */
public String getChildren(String docID) {
    final String[] single = { docID };
    return getStructure(single, CHILDREN);
}
/** Returns the structure of the documents or sections given by docIDs, in
 * the XML format of a Greenstone3 DocumentStructureRetrieve response
 * message. The request is made with DESCENDANTS, i.e. for the entire
 * subsection of each docID (all descendants included).
 * @param docIDs is an array of identifiers for the documents whose
 * structures are required. Each is of the format
 * "greenstone:collectionName-docPID"
 * OR "greenstone:collectionName-docPID-sectionNumber"
 * where "greenstone:collectionName-docPID-1" is the same as
 * "greenstone:collectionName-docPID" and will return the
 * same response
 * @return the response message produced by getStructure */
public String getDocumentStructure(String[] docIDs) {
    final String response = getStructure(docIDs, DESCENDANTS);
    return response;
}
/**
 * Retrieves a shallow view of the structure of several documents or
 * sections: only the requested sections and their direct children, without
 * any further descendants.
 *
 * @param docIDs the identifiers of the documents (sections) whose
 * structures are required. Each is of the form
 * {@code greenstone:<collectionName>-<docPID>} or
 * {@code greenstone:<collectionName>-<docPID>-<sectionNumber>}. The form
 * {@code greenstone:<collectionName>-<docPID>-1} is treated the same as
 * {@code greenstone:<collectionName>-<docPID>} and gives the same response.
 * @return the structures, in the XML format of a Greenstone3
 * DocumentStructureRetrieve response message.
 */
public String getChildren(String[] docIDs) {
    String structureResponse = getStructure(docIDs, CHILDREN);
    return structureResponse;
}
/**
 * Builds a greenstone3 DocumentStructureRetrieve XML response message
 * holding the document structures for the given docIDs.
 * Similar to FedoraConnection.getTOC(), except that greenstone formatted
 * XML is returned rather than fedora formatted XML: the requested portion
 * of each document's table of contents (TOC) is converted into the
 * greenstone3 format used for DocumentStructureRetrieve responses.
 * Any exception raised while collecting the structures is not propagated;
 * it is handed to createResponseMessage() together with whatever part of
 * the node list was built before the failure.
 *
 * @param docIDs the documentIDs for which the section's structure is
 * returned; a docID is either a fedora pid {@code <docPID>} or
 * {@code <docPID>-<sectionNumber>}.
 * @param levels either CHILDREN or DESCENDANTS. CHILDREN yields only the
 * first-level descendants (children) of the requested sections;
 * DESCENDANTS yields all descendants of the requested sections.
 * @return the response message in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
protected String getStructure(String[] docIDs, int levels)
{
    Document responseDoc = builder.newDocument();

    // the list element (DOC_NODE_ELEM + LIST_MODIFIER) that receives one
    // documentNode per requested docID
    Element nodeList = responseDoc.createElement(
        GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        // append the documentNode elements for the docIDs to the list
        getStructureElement(nodeList, docIDs, levels);
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("(requested portion of) TOC datastream");
    }

    // wrap the node list (and any failure) into a GS3 response message
    Element responseMsg = createResponseMessage(responseDoc, nodeList,
        failure, GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");

    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Populates the documentNodeList portion of a greenstone3
 * DocumentStructureRetrieve XML response message with one documentNode
 * per requested docID, each representing that docID's structure.
 *
 * @param docNodeList the list element to which the documentNodes of the
 * doc structures are appended; new nodes are created in its owner document.
 * @param docIDs the documentIDs for which the section's structure is
 * returned; a docID is either a fedora pid {@code <docPID>} or
 * {@code <docPID>-<sectionNumber>}.
 * @param levels either CHILDREN or DESCENDANTS. CHILDREN yields only the
 * first-level descendants (children) of the requested sections; any other
 * value is handled as DESCENDANTS and yields all descendants.
 */
protected void getStructureElement(Element docNodeList,
    String[] docIDs, int levels)
    throws RemoteException, UnsupportedEncodingException, SAXException,
    IOException
{
    for (String requestedID : docIDs) {
        // split the identifier into its section ID and fedora PID
        String sectionID = getSectionIDFromDocID(requestedID);
        String docPID = getDocPIDFromDocID(requestedID);

        // fetch the section in fedora XML, with either just its children
        // or its whole subtree
        Element fedoraSection = (levels == CHILDREN)
            ? this.getChildrenOfSectionXML(docPID, sectionID)
            : this.getSubsectionXML(docPID, sectionID);

        // mirror that structure in GS3 format and add it to the list
        Element gs3DocNode = getStructure(docNodeList.getOwnerDocument(),
            requestedID, docPID, fedoraSection);
        docNodeList.appendChild(gs3DocNode);
    }
}
/**
 * Converts the portion of a fedora TOC outlining the structure of a
 * document (section) into the Greenstone 3 DOM format used in
 * DocumentStructureRetrieve responses.
 * The result is a documentNode whose nodeID is requestingDocID and whose
 * single nodeStructure child holds the mirrored section tree.
 *
 * @param doc the document used to create all new elements.
 * @param requestingDocID the identifier of the document for which the
 * structure was requested; it becomes the nodeID of the returned
 * documentNode. Note that this is not always the same as parameter docID.
 * @param docID the prefix used for the nodeIDs inside the mirrored
 * structure (getStructureElement() passes the fedora docPID here).
 * @param section the fedora section XML that is being mirrored in
 * greenstone3 format.
 * @return the documentNode element wrapping the mirrored structure.
 */
protected Element getStructure(Document doc, String requestingDocID,
    String docID, Element section)
{
    // mirror the root section first, then fill in its subtree
    Element rootNode = createDocNodeFromSubsection(doc, section, docID);
    createDocStructure(doc, section, rootNode, docID);

    // the nodeStructure element wraps the mirrored tree
    Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
    nodeStructure.appendChild(rootNode);

    // the outer documentNode identifies the docID on which the structure
    // retrieve was performed
    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.NODE_ID_ATT, requestingDocID);
    docNode.appendChild(nodeStructure);
    return docNode;
}
/**
 * Recursively builds a document structure mirroring parameter section,
 * starting below parameter parent and continuing down through all
 * descendants.
 *
 * @param doc the document containing the parent; used to create new nodes.
 * @param section the XML Section element in the fedora repository's TOC
 * whose substructure is to be mirrored.
 * @param parent the greenstone documentNode that receives one child per
 * direct Section child of parameter section.
 * @param docPID the prefix of all nodeIDs in the parent's structure.
 */
protected void createDocStructure(
    Document doc, Element section, Element parent, String docPID)
{
    // walk the section's direct children (if any)
    for (Node candidate = section.getFirstChild(); candidate != null;
            candidate = candidate.getNextSibling()) {
        // only children named SECTION_ELEMENT are mirrored
        if (!candidate.getNodeName().equals(SECTION_ELEMENT)) {
            continue;
        }
        Element subsection = (Element) candidate;
        Element mirrored = createDocNodeFromSubsection(doc, subsection, docPID);
        parent.appendChild(mirrored);
        // descend into the subsection, attaching below the new node
        createDocStructure(doc, subsection, mirrored, docPID);
    }
}
/**
 * Creates the Greenstone3 documentNode element that mirrors one Section
 * element of a fedora TOC.
 * The node always gets docType GSXML.DOC_TYPE_HIERARCHY. A section whose
 * ID attribute is "1" becomes the root node and its nodeID is docID alone;
 * every other section gets the nodeID docID + HYPHEN + sectionID and is an
 * internal node when it has Section descendants or a TYPE attribute, and a
 * leaf node otherwise.
 *
 * @param doc the document that will contain the created documentNode.
 * @param subSection the XML Section element in the fedora repository's TOC
 * which will be mirrored in the returned greenstone documentNode.
 * @param docID the prefix of all nodeIDs in the parent's structure.
 * @return a greenstone documentNode that represents parameter subSection.
 */
protected Element createDocNodeFromSubsection(
    Document doc, Element subSection, String docID)
{
    // a Section without an ID attribute is treated as having an empty ID
    String sectionID;
    if (subSection.hasAttribute(ID)) {
        sectionID = subSection.getAttribute(ID);
    } else {
        sectionID = "";
    }

    String nodeIDValue;
    String nodeTypeValue;
    if (sectionID.equals("1")) {
        // root case: the nodeID carries no section number
        nodeIDValue = docID;
        nodeTypeValue = GSXML.NODE_TYPE_ROOT;
    } else {
        nodeIDValue = docID + HYPHEN + sectionID;
        boolean internal =
            subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0
            || subSection.hasAttribute(TYPE);
        nodeTypeValue = internal
            ? GSXML.NODE_TYPE_INTERNAL : GSXML.NODE_TYPE_LEAF;
    }

    Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
    docNode.setAttribute(GSXML.DOC_TYPE_ATT, GSXML.DOC_TYPE_HIERARCHY);
    docNode.setAttribute(GSXML.NODE_ID_ATT, nodeIDValue);
    docNode.setAttribute(GSXML.NODE_TYPE_ATT, nodeTypeValue);
    return docNode;
}
/**
 * Retrieves the content of a single document section. This is a
 * convenience form that wraps docID in a one-element array and delegates
 * to getContent(String[]).
 *
 * @param docID is expected to be of the form
 * {@code greenstone:<collectionName>-<docPID>-<sectionNumber>} or
 * {@code greenstone:<collectionName>-<docPID>}. As documented for the
 * array form, an identifier without a section number yields the content
 * of section 1.
 * @return whatever getContent(String[]) returns for the single identifier.
 */
public String getContent(String docID) {
    String[] singleDocID = { docID };
    return this.getContent(singleDocID);
}
/**
 * Builds a greenstone3 DocumentContentRetrieve response message holding
 * the text content of each requested document section.
 * For every identifier the fedora docPID and sectionID are worked out and
 * getContentBody(docPID, sectionID) is called with them. Any exception
 * stops the processing of further identifiers and is reported through
 * createResponseMessage() instead of being propagated.
 *
 * @param docIDs an array of document identifiers of the form
 * {@code greenstone:<collectionName>-<docPID>-<sectionNumber>}. When no
 * section is given ({@code greenstone:<collectionName>-<docPID>}), the
 * content of section "1" is returned.
 * @return the response message in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String getContent(String[] docIDs) {
    Document responseDoc = builder.newDocument();

    // the list element that receives one documentNode per docID
    Element nodeList = responseDoc.createElement(
        GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null;
    try {
        for (String requestedID : docIDs) {
            // split the docID into sectionID (SECTION prefix removed)
            // and docPID
            String sectionID = this.removePrefix(
                getSectionIDFromDocID(requestedID), SECTION);
            String docPID = getDocPIDFromDocID(requestedID);
            if (sectionID.length() == 0) {
                // no section specified: fall back to Section id="1"
                sectionID = "1";
            }

            // fetch the contents of the requested section of docPID
            String body = this.getContentBody(docPID, sectionID);

            // the nodeID is the docID exactly as requested, that is,
            // the docPID plus the sectionID when one was present
            Element docNode = responseDoc.createElement(GSXML.DOC_NODE_ELEM);
            docNode.setAttribute(GSXML.NODE_ID_ATT, requestedID);

            // the retrieved text goes into the nodeContent child
            Element nodeContent = responseDoc.createElement(
                GSXML.NODE_CONTENT_ELEM);
            nodeContent.appendChild(responseDoc.createTextNode(body.trim()));
            docNode.appendChild(nodeContent);

            nodeList.appendChild(docNode);
        }
    } catch (Exception e) {
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics("requested doc Section datastream");
    }

    Element responseMsg = createResponseMessage(responseDoc, nodeList,
        failure, GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Gets the text content of one section of a document.
 * The section is fetched with getSection(), parsed as XML, and the text
 * value of its document element is extracted. Every occurrence of the
 * GS3 file-path macro followed by a slash is then removed from that text.
 *
 * @param docPID the pid of the document from which a section's text is to
 * be retrieved.
 * @param sectionID the section identifier, within the document denoted by
 * docPID, whose text is to be returned.
 * @return the text content of the section with the macro prefix removed.
 * @throws SAXException if the retrieved section is not parseable XML.
 */
protected String getContentBody(String docPID, String sectionID)
    throws RemoteException, UnsupportedEncodingException,
    SAXException, IOException
{
    String sectionXML = this.getSection(docPID, sectionID);

    // the content is nested inside the section's XML; the document
    // element is the one whose text we want
    InputSource source = new InputSource(new StringReader(sectionXML));
    Document doc = builder.parse(source);
    String content = FedoraCommons.getValue(doc.getDocumentElement());

    // Remove the macro prefix (described in the original code as
    // "_httpdocimg_/") that precedes associated filenames: it is a GS3
    // defined macro for resolving relative urls and does not help with
    // documents stored in fedora.
    // String.replace() is used rather than replaceAll() so the macro is
    // matched literally and never interpreted as a regular expression.
    return content.replace(GS3FilePathMacro + "/", "");
}
/**
 * Creates greenstone's response message element:
 * {@code <message><response>content</response></message>}.
 * The response carries a "from" attribute (FEDORA_GS3, optionally followed
 * by "/" and the originator) and a type attribute. If an exception is
 * supplied, an error element containing its text is appended to the
 * response after the content. The message element is appended to doc, so
 * doc must not already have a document element.
 *
 * @param doc the Document object which should be used to create the
 * message and response elements.
 * @param content the element that is to be nested inside the response;
 * may be null, in which case no content is added.
 * @param ex any exception that occurred when trying to create the content
 * parameter; may be null if there was none.
 * @param responseType the value for the type attribute of the response,
 * such as "describe", "retrieve", "browse", "query"...
 * @param originator indicates the collectionName or service (like
 * DocumentContentRetrieve) from where this response message originates;
 * null or "" means the response comes from FEDORA_GS3 itself.
 * @return a greenstone response-message element.
 */
protected Element createResponseMessage(Document doc, Element content,
    Exception ex, String responseType, String originator)
{
    Element response = doc.createElement(GSXML.RESPONSE_ELEM);

    // from = FEDORA_GS3, or FEDORA_GS3/originator when one is given.
    // A null originator is handled like an empty one instead of failing.
    boolean noOriginator = (originator == null) || originator.equals("");
    String from = noOriginator ? FEDORA_GS3 : FEDORA_GS3 + "/" + originator;
    response.setAttribute(GSXML.FROM_ATT, from);

    // type = "describe" or "process" - whatever is given in responseType
    response.setAttribute(GSXML.TYPE_ATT, responseType);

    if (content != null) {
        response.appendChild(content);
    }

    // report any exception in an error element, placed after the content
    // element (whatever that was)
    if (ex != null) {
        // Throwable.getMessage() may be null; fall back to toString() so
        // the error element never ends up with a null text node
        String errorText = ex.getMessage();
        if (errorText == null) {
            errorText = ex.toString();
        }
        Element error = doc.createElement(GSXML.ERROR_ELEM);
        error.appendChild(doc.createTextNode(errorText));
        response.appendChild(error);
    }

    Element message = doc.createElement(GSXML.MESSAGE_ELEM);
    message.appendChild(response);
    doc.appendChild(message);
    return message;
}
/**
 * Creates a serviceList element as defined by GS3, containing one service
 * element for every name in member variable serviceNames (the services
 * supported by FedoraGS3).
 * Each service gets a name attribute and a type attribute: browse for
 * "ClassifierBrowse", query for any name containing "Query", and retrieve
 * for everything else.
 *
 * @param doc the Document object which should be used to create the
 * serviceList element.
 * @return the populated serviceList element.
 */
protected Element createServiceList(Document doc)
{
    Element serviceList = doc.createElement(
        GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);

    for (String serviceName : serviceNames) {
        Element service = doc.createElement(GSXML.SERVICE_ELEM);
        service.setAttribute(GSXML.NAME_ATT, serviceName);

        // classify the service by its name
        String serviceType;
        if (serviceName.equals("ClassifierBrowse")) {
            // browseTitlesByLetter
            serviceType = GSXML.SERVICE_TYPE_BROWSE;
        } else if (serviceName.contains("Query")) {
            // search services
            serviceType = GSXML.SERVICE_TYPE_QUERY;
        } else {
            serviceType = GSXML.SERVICE_TYPE_RETRIEVE;
        }
        service.setAttribute(GSXML.TYPE_ATT, serviceType);

        serviceList.appendChild(service);
    }
    return serviceList;
}
/**
 * Produces the GS3 response message for a describe-services request: the
 * list of services supported by the Fedora-Greenstone interface, as given
 * by member variable serviceNames and built by createServiceList().
 *
 * @return the describe response in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String getServiceList()
{
    Document responseDoc = builder.newDocument();

    // the serviceList element forms the body of the response message;
    // no exception and no originator are reported
    Element responseMsg = createResponseMessage(responseDoc,
        createServiceList(responseDoc), null,
        GSXML.REQUEST_TYPE_DESCRIBE, "");

    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Produces a GS3 describe response message listing the collections stored
 * in the Fedora-Greenstone repository: a collectionList element with one
 * collection element (carrying a name attribute) per collection name.
 * A RemoteException raised while looking up the collections is reported
 * inside the response message rather than propagated.
 *
 * @return the describe response in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String getCollectionList()
{
    Document responseDoc = builder.newDocument();

    // the list element that receives one collection element per name
    Element collectionList = responseDoc.createElement(
        GSXML.COLLECTION_ELEM + GSXML.LIST_MODIFIER);

    FedoraGS3RunException failure = null; // any RemoteException
    try {
        // looking up the collections could throw RemoteException
        String[] collectionNames = this.getCollectionNames(
            this.getCollections());
        for (String collectionName : collectionNames) {
            Element collection = responseDoc.createElement(
                GSXML.COLLECTION_ELEM);
            collection.setAttribute(GSXML.NAME_ATT, collectionName);
            collectionList.appendChild(collection);
        }
    } catch (RemoteException e) {
        // perhaps the Greenstone collections could not be found in the
        // fedora repository?
        failure = new FedoraGS3RunException(e);
        failure.setSpecifics(
            "greenstone collections in fedora repository");
    }

    // the collectionList forms the body of the response message
    Element responseMsg = createResponseMessage(responseDoc, collectionList,
        failure, GSXML.REQUEST_TYPE_DESCRIBE, "");
    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Produces a GS3 describe response message for one collection in the
 * Fedora-Greenstone repository. The collection element carries the
 * collection's name, a display item holding its title (in language
 * this.lang) and the list of services built by createServiceList().
 * If the title cannot be retrieved, the display item is left without text
 * and the problem is reported inside the response message.
 *
 * @param collectionName the name of the collection that is to be
 * described. It is converted to a fedora collection pid, which is of the
 * form {@code greenstone:<collectionName>-collection}.
 * @return the describe response in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String describeCollection(String collectionName)
{
    Document doc = builder.newDocument();
    FedoraGS3RunException ex = null;

    Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
    collection.setAttribute(GSXML.NAME_ATT, collectionName);

    // the display item that holds the collection's display name
    Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
    displayItem.setAttribute(GSXML.LANG_ATT, this.lang);
    displayItem.setAttribute(GSXML.NAME_ATT, GSXML.DISPLAY_TEXT_NAME);
    try {
        String title = this.getCollectionTitle(
            getCollectionPID(collectionName));
        displayItem.appendChild(doc.createTextNode(title));
    } catch (Exception e) {
        // can't find Greenstone collections in fedora repository or problem
        // getting their titles from their metadata datastream?
        ex = new FedoraGS3RunException(e);
        // the leading space keeps the two halves of the message apart
        // (previously they ran together as "metadatain")
        ex.setSpecifics("greenstone collections or their metadata"
            + " in the fedora repository");
    }
    // the display item is added even when its title could not be fetched
    collection.appendChild(displayItem);

    // Services for all collections in the FedoraGS3 repository are the
    // same, so the shared service list is added to the description.
    Element serviceList = createServiceList(doc);
    collection.appendChild(serviceList);

    Element responseMsg = createResponseMessage(doc, collection, ex,
        GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Produces a GS3 describe response message for the services of a
 * collection in the Fedora-Greenstone repository. All collections in this
 * Digital Library (Fedora Repository) share the same services - those
 * named in member variable serviceNames - so the service list is the same
 * one that getServiceList() reports, here nested inside a collection
 * element carrying the collection's name.
 *
 * @param collectionName the name of the collection whose services are to
 * be described; used as the collection element's name and as the
 * originator of the response.
 * @return the describe response in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String describeCollectionServices(String collectionName)
{
    Document responseDoc = builder.newDocument();

    Element collection = responseDoc.createElement(GSXML.COLLECTION_ELEM);
    collection.setAttribute(GSXML.NAME_ATT, collectionName);
    collection.appendChild(createServiceList(responseDoc));

    Element responseMsg = createResponseMessage(responseDoc, collection,
        null, GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Produces a GS3 describe response message for the requested service.
 * All collections in this Digital Library (Fedora Repository) share the
 * same services, so this method returns the same as
 * describeCollectionService(collName, serviceName).
 * The kind of service is derived from its name, ignoring case: names
 * ending in "retrieve" (including ClassifierBrowseMetadataRetrieve) are
 * retrieve services and are described by name and type only; names
 * containing "browse" are browse services and additionally get display
 * items and a classifier list; names containing "query" are query
 * services, of which TextQuery and FieldQuery get their parameter
 * descriptions. For any other name the type attribute is left empty.
 *
 * @param serviceName the name of the service which is to be described.
 * @return the describe response in String form, or the conversion-failure
 * message followed by the exception if the XML could not be serialised.
 */
public String describeService(String serviceName)
{
    Document doc = this.builder.newDocument();
    Element service = doc.createElement(GSXML.SERVICE_ELEM);
    service.setAttribute(GSXML.NAME_ATT, serviceName);

    // Service names are protocol identifiers, so fold case once with a
    // fixed locale: the default-locale toLowerCase() can change letters
    // unexpectedly (e.g. 'I' under a Turkish locale).
    String lowerName = serviceName.toLowerCase(java.util.Locale.ENGLISH);

    // stays empty when the name matches none of the known service kinds
    String serviceType = "";
    if (lowerName.endsWith("retrieve")) {
        serviceType = GSXML.SERVICE_TYPE_RETRIEVE;
    }
    else if (lowerName.contains("browse")) {
        serviceType = GSXML.SERVICE_TYPE_BROWSE;
        // we need name and description elements
        service.appendChild(createNameValuePairElement(doc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse"));
        service.appendChild(createNameValuePairElement(doc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
            "Browse pre-defined classification hierarchies"));

        // now need a classifierList
        Element classifierList = doc.createElement(
            GSXML.CLASSIFIER_ELEM + GSXML.LIST_MODIFIER);
        // running number handed to each classifier that gets added
        int classifierNum = 1;
        Element classifier = createClassifierElement(doc, "TitleByLetter",
            classifierNum++, "titles by letter", "Browse titles by letter");
        classifierList.appendChild(classifier);
        // ANY MORE CLASSIFIERS? ADD THEM HERE
        service.appendChild(classifierList);
    }
    else if (lowerName.contains("query")) {
        serviceType = GSXML.SERVICE_TYPE_QUERY;
        if (serviceName.equals("TextQuery")) {
            describeTextQueryService(service);
        } else if (serviceName.equals("FieldQuery")) {
            describeFieldQueryService(service);
        }
    }
    // don't forget to add the type attribute to the service!
    service.setAttribute(GSXML.TYPE_ATT, serviceType);

    // the service itself is the originator of the response
    Element responseMsg = createResponseMessage(doc, service, null,
        GSXML.REQUEST_TYPE_DESCRIBE, serviceName);
    try {
        return FedoraCommons.elementToFormattedString(responseMsg);
    } catch (TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Fills out the given service Element so that it becomes the describe
 * response XML for FedoraGS3's TextQuery service: three display items
 * (name, submit button text, description) followed by a parameter list
 * holding the maximum-hits parameter and the query-string parameter.
 *
 * @param service the service Element that is being filled out; new nodes
 * are created in its owner document.
 */
protected void describeTextQueryService(Element service) {
    Document ownerDoc = service.getOwnerDocument();

    // name, submit (button) and description display items
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Text Search"));
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search"));
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
        "Title and full-text search service"));

    // The parameter list. Not offered as parameters:
    // - granularity to search at: always document and section level
    // - casefolding: always on (that is, case is irrelevant)
    // - document display order: always ranked
    Element paramList = ownerDoc.createElement(
        GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);

    // integer parameter MAXDOCS, default "100":
    // "Maximum hits to return"
    Element maxDocsParam = ownerDoc.createElement(GSXML.PARAM_ELEM);
    maxDocsParam.setAttribute(GSXML.NAME_ATT, MAXDOCS);
    maxDocsParam.setAttribute(GSXML.DEFAULT_ATT, "100");
    maxDocsParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_INTEGER);
    maxDocsParam.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Maximum hits to return"));
    paramList.appendChild(maxDocsParam);

    // string parameter QUERY: "Query string"
    Element queryParam = ownerDoc.createElement(GSXML.PARAM_ELEM);
    queryParam.setAttribute(GSXML.NAME_ATT, QUERY);
    queryParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_STRING);
    queryParam.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Query string"));
    paramList.appendChild(queryParam);

    service.appendChild(paramList);
}
/**
 * Fills out the given service Element so that it becomes the describe
 * response XML for FedoraGS3's FieldQuery service: three display items
 * (name, submit button text, description) followed by a parameter list.
 * The list holds the maximum-hits parameter and a multi-parameter row,
 * occurring 4 times, that combines a query-string parameter with a
 * single-choice drop-down naming the field to search in.
 *
 * @param service the service Element that is being filled out; new nodes
 * are created in its owner document.
 */
protected void describeFieldQueryService(Element service) {
    Document ownerDoc = service.getOwnerDocument();

    // name, submit (button) and description display items
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Form Search"));
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search"));
    service.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
        "Simple fielded search"));

    // The parameter list. Not offered as parameters:
    // - granularity to search at: always document and section level
    // - casefolding: always on (that is, case is irrelevant)
    // - document display order: always ranked
    Element paramList = ownerDoc.createElement(
        GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);

    // integer parameter MAXDOCS, default "100":
    // "Maximum hits to return"
    Element maxDocsParam = ownerDoc.createElement(GSXML.PARAM_ELEM);
    maxDocsParam.setAttribute(GSXML.NAME_ATT, MAXDOCS);
    maxDocsParam.setAttribute(GSXML.DEFAULT_ATT, "100");
    maxDocsParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_INTEGER);
    maxDocsParam.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Maximum hits to return"));
    paramList.appendChild(maxDocsParam);

    // The row of controls: a multi-type parameter named SIMPLEFIELD_ATT
    // that nests the query parameter and the field drop-down.
    Element rowOfParams = ownerDoc.createElement(GSXML.PARAM_ELEM);
    rowOfParams.setAttribute(GSXML.NAME_ATT, SIMPLEFIELD_ATT);
    // we want the row of controls to occur multiple times
    rowOfParams.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_MULTI);
    rowOfParams.setAttribute(OCCURS_ATT, "4"); // the row occurs 4 times

    // string parameter QUERY: "Word or phrase"
    Element queryParam = ownerDoc.createElement(GSXML.PARAM_ELEM);
    queryParam.setAttribute(GSXML.NAME_ATT, QUERY);
    queryParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_STRING);
    queryParam.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "Word or phrase"));
    rowOfParams.appendChild(queryParam);

    // single-choice parameter FIELDNAME_ATT, default ALL_FIELDS:
    // "in field"
    Element fieldParam = ownerDoc.createElement(GSXML.PARAM_ELEM);
    fieldParam.setAttribute(GSXML.NAME_ATT, FIELDNAME_ATT);
    fieldParam.setAttribute(GSXML.TYPE_ATT, GSXML.PARAM_TYPE_ENUM_SINGLE);
    fieldParam.setAttribute(GSXML.DEFAULT_ATT, ALL_FIELDS);
    fieldParam.appendChild(createNameValuePairElement(ownerDoc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
        "in field"));

    // each searchable field paired with the text displayed for it
    String[][] fieldOptions = {
        { ALL_FIELDS, "all titles and full-text" },
        { DOC_TITLES, "document titles only" },
        { ALL_TITLES, "document and section titles" },
        { FULLTEXT, "full-text only" }
    };
    // one option element per field goes into the drop-down parameter
    for (String[] fieldOption : fieldOptions) {
        Element option = ownerDoc.createElement(GSXML.PARAM_OPTION_ELEM);
        option.setAttribute(GSXML.NAME_ATT, fieldOption[0]);
        option.appendChild(createNameValuePairElement(ownerDoc,
            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
            fieldOption[1]));
        fieldParam.appendChild(option); // add option to the drop-down box
    }
    rowOfParams.appendChild(fieldParam);

    paramList.appendChild(rowOfParams);
    service.appendChild(paramList);
}
/**
 * Produces a GS3 describe response message for the requested service of
 * the given collection. All collections in this Digital Library (Fedora
 * Repository) share the same services, so this method simply returns what
 * describeService(serviceName) returns.
 *
 * @param collectionName the name of the collection whose service is to be
 * described. It is not used: all services are FedoraGS3 services and are
 * not unique to any particular (greenstone) collection.
 * @param serviceName the name of the service in the collection which is to
 * be described.
 * @return the describe response produced by describeService(serviceName).
 */
public String describeCollectionService(String collectionName,
    String serviceName) {
    // collectionName is deliberately ignored (see above)
    String serviceDescription = describeService(serviceName);
    return serviceDescription;
}
/**
 * Performs the implemented browse operation for the requested classifiers
 * of a collection and returns the result as a response message string.
 * <p>
 * For every classifier ID the response contains a classifierNode holding a
 * nodeStructure, which in turn holds the top-level classifierNode for that
 * ID. What goes below the top-level node depends on the classifier number
 * (the ID with its "CL" prefix removed):
 * <ul>
 * <li>1 (browse titles by letter): one sub-classifier per letter A-Z for
 *     which {@code browseTitlesByLetter} returned at least one document.
 *     The sub-classifier ID is the classifier ID followed by "." and the
 *     position of the letter in the alphabet (A = 1). If no letter yielded
 *     a sub-classifier, a single placeholder with suffix ".0" is added.</li>
 * <li>2 (browse by collection): nothing is added.</li>
 * <li>anything else, including IDs that are null or whose remainder is not
 *     a number: an "Unknown classifier ID" problem is recorded for the
 *     response instead of letting a NumberFormatException escape.</li>
 * </ul>
 * Only the most recently recorded problem is passed on to the response.
 *
 * @param collectionName is the name of the collection whose documents are
 *        browsed
 * @param classifierIDs are the ids of the classifiers on which to browse,
 *        expected to be of the form CL followed by a number
 * @return the formatted string form of the response message, or the
 *         XML-to-string conversion failure message followed by the
 *         exception text when the response cannot be converted
 */
public String browse(String collectionName, String[] classifierIDs)
{
    Document doc = builder.newDocument();
    // Most recent problem encountered (if any); handed to
    // createResponseMessage once all classifiers have been processed.
    FedoraGS3RunException ex = null;

    // the list that receives one classifierNode per requested classifier
    Element classifierNodeList = doc.createElement(
        GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);

    for(int i = 0; i < classifierIDs.length; i++) {
        // outer classifierNode identifying the request
        Element requestedClassifierNode = doc.createElement(
            GSXML.CLASS_NODE_ELEM);
        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
        attribute.setValue(classifierIDs[i]);
        requestedClassifierNode.setAttributeNode(attribute);
        classifierNodeList.appendChild(requestedClassifierNode);

        // its nodeStructure child
        Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
        requestedClassifierNode.appendChild(nodeStructure);

        // and, inside the nodeStructure, the top level classifierNode again
        Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
        attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
        attribute.setValue(classifierIDs[i]);
        classifierNode.setAttributeNode(attribute);
        nodeStructure.appendChild(classifierNode);

        // Work out what we're browsing based on the classifierID's number:
        //   classifier CL1 = browse titles by letter;
        //   classifier CL2 = browse by collection.
        // A null or non-numeric ID keeps the -1 sentinel, which matches no
        // case label and therefore ends up in the "unknown" branch below.
        int classifierNum = -1;
        if(classifierIDs[i] != null) {
            try {
                classifierNum = Integer.parseInt(
                    classifierIDs[i].replace("CL", ""));
            } catch(NumberFormatException e) {
                classifierNum = -1;
            }
        }

        switch(classifierNum) {
        case 1:
            // num tracks the position of ch in the alphabet (A = 1); it is
            // advanced together with ch, also for letters that are skipped
            int num = 1;
            for(char ch = 'A'; ch <= 'Z'; ch++, num++) {
                // Retrieve the document structure for each subClassifierID:
                // all the documents that begin with its letter.
                String letter = String.valueOf(ch);
                try {
                    String[] docPIDs = this.browseTitlesByLetter(
                        collectionName, letter);
                    if(docPIDs.length == 0) {
                        continue; // skip letters that don't have any kids
                    }
                    Element subClassifier = doc.createElement(
                        GSXML.CLASS_NODE_ELEM);
                    attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
                    attribute.setValue(classifierIDs[i]+"."+num);
                    subClassifier.setAttributeNode(attribute);
                    classifierNode.appendChild(subClassifier);
                    // append the structure for the docPIDs found as
                    // children of subClassifier
                    getStructureElement(subClassifier, docPIDs, DESCENDANTS);
                } catch(Exception e) {
                    // remember the problem and carry on with the next letter
                    ex = new FedoraGS3RunException(e);
                    ex.setSpecifics("requested portion of TOC file or "
                        + "trouble with fielded search ");
                }
            }
            // No titles in this collection that start with a letter at all:
            // add a single placeholder sub-classifier numbered 0, which
            // browseMetadataRetrieve() equates with "A-Z"
            if(!classifierNode.hasChildNodes()) {
                Element subClassifier = doc.createElement(
                    GSXML.CLASS_NODE_ELEM);
                attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
                attribute.setValue(classifierIDs[i]+"."+0);
                subClassifier.setAttributeNode(attribute);
                classifierNode.appendChild(subClassifier);
            }
            break;
        case 2:
            // browse by collection: nothing is added here
            break;
        default:
            ex = new FedoraGS3RunException( // cause is regular exception
                new Exception("Unknown classifier ID: " + classifierIDs[i]));
        }
    }

    Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
        GSXML.REQUEST_TYPE_DESCRIBE, collectionName+"/ClassifierBrowse");
    try{
        return FedoraCommons.elementToFormattedString(responseMsg);
    }catch(TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Performs something equivalent to a greenstone3
 * ClassifierBrowseMetadataRetrieve on the given classifierNode IDs.
 * <p>
 * The part of each ID after its first '.' is read as the sub-classifier
 * number. Number 0 is given the title "A-Z" (no document titles started
 * with a letter); numbers 1 to 26 are given the corresponding letter of
 * the alphabet (1 = A). An ID that is null, or whose sub-classifier part is
 * not a number in the range 0 to 26, gets no classifierNode in the
 * response; instead an "Unknown classifier node ID" problem is recorded
 * and passed to the response (only the most recent one is kept).
 *
 * @param classNodeIDs array of classifierNode IDs for which the metadata
 *        needs to be returned
 * @return the formatted string form of the response message listing a
 *         "Title" metadata element for each recognised classifierNode, or
 *         the XML-to-string conversion failure message followed by the
 *         exception text when the response cannot be converted
 */
public String browseMetadataRetrieve(String[] classNodeIDs)
{
    Document doc = this.builder.newDocument();
    // Most recent problem encountered (if any), reported in the response
    FedoraGS3RunException ex = null;

    // the list that receives one classifierNode per recognised ID
    Element classifierNodeList = doc.createElement(
        GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);

    for(int i = 0; i < classNodeIDs.length; i++) {
        // strip ID of everything up to and including the first '.'
        // (i.e. remove "CL#.") and parse the remainder; -1 marks an ID
        // that could not be interpreted
        int subClassifierNum = -1;
        if(classNodeIDs[i] != null) {
            int index = classNodeIDs[i].indexOf('.');
            try {
                subClassifierNum = Integer.parseInt(
                    classNodeIDs[i].substring(index+1));
            } catch(NumberFormatException e) {
                subClassifierNum = -1;
            }
        }
        if(subClassifierNum < 0 || subClassifierNum > 26) {
            // neither the "A-Z" placeholder nor a letter of the alphabet
            ex = new FedoraGS3RunException(new Exception(
                "Unknown classifier node ID: " + classNodeIDs[i]));
            continue;
        }

        String classifierName;
        if(subClassifierNum == 0) { // no document titles started with a letter
            classifierName = "A-Z";
        } else {
            char letter = (char)('A' + subClassifierNum - 1); // A = 1
            classifierName = String.valueOf(letter);
        }

        // the classifierNode carrying the requested ID
        Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
        attribute.setValue(classNodeIDs[i]);
        classifierNode.setAttributeNode(attribute);

        // its metadataList with a single metadata element: the title of
        // this classifierNode
        Element metadataList = doc.createElement(
            GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
        Element metadata = this.createNameValuePairElement(doc,
            GSXML.METADATA_ELEM, "Title", classifierName);

        // now connect up everything
        metadataList.appendChild(metadata);
        classifierNode.appendChild(metadataList);
        classifierNodeList.appendChild(classifierNode);
    }

    Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
        GSXML.REQUEST_TYPE_PROCESS, //collName +
        "ClassifierBrowseMetadataRetrieve");
    try{
        return FedoraCommons.elementToFormattedString(responseMsg);
    }catch(TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Creates a classifier element of the following format:
 * <pre>
 * &lt;classifier content="somecontent" name="CL+num"&gt;
 *   &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
 *   &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
 * &lt;/classifier&gt;
 * </pre>
 * The element and attribute names actually used are the values of the
 * corresponding {@code GSXML} constants.
 *
 * @param doc the document used to create the element
 * @param content value of the content attribute
 * @param classifierNum the number suffixed to "CL", together forming the
 *        value of the classifier's name attribute
 * @param displayNameVal the body text of the first display item child
 * @param displayDescrVal the body text of the second (description)
 *        display item child
 * @return the newly created classifier element; it is not attached to
 *         any parent
 */
protected Element createClassifierElement(Document doc, String content,
        int classifierNum, String displayNameVal, String displayDescrVal)
{
    Element classifierElem = doc.createElement(GSXML.CLASSIFIER_ELEM);

    // attributes: first the content, then the name ("CL" + number)
    classifierElem.setAttribute(GSXML.CLASSIFIER_CONTENT_ATT, content);
    classifierElem.setAttribute(GSXML.NAME_ATT, "CL" + classifierNum);

    // children: the display name followed by the display description
    classifierElem.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal));
    classifierElem.appendChild(createNameValuePairElement(doc,
        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
        displayDescrVal));

    return classifierElem;
}
/**
 * Creates an element of the following format:
 * <pre>
 * &lt;elementName name="somename"&gt;some display value&lt;/elementName&gt;
 * </pre>
 * where the attribute is named by {@code GSXML.NAME_ATT}.
 *
 * @param doc the document used to create the element
 * @param elementName the tag name of the new element
 * @param name the value of the element's name attribute
 * @param value the body text of the element
 * @return the newly created element; it is not attached to any parent
 */
protected Element createNameValuePairElement(Document doc, String elementName,
        String name, String value) {
    final Element pair = doc.createElement(elementName);

    // the name attribute
    final Attr nameAttr = doc.createAttribute(GSXML.NAME_ATT);
    nameAttr.setValue(name);
    pair.setAttributeNode(nameAttr);

    // the body text
    final Text bodyText = doc.createTextNode(value);
    pair.appendChild(bodyText);

    return pair;
}
/**
 * Runs a plain text query through FedoraGSearch and returns the pids that
 * {@code getPIDsFromSearchResult} extracts from the search result for the
 * given collection.
 *
 * @param collection is the collection to search in
 * @param query is the query term to search for. It won't specify the
 *        indexed field to search in, which will mean that GSearch will
 *        search all default indexed fields.
 * @param maxDocs is the maximum number of results to return (which
 *        at present we consider equivalent to FedoraGSearch's hitpageSize)
 * @return the pids obtained from the search result, or an empty array
 *         when the query is empty or consists only of whitespace
 * @throws Exception whatever the FedoraGSearch calls throw
 */
public String[] textQuery(String collection, String query,
        int maxDocs)
    throws Exception
{
    // nothing to search for if the query is empty or just spaces
    if(query.trim().length() == 0) {
        return new String[0];
    }

    // The query does not name an indexed field, which Fedora GSearch takes
    // to mean all default indexed fields. A PID term is appended to it
    // (presumably to limit hits to greenstone PIDs - confirm against the
    // FedoraGSearch query syntax).
    final String fullQuery = query + " " + "PID" + COLON + GREENSTONE;

    // arguments after the query: hitpageStart, hitpageEnd and snippetsMax
    // (the latter is left at 0)
    final String resultXML
        = this.fedoraGSearch.search(fullQuery, 1, maxDocs, 0);

    // extract the pids of the documents returned (if any) from the XML
    // produced by FedoraGSearch
    return this.fedoraGSearch.getPIDsFromSearchResult(collection, resultXML);
}
/**
 * Performs a field query and returns the unique pids of all matching
 * documents. Two searches contribute to the result:
 * <ol>
 * <li>each non-blank phrase listed for DOC_TITLES is looked up with
 *     {@code searchDocumentTitles};</li>
 * <li>the complete parameter map is handed to FedoraGSearch, and the pids
 *     are extracted from its search result.</li>
 * </ol>
 *
 * @param collection is the collection to search in
 * @param nameValParamsMap is a Map of several (key, value) entries,
 *        4 of which we're concerned with here:
 *        - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
 *        - the values are a comma separated list of terms (phrases or
 *          single words) to search that field for. There may be more than
 *          one or there may be none (in which case there may be N empty
 *          values or spaces separated by commas).
 * @param maxDocs is the maximum number of results to return (which
 *        at present we consider equivalent to FedoraGSearch's hitpageSize)
 * @return the pids found by either search, without duplicates and in no
 *         particular order
 * @throws Exception whatever the underlying search calls throw
 */
public String[] fieldQuery(String collection, Map nameValParamsMap,
        int maxDocs)
    throws Exception
{
    // We maintain a collection of UNIQUE pids that were returned in the
    // search results, hence a Set.
    java.util.Set uniquePids = new java.util.HashSet();

    // (1) Use Fedora's search to search document titles, if they were
    // specified:
    String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
    if(docTitleTerms != null) { // no doc titles may have been specified
        String[] phrases = docTitleTerms.split(COMMA);
        // search the individual phrases one at a time
        for(int i = 0; i < phrases.length; i++) {
            // Skip entries that are empty or only whitespace. The test has
            // to look at the individual phrase: comparing the phrases
            // array itself against a String is never true.
            if(phrases[i].trim().length() == 0) {
                continue;
            }
            String[] docTitlePids = this.searchDocumentTitles(
                collection, phrases[i], false);
            for(int j = 0; j < docTitlePids.length; j++) {
                uniquePids.add(docTitlePids[j]);
            }
        }
    }

    // (2) use FedoraGSearch to search doc AND section titles, and
    // fulltext (in case these were specified in nameValParamsMap):
    String searchResult = this.fedoraGSearch.search(
        nameValParamsMap, 1, maxDocs);
    String[] gsearchPids = this.fedoraGSearch.getPIDsFromSearchResult(
        collection, searchResult);
    for(int i = 0; i < gsearchPids.length; i++) {
        uniquePids.add(gsearchPids[i]);
    }

    // unique pids, as an array
    return (String[])uniquePids.toArray(new String[uniquePids.size()]);
}
/**
 * Executes the query described by the given parameters and returns a
 * Greenstone3 XML query process response listing the matching documents.
 * <p>
 * If the service is "TextQuery", the QUERY parameter is passed to
 * {@link #textQuery}. Any other service name is treated as a FieldQuery:
 * the comma separated field names (FIELDNAME_ATT) and search terms (QUERY)
 * are matched up by position, the terms belonging to the same field are
 * regrouped into one comma separated list per field, and the regrouped map
 * is passed to {@link #fieldQuery}. Failures of either kind, including
 * missing FieldQuery parameters or more search terms than field names,
 * are logged and reported through the response message rather than thrown.
 * <p>
 * This method is only ever called when any of the services in the digital
 * library described themselves as type=query. Therefore any digital
 * libraries that have no query services can just return empty message
 * strings (or even "") since this method will never be called on them
 * anyway.
 *
 * @param collection is the name of the collection
 * @param service is the name of the query service
 * @param nameValParamsMap is a Map of name and value pairs for all the
 *        query field data values. The names match the field names that
 *        describeCollectionService() would have returned for the query
 *        service. A missing or non-numeric MAXDOCS value means 100.
 * @return the formatted string form of the response message, or the
 *         XML-to-string conversion failure message followed by the
 *         exception text when the response cannot be converted
 */
public String query(String collection, String service,
        Map nameValParamsMap)
{
    FedoraGS3RunException ex = null;

    // (1) obtain the requested number of maximum result documents;
    // parseInt also rejects a missing (null) value with a
    // NumberFormatException, in which case the default applies
    int maxDocs = 100;
    try{
        maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
    } catch(NumberFormatException e) {
        maxDocs = 100;
    }

    String pids[] = {};

    // (2) for TextQuery, we simply search ALL_FIELDS using FedoraGSearch
    if(service.equals("TextQuery")) {
        try {
            // get the Query field:
            String query = (String)nameValParamsMap.get(QUERY);
            pids = textQuery(collection, query, maxDocs);
        }
        catch(Exception e) {
            LOG.error("Error in TextQuery processing: " + e);
            ex = new FedoraGS3RunException(
                "When trying to use FedoraGenericSearch for a TextQuery", e);
        }
    } else { // (3) FieldQuery
        // The parameter handling lives inside the try block so that bad
        // input ends up in the response like any other failure.
        try {
            // first get the comma-separated lists
            String listOfFieldNames
                = (String)nameValParamsMap.get(FIELDNAME_ATT);
            String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
            if(listOfFieldNames == null || listOfSearchTerms == null) {
                throw new IllegalArgumentException("A FieldQuery needs both"
                    + " a list of field names and a list of search terms");
            }

            // both are comma separated lists, so split both on 'comma'
            String[] fieldNames = listOfFieldNames.split(COMMA);
            String[] searchTerms = listOfSearchTerms.split(COMMA);
            // There may be fewer searchTerms than fieldNames (since some
            // fieldNames may have been left empty), but never more: each
            // term needs the field name found at the same index.
            if(searchTerms.length > fieldNames.length) {
                throw new IllegalArgumentException("A FieldQuery cannot"
                    + " have more search terms (" + searchTerms.length
                    + ") than field names (" + fieldNames.length + ")");
            }

            // In the fieldNames and searchTerms lists of nameValParamsMap,
            // each searchTerm element was matched with its correspondingly
            // indexed fieldName.
            // A new map reorganises this by putting all terms for a
            // particular fieldName together in a comma separated list and
            // associating that with the fieldName. I.e. (key, value) ->
            // (fieldName, comma-separated list of all terms in that field)
            Map map = new HashMap();
            for(int i = 0; i < searchTerms.length; i++) {
                if(map.containsKey(fieldNames[i])) {
                    // fieldName is already present, so append a comma and
                    // the new term to its list
                    String termsList = (String)map.get(fieldNames[i]);
                    termsList = termsList + COMMA + searchTerms[i];
                    map.put(fieldNames[i], termsList);
                } else {
                    // first occurrence of this fieldName: store the
                    // searchTerm as-is
                    map.put(fieldNames[i], searchTerms[i]);
                }
            }

            // For fieldquery, we search on all the fieldNames specified
            // - if DOC_TITLES is specified then we use Fedora's search
            // - for all other fieldNames specified, we use FedoraGSearch
            pids = fieldQuery(collection, map, maxDocs);
        }
        catch(Exception e) {
            LOG.error("Error in FieldQuery processing: " + e);
            ex = new FedoraGS3RunException(
                "When trying to use FedoraGenericSearch for a FieldQuery", e);
        }
    }

    // Build the Greenstone XML Query response message from the pids
    // (which should be document identifiers)
    Document doc = builder.newDocument();

    // metadataList with a single metadata element giving the number of
    // documents matched.
    // NOTE(review): this list is built but never attached to the response
    // in this method - confirm whether it was meant to be part of it.
    Element metadataList = doc.createElement(
        GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
    Element metadata = doc.createElement(GSXML.METADATA_ELEM);
    Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
    attribute.setValue(NUM_DOCS_MATCHED);
    metadata.setAttributeNode(attribute);
    attribute = doc.createAttribute(GSXML.VALUE_ATT);
    attribute.setValue(Integer.toString(pids.length));
    metadata.setAttributeNode(attribute);
    metadataList.appendChild(metadata);

    // the list of document nodes: one per pid, each marked as the root of
    // a hierarchy
    Element docNodeList = doc.createElement(
        GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
    for(int i = 0; i < pids.length; i++) {
        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
        attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
        attribute.setValue(pids[i]);
        docNode.setAttributeNode(attribute);
        attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
        attribute.setValue("hierarchy");
        docNode.setAttributeNode(attribute);
        attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
        attribute.setValue("root");
        docNode.setAttributeNode(attribute);
        docNodeList.appendChild(docNode);
    }

    Element responseMsg = createResponseMessage(doc, docNodeList, ex,
        GSXML.REQUEST_TYPE_PROCESS, service);
    try{
        return FedoraCommons.elementToFormattedString(responseMsg);
    }catch(TransformerException e) {
        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
            + " " + e;
    }
}
/**
 * Manual test driver. Connects using the settings in the file
 * "fedoraGS3.properties" and prints to standard output the results of the
 * describe, content/metadata/structure retrieval, error-case, classifier
 * browse and query operations, then exits. Several calls use hard-coded
 * collection names and document pids. Any exception is printed as a stack
 * trace and additionally shown in an error dialog.
 *
 * @param args command line arguments; not used
 */
public static void main(String args[]) {
    try{
        // testing default constructor
        //FedoraGS3Connection con = new FedoraGS3Connection();

        // testing constructor that takes properties file to show initial
        // fedora server values
        java.io.File propertyFilename
            = new java.io.File("fedoraGS3.properties");
        FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);

        // DESCRIBE: serviceList, collectionList
        System.out.println("serviceList:\n" + con.getServiceList());
        System.out.println("collectionList:\n" + con.getCollectionList());

        String[] colPIDs = con.getCollections();
        // reuse the pids just retrieved rather than fetching them again
        String[] collectionNames = con.getCollectionNames(colPIDs);
        for(int i = 0; i < collectionNames.length; i++) {
            System.out.println("Describing collections:\n");
            System.out.println(con.describeCollection(collectionNames[i]));
            System.out.println("Describing collection services:\n"
                + con.describeCollectionServices(collectionNames[i]));
        }

        String[] serviceNames = con.getServiceNames();
        for(int i = 0; i < serviceNames.length; i++) {
            System.out.println("Describing " + serviceNames[i] + ":\n"
                + con.describeCollectionService("demo", serviceNames[i]));
        }

        // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
        // along with EX of the top-level document:
        System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
        System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}));

        // the documents of the first collection; none if the repository
        // has no collections at all
        String[] docIDs = (colPIDs.length > 0)
            ? con.getCollectionDocs(colPIDs[0]) : new String[0];

        System.out.println("\nGET CONTENT:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getContent(docIDs[i]));
        }

        System.out.println("\nGET META:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getDocumentMetadata(docIDs[i]));
        }

        String[] getTitlesFor = {
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
        };

        // first let's display the regular meta for top-level docs and
        // their sections
        for(int i = 0; i < getTitlesFor.length; i++) {
            System.out.println(con.getDocumentMetadata(getTitlesFor[i]));
        }
        System.out.println("\nTitles are:");
        System.out.println(con.getTitleMetadata(getTitlesFor));

        System.out.println("\nGET STRUCTURE:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getChildren(docIDs[i]));
            System.out.println(con.getDocumentStructure(docIDs[i]));
        }

        // TEST ERROR CASES:
        System.out.println("\nTESTING ERROR CASES");
        System.out.println(con.getContent("greenstone:demo-pinky"));
        String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
            "greenstone:demo-pinky" };
        System.out.println(con.getContent(errorCases));
        System.out.println(con.getDocumentMetadata(errorCases));
        System.out.println(con.getDocumentStructure(errorCases));

        System.out.println("\nCLASSIFIER BROWSE");
        System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
            new String[]{"CL1"}));

        System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
        // one classifier node ID per letter of the alphabet: CL1.1 - CL1.26
        String[] classNodeIDs = new String[26];
        for(int i = 0; i < classNodeIDs.length; i++) {
            int subClassifierNum = i + 1;
            classNodeIDs[i] = "CL1." + subClassifierNum;
        }
        System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
            classNodeIDs));

        System.out.println("Testing query services");
        System.out.println("TEXT QUERY:");
        Map formControlValsMap = new HashMap();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "snails");
        String searchResponse
            = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
        System.out.println(searchResponse);

        System.out.println("FIELD QUERY:");
        formControlValsMap.clear();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
        formControlValsMap.put(FIELDNAME_ATT,
            "allFields,docTitles,allFields,allFields");
        searchResponse
            = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
        System.out.println(searchResponse);

        System.exit(0);
    }catch(Exception e) {
        // Print the stack trace first, so that it is not lost should the
        // dialog below fail to display (e.g. in a headless environment).
        e.printStackTrace();
        JOptionPane.showMessageDialog(
            null, e, "Error", JOptionPane.ERROR_MESSAGE);
    }
}
}