/** *######################################################################### * FedoraGS3Connection.java - works with the demo-client for Greenstone 3, * of the Greenstone digital library suite from the New Zealand Digital * Library Project at the * University of Waikato, New Zealand. *

* Copyright (C) 2008 New Zealand Digital Library Project *

* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. *

* This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *######################################################################## */ package org.greenstone.fedora.services; import java.io.StringReader; import org.apache.log4j.Logger; import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException; import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException; import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException; import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException; import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants import org.greenstone.gsdl3.util.GSXML; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Attr; import org.w3c.dom.Text; import org.w3c.dom.NodeList; import org.w3c.dom.Node; import org.xml.sax.InputSource; import java.io.File; import java.util.HashMap; import java.util.Properties; import java.util.Map; import javax.swing.JOptionPane; import org.xml.sax.SAXException; import java.io.UnsupportedEncodingException; import java.io.IOException; import javax.net.ssl.SSLHandshakeException; import java.net.Authenticator; import java.net.ConnectException; import java.net.MalformedURLException; import java.net.PasswordAuthentication; import java.rmi.RemoteException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; /** * Class that extends FedoraConnection in order to be able to use * Fedora's web services to retrieve the specific datastreams of * Greenstone documents stored in Fedora's repository. This class * provides methods that convert those datastreams into Greenstone3 * XML response messages which are returned. 
* @author ak19 */ public class FedoraGS3Connection extends FedoraConnection implements FedoraToGS3Interface, FedoraToGS3Interface.Constants { /** The logging instance for this class */ private static final Logger LOG = Logger.getLogger( FedoraGS3Connection.class.getName()); /** Default name of Fedora index */ private static final String DEFAULT_FEDORA_INDEX = "FgsIndex"; //"BasicIndex" for older versions of GSearch /** Complete list of services that our FedoraGS3 would support * if everything goes well. If a connection to FedoraGSearch * cannot be established, the query services will no longer be * available. The actual services supported are given by member * variable serviceNames. */ protected static final String[] SERVICES = { "DocumentContentRetrieve", "DocumentMetadataRetrieve", "DocumentStructureRetrieve", "TextQuery", "FieldQuery", "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve" }; /** List of services actually supported by our FedoraGS3 repository * after construction. If FedoraGenericSearch can't be connected to, * then query services will not be offered */ protected String[] serviceNames; /** The object used to connect to FedoraGenericSearch, which is used * for doing full-text searching */ protected GSearchConnection fedoraGSearch; /** The url for the wsdl file of FedoraGSearch's web services * by default this will be the Fedora server's base URL * concatenated to "gsearch/services/FgsOperations?wsdl" */ protected String gSearchWSDLURL; /** The last part of the gSearchWSDL URL. The first part is * the same as the fedora server's base url. */ protected String gSearchWSDLSuffix; /** The name of the index that FedoraGSearch will index the GS3 * documents into. If no name is specified in the properties file, * this will default to FedoraIndex. 
*/ protected String gSearchIndexName; /** 5 argument constructor is the same as that of superclass FedoraConnection: * @param protocol can be either http or https * @param host is the host where the fedora server is listening * @param port is the port where the fedora server is listening * @param fedoraServerUsername is the username for administrative * authentication required to access the fedora server. * @param fedoraServerPassword is the password for administrative * authentication required to access the fedora server. If no password was set * when installing Fedora, leave the field "". * Instantiates a FedoraGS3Connection object which connects to Fedora's * web services through stub classes and tries to connect to FedoraGSearch's * web services through the default WSDL location for it * ("gsearch/services/FgsOperations?wsdl"). If another url is to be used, * call setGSearchWSDLURL(url) after the constructor instead. */ public FedoraGS3Connection(String protocol, String host, int port, String fedoraServerUsername, String fedoraServerPassword) throws ParserConfigurationException, MalformedURLException, SSLHandshakeException, RemoteException, AuthenticationFailedException, NotAFedoraServerException, ConnectException, Exception { super(protocol, host, port, fedoraServerUsername, fedoraServerPassword); // super() will call setInitialisationProperties(properties) // And that will try to instantiate the GSearchConnection. } /** No-argument constructor which is the same as that of superclass * FedoraConnection: it displays a small dialog requesting input for the * host, port, administrative password and username of the fedora server. * If no password was set on the fedora repository when installing it, * the user can leave the password field blank. 
*/ public FedoraGS3Connection() throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { super(); // super() will call setInitialisationProperties(properties) // And that will try to instantiate the GSearchConnection. } /** Single-argument constructor which is the same as that of superclass * FedoraConnection: it takes the name of the properties file where * connection initialisation values may already be provided and then * displays a small dialog requesting input for the host, port, * administrative password and username of the fedora server showing * the values in the properties file as default. If the necessary * initialisation are not present in the file, the corresponding fields * in the dialog will be blank. * If no password was set on the fedora repository when installing it, * the user can leave the password field blank. */ public FedoraGS3Connection(File propertiesFilename) throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { super(propertiesFilename); // super() will call setInitialisationProperties(properties) // And that will try to instantiate the GSearchConnection. } /** The superclass constructor calls this method passing any preset * properties loaded from a propertiesFile. This method is overridden * here in order to instantiate the gSearchConnection based on the * - gSearchWSDLSuffix that will be appended to the fedora base url. * (If one was not provided in the properties file, gSearchWSDLURL defaults * to something of the form * "http://<fedorahost:port>/fedoragsearch/services/FgsOperations?wsdl" * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix, * "gsearch/services/FgsOperations?wsdl". * - name of the index into which the GS3 documents have been indexed * and which FedoraGenericSearch should use to perform searches. 
If none is * given in the properties file, then the index name defaults to "FgsIndex" * (no longer BasicIndex or FedoraIndex). * @param properties is the Properties Map loaded from a properties file * (if there was any) which specifies such things as host and port of the * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix". * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set * to whatever the final value of this.gSearchWSDLURL' suffix is, and * "gsearch.indexName" will be set to to whatever the final value of * this.gSearchIndexName is. */ protected void setInitialisationProperties(Properties properties) throws ParserConfigurationException, MalformedURLException, CancelledException, ConnectException, RemoteException, SSLHandshakeException, Exception { super.setInitialisationProperties(properties); // gsearchWSDL URL suffix, if not specified, defaults to // "fedoragsearch/services/FgsOperations?wsdl" which is // concatenated to the baseURL of fedora to give the gsearchWSDLURL. this.gSearchWSDLSuffix = properties.getProperty( "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl"); this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix; // Set the property to whatever this.gSearchWSDLURL is now, // so that it will be written out to the properties file again properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix); // Similarly for the name of the index FedoraGenericSearch should use // when performing searches for GS3 docs stored in Fedora's repository. this.gSearchIndexName = properties.getProperty( "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex properties.setProperty("gsearch.indexName", this.gSearchIndexName); // Create a connection to FedoraGSearch's web services: initSearchFunctionality(); } /** Overridden init method to work with the 5 argument constructor, so that we can * bypass using setInitialisationProperties() which works with a Properties map. 
*/ protected void init(String protocol, String host, String port, final String fedoraServerUsername, final String fedoraServerPassword) throws ParserConfigurationException, MalformedURLException, AuthenticationFailedException, RemoteException, Exception { super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword); this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl"; this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix; this.gSearchIndexName = DEFAULT_FEDORA_INDEX; // Now need to set username and password for accessing WSDL (after GSearch 2.2) // http://stackoverflow.com/questions/3037221/401-error-when-consuming-a-web-service-with-http-basic-authentication-using-cxf // The java.net.Authenticator can be used to send user credentials when needed. Authenticator.setDefault(new Authenticator() { @Override protected PasswordAuthentication getPasswordAuthentication() { return new PasswordAuthentication( fedoraServerUsername, fedoraServerPassword.toCharArray()); } }); initSearchFunctionality(); } /** Init method that instantiates a GSearchConnection object used * to work with the separate FedoraGSearch web services. * The url of the WSDL for FedoraGSearch's web services is worked out * from the baseURL of the Fedora server. */ protected void initSearchFunctionality() { try { this.fedoraGSearch = null; this.fedoraGSearch = new GSearchConnection( gSearchWSDLURL, gSearchIndexName); this.serviceNames = SERVICES; } catch(Exception e){ LOG.error("Cannot connect to FedoraGSearch's web services at " + gSearchWSDLURL + "\nQuery services will not be available.", e); // Exception, e, as parameter prints the stacktrace of the exception to the log // If an exception occurs, something has gone wrong when // trying to connect to FedoraGSearch's web services. 
This // means, we can't offer query services, as that's provided // by FedoraGSearch serviceNames = null; int countOfNonQueryServices = 0; for(int i = 0; i < SERVICES.length; i++) { // do not count query services if(!SERVICES[i].toLowerCase().contains("query")) { countOfNonQueryServices++; } } // Services now supported are everything except Query services serviceNames = new String[countOfNonQueryServices]; int j = 0; for(int i = 0; i < SERVICES.length; i++) { if(!SERVICES[i].toLowerCase().contains("query")) { serviceNames[j] = SERVICES[i]; j++; // valid serviceName, so increment serviceName counter } } } } /** @return the gSearchWSDLURL, the url of the WSDL for the * FedoraGSearch web services */ public String getGSearchWSDLURL() { return gSearchWSDLURL; } /** Sets the member variable gSearchWSDLURL that specify the location of * the WSDL file of FedoraGSearch's web services. Then it attempts * to instantiate a connection to those web services. * @param url is the new url of the GSearch web services WSDL file */ public void setGSearchWSDLURL(String url) { this.gSearchWSDLURL = url; initSearchFunctionality(); } /** @return the gSearchIndexName, the name of the index Fedora Generic * Search will search in (where GS3 docs have been indexed into). */ public String getGSearchIndexName() { return gSearchIndexName; } /** Sets the member variable gSearchIndexName that specifies the name * of the index containing indexed GS3 documents. Then it attempts * to instantiate a connection to the Fedora GSearch web services using * this changed value for indexName. * @param indexName is the new name of the index containing indexed GS3 * docs that GSearch should search in. 
*/ public void setGSearchIndexName(String indexName) { this.gSearchIndexName = indexName; initSearchFunctionality(); } /** @return the array of the services actually supported by FedoraGS3 */ protected String[] getServiceNames() { return this.serviceNames;} /** * For finding out if the sectionNumber is given as part of the docID. * @param docID is the String that contains the docPID and may also * contain the section number. * @return true if the document identifier docID contains a section- * number, and false if it consists solely of the docPID. * That is, true is returned if *
docID = "greenstone:colName-<docPID>-<sectionNum>"
* and false is returned if *
docID = "greenstone:colName-<docPID>"
* */ protected boolean containsSectionNumber(String docID) { // if there are two hyphens in the docID, then there are sections // (and the section number is appended at end of docID) // docID = "greenstone:colName--" return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN)); } /** This method will extract the docPID from docID and return it. * (If a sectionNumber is suffixed to the docID, the docPID which is * the prefix is returned; otherwise the docID is the docPID and is * returned) * @param docID is the String that contains the docPID and may also * contain the section number. * @return only the docPID portion of the docID. */ protected String getDocPIDFromDocID(String docID) { if(containsSectionNumber(docID)) return docID.substring(0, docID.lastIndexOf(HYPHEN)); // else (if there's no sectionNumber), docID is the docPID return docID; } /** This method will return the section Number, if there's any * suffixed to the docID. Otherwise it will return the empty string * @param docID is the String that contains the docPID and may also * contain the section number. * @return only the sectionID portion of the docID - if any, else "". */ protected String getSectionIDFromDocID(String docID) { if(containsSectionNumber(docID)) return docID.substring( docID.lastIndexOf(HYPHEN)+1, docID.length()); return ""; } /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve * response message that gives the metadata for each collection identified * @param collIDs is an array of fedora pids identifying collections in the * fedora repository * @return a GS3 DocumentMetadataRetrieve response message containing the * EX metadata for all the requested collections */ public String getCollectionMetadata(String[] collIDs) { return getMetadata(collIDs, new String[] {"all"}); } /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve * response message is returned containing the metadata for each document. 
* @param docIDs is an array of document identifiers (docID can either be * <pid>s items (documents) in the fedora repository, or * "<pid>-sectionNumber". * @return a GS3 DocumentMetadataRetrieve response message containing the * EX, DC, DLS metadata for all the requested documents * @param metadata is the list of metadata elements to be retrieved for each doc */ public String getDocumentMetadata(String[] docIDs, String[] metadata) { return getMetadata(docIDs, metadata); } /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve * response message that gives the metadata for the collection identified * @param collID is a fedora pid identifying a collection in its repository * @return a GS3 DocumentMetadataRetrieve response message containing the * EX metadata for the requested collection * @param metadata is the list of metadata elements to be retrieved for each doc */ public String getCollectionMetadata(String collID) { return getMetadata(new String[] {collID}, new String[] {"all"}); } /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve * response message containing the metadata for the document. * @param docID is a document identifier (docID can either be a <pid> * of an item (document) in the fedora repository, or it can be * "<pid>-sectionNumber". * @return a GS3 DocumentMetadataRetrieve response message containing the * EX, DC, DLS metadata for the requested document */ public String getDocumentMetadata(String docID, String[] metadata) { return getMetadata(new String[] {docID}, metadata); } /** @return a greenstone DocumentMetadataRetrieve response for the * documents or collections indicated by the docIDsOrCollIDs. * @param docIDsOrCollIDs is an array of identifiers which may be either the * fedora pids for collections, or otherwise may be a document identifier. 
* In the last case, the document ID may consist of either * "documentPID-sectionNumber" or may just be just fedora documentPID * @param metadata is the list of metadata elements to be retrieved for each doc */ public String getMetadata(String[] docIDsOrCollIDs, String[] metadata) { Document doc = builder.newDocument(); FedoraGS3RunException ex = null; Element docNodeList = doc.createElement( GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); try{ for(int i = 0; i < docIDsOrCollIDs.length; i++) { // create the containing the metadata // for each document docID Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata); docNodeList.appendChild(docNode); } } catch(Exception e) { ex = new FedoraGS3RunException(e); ex.setSpecifics("EX (and/or DC, DLS) metadata datastream"); } Element responseMsg = createResponseMessage(doc, docNodeList, ex, GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve"); try{ return FedoraCommons.elementToString(responseMsg); } catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** Method that takes a new DOM document, as well as an identifier of either * a collection or document (which may be a fedora pid for the collection * or document, or may be the documentPid-sectionNumber for a document) and * returns a documentNode element for it: * <documentNode><metadataList> * <metadata name="">value</metadata> * ... 
* </metadataList></documentNode> * @return documentNode containing the metadata for the collection or * document given by parameter ID * @param id denotes a collection pid, a document pid or a docID of the * form "documentpid-sectionNumber" * @param metadata is the list of metadata elements to be retrieved for each doc */ protected Element getMetadata(Document doc, String id, String[] metadata) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // We're going to create the documentNode nested inside the following // documentNodeList: // // // value // // ... // // // - the docNode on which a metadata // retrieve is being performed Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(id); docNode.setAttributeNode(attribute); // Element metadataList = doc.createElement( GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); String ex = ""; String dc = ""; String dls = ""; if(id.endsWith(_COLLECTION)) { // docID refers to a collection // Obtain the "EX" datastream (extracted metadata) for the collection ex = this.getEX(id); } else { // docID refers to a document // work out the document's fedora PID and section ID, and then // obtain the EX (extracted metadata) and DC datastreams for the doc // Note that EX/DC for pid="greenstone:-docPID-1" // is the same as for pid="greenstone:-docPID" // That is,
refers to the toplevel document docPID // If requested for top-level document, there may also be DLS meta String sectionID = getSectionIDFromDocID(id); String docPID = getDocPIDFromDocID(id); if(sectionID.equals("") || sectionID.equals("1")) { // metadata of toplevel document is requested ex = this.getEX(docPID); // slightly faster than doing //getSectionEXMetadata(docID, "1") dc = this.getDC(docPID); dls = this.getDLS(docPID); } else { ex = getSectionEXMetadata(docPID, sectionID); dc = getSectionDCMetadata(docPID, sectionID); } } String metafields = ""; for(int i = 0; i < metadata.length; i++) { metafields = metafields + metadata[i] + "|"; } // Adding in metadata sets in alphabetical order // DC metadata for a top-level document is different from EX, DLS: // only the element's namespace prefix is "dc", the rest of a tagname // is unknown. if(!dc.equals("")) { addMetadataWithNamespacedTagNames(doc, metadataList, dc, DC, metafields); } // Check if we were supposed to process dls and dc metadata // as well. We only ever do this for top-level documents, // in which case, dls and dc will be non-empty strings if(!dls.equals("")) { addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields); } // we definitely have an EX metadatastream for each // collection object, top-level document object, // and document section item addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields); // now the metadataList has been built up docNode.appendChild(metadataList); return docNode; // return containing the metadata } /** This method retrieves all the metadata elements in the metaDataStream * parameter of the form <"metadataSetNS:metadata">"value"</metadata> where * metadataSetNS is the namespace of each tag, and creates a new element of * the form <metadata name="metadataSetNS:metadata">"value"</metadata> for * each. Each of these are then appended to the metadataList parameter. 
* @param doc is the Document object using which the new metadata Elements * are to be constructed * @param metadataList is the <metadataList> Element to which the new * metadata Elements are to be appended as children. * @param metaDatastream the metadata datastream in string form (e.g. the * Dublin Core metadata stored in the Fedora repository). * @param metadataSet is the constant datastream identifier, e.g. "DC". * At present this method applies to the DC metadata and any others like it * where each tagname is different except for the constant dc: namespace. * @param metafields is a | separated string containing the metadatafields to * extract or "all" if all fields are requested */ protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList, String metaDatastream, String metadataSet, String metafields) throws SAXException, IOException { Document src = builder.parse( new InputSource(new StringReader(metaDatastream))); // The following doesn't work for some reason: to retrieve all elements // whose namespace prefix starts with "dc", we pass "*" for localName //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*"); // Longer way: get the children of the root document NodeList children = src.getDocumentElement().getChildNodes(); for(int i = 0; i < children.getLength(); i++) { String nodeName = children.item(i).getNodeName(); // check that the nodename starts with the metadataSet ("dc") namespace, // which simultaneously ensures that the node's an element: if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) { // need to have a period for Greenstone instead of Fedora's colon nodeName = nodeName.replace(COLON, PERIOD); if(metadataSet.equals(DC)) { // dc:title -> dc.Title nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3))) + nodeName.substring(4); } // get the requested metadata fields if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) { Element 
metatag = (Element)children.item(i); String value = FedoraCommons.getValue(metatag); // value // we're going to put this in our metadata element as // value // create metadata of (name, value) pairs in target DOM (doc) Element metadata = doc.createElement(GSXML.METADATA_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(nodeName); metadata.setAttributeNode(attribute); Text content = doc.createTextNode(value); metadata.appendChild(content); metadataList.appendChild(metadata); } } } } /** This method retrieves all the metadata elements in the metaDataStream * of the form <"namespace:"metadata name="metadataName">value</metadata> * where "namespace" is the namespace prefix of each tag, and metadataName * is the name of the metadata (like author, title). For each element * it creates a corresponding new element of the form * <metadata name="namespace:metadataName">value</metadata>. * Each of these are then appended to the metadataList parameter. * @param doc is the Document object using which the new metadata Elements * are to be constructed * @param metadataList is the <metadataList> Element to which the new * metadata Elements are to be appended as children. * @param metaDatastream the metadata datastream in string form (e.g. the * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora * repository). * @param metadataSet is the constant datastream identifier, * e.g. "DLS" or "EX". * At present this method applies to the DLS and EX metadata as they have * constant tagnames throughout. * @param metafields is a | separated string containing the metadatafields to * extract or "all" if all fields are requested. 
*/ protected void addMetadataWithFixedTagName(Document doc, Element metadataList, String metaDatastream, String metadataSet, String metafields) throws SAXException, IOException { // Namespace prefix can be "ex:" or "dls:" String namespacePrefix = ""; if(!metadataSet.equals(EX)) { // need to have a period for Greenstone instead of Fedora's colon namespacePrefix = metadataSet.toLowerCase() + PERIOD; } Document src = builder.parse( new InputSource(new StringReader(metaDatastream))); NodeList metaTags = src.getElementsByTagName( metadataSet.toLowerCase()+COLON+METADATA); // Looking for tagnames: or for(int i = 0; i < metaTags.getLength(); i++) { Element metatag = (Element)metaTags.item(i); // extract the metadata of (name, value) pairs from src DOM // look for value String name = metatag.hasAttribute(NAME) ? metatag.getAttribute(NAME) : ""; // sometimes, there are several metadata for the same name, in this // case, look for a qualifier and append its value to the name to // distinguish it uniquely: if(metatag.hasAttribute(QUALIFIER)) { name = name + HYPHEN + metatag.getAttribute(QUALIFIER); } name = namespacePrefix + name; // prefix with namespace, if any if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) { String value = FedoraCommons.getValue(metatag); // create metadata of (name, value) pairs in target DOM (doc) Element metadata = doc.createElement(GSXML.METADATA_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(name); metadata.setAttributeNode(attribute); Text content = doc.createTextNode(value); metadata.appendChild(content); metadataList.appendChild(metadata); } } } /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve * response message containing ONLY the Title metadata for the document. * @param docID is a document identifier (docID can either be a <pid> * of an item (document) in the fedora repository, or it can be * "<pid>-sectionNumber". 
* @return a GS3 DocumentMetadataRetrieve response message containing the * Title metadata for the requested document */ public String getTitleMetadata(String docID) { return getTitleMetadata(new String[] { docID }); } /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve * response message containing ONLY the Title metadata for the documents. * @param docIDs is a list of document identifiers (where docID can either be * a <pid> of an item (document) in the fedora repository, or it can be * "<pid>-sectionNumber". * @return a GS3 DocumentMetadataRetrieve response message containing the * Title metadata for all the requested documents */ public String getTitleMetadata(String[] docIDs) { // Must create message of the following form: // // sometitle // Document doc = builder.newDocument(); FedoraGS3RunException ex = null; Element docNodeList = doc.createElement( GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); try{ for(int i = 0; i < docIDs.length; i++) { Element docNode = getTitleMetadata(doc, docIDs[i]); docNodeList.appendChild(docNode); } }catch(Exception e) { ex = new FedoraGS3RunException(e); //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID ex.setSpecifics("EX metadata datastream"); } Element responseMsg = createResponseMessage(doc, docNodeList, ex, GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve"); try{ return FedoraCommons.elementToString(responseMsg); } catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** Method that takes a new DOM document, as well as an identifier of either * a document or document section and returns a documentNode element containing * the title metadata for it: * <documentNode nodeID="docID"><metadataList> * <metadata name="Title">sometitle</metadata> * </metadataList></documentNode> * @return documentNode containing the metadata for the collection or * document given by parameter ID * @param docID denotes the id of 
a document or a document section, so id * is either a document-pid or it's of the form documentpid-sectionNumber */ protected Element getTitleMetadata(Document doc, String docID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // Returns a docNode element of the following form: // // sometitle // // Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(docID); docNode.setAttributeNode(attribute); // Element metaList = doc.createElement( GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); // Element metadata = doc.createElement(GSXML.METADATA_ELEM); // if we connect it all up (append children), we can immediately add // the name attribute into the metadata element: metaList.appendChild(metadata); docNode.appendChild(metaList); metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute String title = ""; String sectionID = getSectionIDFromDocID(docID); String docPID = getDocPIDFromDocID(docID); // check if title of toplevel document is requested if(sectionID.equals("")) title = this.getDocTitle(docPID); else { // title of document section title = this.getSectionTitle(docPID, sectionID); } metadata.appendChild(doc.createTextNode(title)); return docNode; } /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML * containing the requested portion of the document structure of the documents * indicated by docIDs: * @param docID is the document identifier of the document whose hierarchical * structure is requested. The name of the collection is already included in the * docID for a Fedora DL. * @param structure - strings specifying the required structure of the document. * It can be a combination of: ancestors, parent, siblings, children, descendants, entire. * @param info - strings specifying the required structural info of the document. * It can be any combination of: siblingPosition, numSiblings, numChildren. 
*/
    public String getDocumentStructure(String docID, String[] structure, String[] info) {
        // A single identifier is handled as a one-element batch.
        String[] singleDocID = { docID };
        return getStructure(singleDocID, structure, info);
    }

    /** @return a String representing Greenstone3 DocumentStructureRetrieve XML
     * containing the requested portion of the document structure of the documents
     * indicated by docIDs:
     * @param docIDs is an array of document identifiers of documents whose
     * hierarchical structures are requested. The name of the collection is already
     * included in the docID for a Fedora DL.
     * @param structure - strings specifying the required structure of each document.
     * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
     * @param info - strings specifying the required structural info of each document.
     * It can be any combination of: siblingPosition, numSiblings, numChildren.
     */
    public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
        // Public entry point; the work is done by the protected getStructure().
        String response = getStructure(docIDs, structure, info);
        return response;
    }

    /**
     * Returns a greenstone3 DocumentStructureRetrieve XML response message
     * containing the document structures for the given docIDs.
     * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
     * greenstone formatted XML is returned. The requested section of the table
     * of contents (TOC) for a document is converted into the greenstone3 xml
     * format that is returned upon DocumentStructureRetrieve requests.
     * @param docIDs the documentIDs for which the section's structure is returned;
     * where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
     * @param structure - the structure of the sections to return. Can be any combination of:
     * ancestors, parent, siblings, children, descendants, entire.
     * @param infos - strings containing any combination of the values: numChildren, numSiblings,
     * siblingPosition. The requested info gets added as attributes to the returned root element.
* @return a greenstone3 DocumentStructureRetrieve XML response message in * String format with the structure of the docIDs requested. */ protected String getStructure(String[] docIDs, String[] structure, String[] infos) { Document doc = builder.newDocument(); FedoraGS3RunException ex = null; // Element docNodeList = doc.createElement( GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); try{ // append the for the docIDs // to the docNodeList //getStructureElement(docNodeList, docIDs, levels); getStructureElement(docNodeList, docIDs, structure, infos); } catch(Exception e) { ex = new FedoraGS3RunException(e); ex.setSpecifics("(requested portion of) TOC datastream"); } // insert our into a GS3 response message Element responseMsg = createResponseMessage(doc, docNodeList, ex, GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve"); try{ return FedoraCommons.elementToString(responseMsg); } catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** Given a <documentNodeList> portion of a greenstone3 * DocumentStructureRetrieve XML response message, this method will populate * it with the <documentNodes> that represent the structure of the given docIDs. * @param docNodeList is a <documentNodeList> to which <documentNodes> of * the doc structures are appended. * @param docIDs the documentIDs for which the section's structure is returned; * where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>. * @param structures - the structure of the sections to return. Can be any combination of: * ancestors, parent, siblings, children, descendants, entire. * @param infos - a string containing any combination of the values: numChildren, numSiblings, * siblingPosition. The requested info gets added as attributes to the returned root element. 
*/ protected void getStructureElement(Element docNodeList, String[] docIDs, String[] structures, String[] infos) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { // Make one string out of requested structure components, and one string from info components String structure = ""; String info = ""; for(int i = 0; i < structures.length; i++) { structure = structure + structures[i] + "|"; } for(int i = 0; i < infos.length; i++) { info = info + infos[i] + "|"; } // process each docID for(int i = 0; i < docIDs.length; i++) { // work out the document's fedora PID and section ID String sectionID = getSectionIDFromDocID(docIDs[i]); String docPID = getDocPIDFromDocID(docIDs[i]); if(sectionID.equals("")) { sectionID = "1"; } // get the required section, along with children or descendants Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info); Document doc = docNodeList.getOwnerDocument(); // copy-and-convert that structure into a structure format for GS3 Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement); if(!info.equals("")) { // // // // ... 
// Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info"); Element root = srcDocElement.getOwnerDocument().getDocumentElement(); if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) { String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS); Element infoEl = doc.createElement(GSXML.INFO_ATT); infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS); infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings); nodeStructureInfo.appendChild(infoEl); } if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) { String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS); Element infoEl = doc.createElement(GSXML.INFO_ATT); infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS); infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition); nodeStructureInfo.appendChild(infoEl); } if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) { String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN); Element infoEl = doc.createElement(GSXML.INFO_ATT); infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN); infoEl.setAttribute(GSXML.VALUE_ATT, numChildren); nodeStructureInfo.appendChild(infoEl); } if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) { String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE); Element infoEl = doc.createElement(GSXML.INFO_ATT); infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE); infoEl.setAttribute(GSXML.VALUE_ATT, documentType); nodeStructureInfo.appendChild(infoEl); } docNode.appendChild(nodeStructureInfo); } // add it to our list of documentNodes docNodeList.appendChild(docNode); } } /** * Takes the portion of the XML document outlining the structure of the * document (section)--in the format this is stored in Fedora--and returns * Greenstone 3 DOM XML format for outlining document structure. 
* @return a <documentNode> element that contains a greenstone3 * DocumentStructureRetrieve XML corresponding to the parameter Element section * (which is in fedora XML), for the document indicated by docID. * @param requestingDocID is the identifier of the document for which the * structure was requested. It's this document's children or descendants that * will be returned. Note that this is not always the same as (clear from) * parameter docID. * @param docID is the documentID for which the section's structure is * returned where docID = "docPID-sectionNumber". * @param section - the fedora section XML that is being mirrored in * greenstone3 format. */ protected Element getStructure(Document doc, String requestingDocID, String docID, Element section) { // we want to mirror the section's DOM (given in fedora XML) in // greenstone3's XML for a DocumentStructureRetrieve response. // - the docNode on which a structure retrieve // is being performed Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "") docNode.setAttributeNode(attribute); // Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM); // Element rootNode = createDocNodeFromSubsection(doc, section, docID); // fills in the subtree of the rootNode in our nodeStructure element createDocStructure(doc, section, rootNode, docID); //where section represents the root section nodeStructure.appendChild(rootNode); docNode.appendChild(nodeStructure); return docNode; } /** Recursive method that creates a documentStructure mirroring parameter * section, starting from parameter parent down to all descendants * @param section is the XML <Section> in the fedora repository's TOC * for the docPID whose substructure is to be mirrored * @param parent is the XML documentNode in the greenstone repository whose * descendants created by this method will correspond to the 
descendants of * parameter section. * @param doc is the document containing the parent; * @param docPID is the prefix of all nodeIDs in the parent's structure */ protected void createDocStructure( Document doc, Element section, Element parent, String docPID) { // get the section's children (if any) NodeList children = section.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { //then we know it's an element AND that its tagname is "Section" Element subsection = (Element)n; Element child = createDocNodeFromSubsection(doc, subsection, docPID); parent.appendChild(child); // recursion call on newly found child-element and subsection createDocStructure(doc, subsection, child, docPID); } } } /** Given a particular subsection element, this method creates a * Greenstone3 DocumentNode element that mirrors it. * @param doc is the document that will contain the created DocumentNode * @param docID is the prefix of all nodeIDs in the parent's structure * @param subSection is the XML <Section> in the fedora repository's * TOC for the docPID which will be mirrored in the greenstone XML * documentNode that will be returned. * @return a greenstone <documentNode> that represents the fedora TOC's * <Section> element passed as parameter subSection. */ protected Element createDocNodeFromSubsection( Document doc, Element subSection, String docID) { Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT); docType.setValue(GSXML.DOC_TYPE_HIERARCHY); docNode.setAttributeNode(docType); Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT); String sectionID = subSection.hasAttribute(ID) ? subSection.getAttribute(ID) : ""; if(sectionID.equals("1") && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case // reset the attribute without the section number (just "docID" may be important for democlient?) 
nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID); } else { nodeID.setValue(docID + HYPHEN + sectionID); } //nodeID.setValue(docID + HYPHEN + sectionID); docNode.setAttributeNode(nodeID); Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT); if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) { nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT)); } docNode.setAttributeNode(nodeType); return docNode; } /** Given an identifier that is either a docPID or a concatenation of * docPID+sectionID, this method works out the fedora assigned docPID and * sectionID and then calls getContentBody(docPID, sectionID) with those. * @param docID is expected to be of the form * "greenstone:<collectionName>-<docPID>-<sectionNumber>" or * "greenstone:<collectionName>-<docPID>" * If it is "greenstone:<collectionName>-<docPID>", then the content for * "greenstone:<collectionName>-1" ("greenstone:<collectionName>-Section1") * is returned! */ public String getContent(String docID) { return this.getContent(new String[]{docID}); } /** Given an identifier that is a concatenation of docID+sectionID, this * method works out the fedora assigned docPID and sectionID and then calls * getContentBody(docPID, sectionID) with those. * @param docIDs is an array of document identifiers of the form * "greenstone:<collectionName>-<docPID>-<sectionNumber>" * If it is "greenstone:<collectionName>-<docPID>", then the content for * "greenstone:<collectionName>-Section1" is returned! 
*/ public String getContent(String[] docIDs) { Document doc = builder.newDocument(); FedoraGS3RunException ex = null; // Element docNodeList = doc.createElement( GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); try{ for(int i = 0; i < docIDs.length; i++) { // get the sectionID and docPID from the docID String sectionID = this.removePrefix( getSectionIDFromDocID(docIDs[i]), SECTION); String docPID = getDocPIDFromDocID(docIDs[i]); if(sectionID.equals("")) // if no section is specified, get sectionID = "1"; // get the content for Section id="1" // Get the contents for the requested section of document docPID String sectionContent = this.getContentBody(docPID, sectionID); // set the nodeID attribute Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT); nodeId.setValue(docIDs[i]); // just set the docID which will contain // the docPID (and sectionID if already present) docNode.setAttributeNode(nodeId); // set the text content to what was retrieved Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM); Text textNode = doc.createTextNode(sectionContent.trim()); nodeContent.appendChild(textNode); docNode.appendChild(nodeContent); //add the documentNode to the docNodeList docNodeList.appendChild(docNode); } } catch(Exception e) { ex = new FedoraGS3RunException(e); ex.setSpecifics("requested doc Section datastream"); } Element responseMsg = createResponseMessage(doc, docNodeList, ex, GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve"); try{ return FedoraCommons.elementToString(responseMsg); } catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** Gets the contents of a textNode from a section. * @return the text content of a section. * @param docPID the pid of the document from which a section's text is to * be retrieved. * @param sectionID is the section identifier of the document denoted by * docPID whose text is to be returned. 
*/ protected String getContentBody(String docPID, String sectionID) throws RemoteException, UnsupportedEncodingException, SAXException, IOException { String section = this.getSection(docPID, sectionID); // the content is nested inside a
element, // we extract it from there: InputSource source = new InputSource(new StringReader(section)); Document doc = builder.parse(source); // The document Element is the
we want. // Get its text contents: section = FedoraCommons.getValue(doc.getDocumentElement()); // we are going to remove all occurrences of "_httpdocimg_/" // that precede associated filenames, because that's a GS3 // defined macro for resolving relative urls. It won't help // with documents stored in fedora. section = section.replaceAll(GS3FilePathMacro+"/", ""); return section; } /** Here we create the greenstone's response message element: * <message≶<response><content></response></message> * @return a greenstone response-message element. * @param doc - the Document object which should me used to create the * <message> and <response> elements * @param content - the element that is to be nested inside <response> * @param ex - any exception that occurred when trying to create * the content parameter * @param responseType - the value for the type attribute of <response>, * such as "describe", "retrieve", "browse", "query"... * @param originator - indiates the collectionName or service (like * DocumentContentRetrieve) from where this response message originates */ protected Element createResponseMessage(Document doc, Element content, Exception ex, String responseType, String originator) { Element response = doc.createElement(GSXML.RESPONSE_ELEM); // from = "FedoraGS3" Attr attribute = doc.createAttribute(GSXML.FROM_ATT); attribute.setValue(originator); response.setAttributeNode(attribute); // type = "describe" or "process" - whatever's given in requestType: attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(responseType); response.setAttributeNode(attribute); if(content != null) response.appendChild(content); // we'll create an error element for RemoteExceptions (web service problems) // and UnsupportedEncodingExceptions and if(ex != null) { Element error = doc.createElement(GSXML.ERROR_ELEM); error.appendChild(doc.createTextNode(ex.getMessage())); // now append the error to the element (after // the content element whatever that was) 
response.appendChild(error); } Element message = doc.createElement(GSXML.MESSAGE_ELEM); message.appendChild(response); doc.appendChild(message); return message; } /** @return a <serviceList> Element as defined by GS3: containing all the * services (denoted by <service> elements) that are supported by FedoraGS3. * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve, * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse, * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames). * @param doc - the Document object which should me used to create the * <serviceList> element */ protected Element createServiceList(Document doc) { Element serviceList = doc.createElement( GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER); for(int i = 0; i < serviceNames.length; i++) { // create the Element service = doc.createElement(GSXML.SERVICE_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(serviceNames[i]); service.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter attribute.setValue(GSXML.SERVICE_TYPE_BROWSE); else if(serviceNames[i].contains("Query")) // search services attribute.setValue(GSXML.SERVICE_TYPE_QUERY); else attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE); service.setAttributeNode(attribute); // add the service element to the serviceList element // serviceList.appendChild(service); } return serviceList; } /** @return a GS3 response message for a describe services request: * indicating the list of services supported by the Fedora-Greenstone * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve, * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery, * ClassifierBrowseMetadataRetrieve - as indicated by member variable * serviceNames. 
*/ public String getServiceList() { Document doc = builder.newDocument(); Element serviceList = createServiceList(doc); // make the body of the responseMessage: // Element responseMsg = createResponseMessage(doc, serviceList, null, GSXML.REQUEST_TYPE_DESCRIBE, ""); try { return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** @return a GS3 describe response message listing the collections and * collection-specific metadata stored in the Fedora-Greenstone repository. */ public String getCollectionList() { Document doc = builder.newDocument(); FedoraGS3RunException ex = null; // any RemoteException // create the element Element collectionList = doc.createElement( GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER); try{ String[] collectionNames = this.getCollectionNames( this.getCollections()); // this line could throw RemoteException for(int i = 0; i < collectionNames.length; i++) { // create the element Element collection = doc.createElement(GSXML.COLLECTION_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(collectionNames[i]); collection.setAttributeNode(attribute); // append the element as child of collectionList.appendChild(collection); //if(collection.hasAttribute(GSXML.NAME_ATT)) //LOG.debug(collection.getAttribute(GSXML.NAME_ATT)); } } catch(RemoteException e) { // if this happens, perhaps it's because it // can't find Greenstone collections in fedora repository? 
ex = new FedoraGS3RunException(e); ex.setSpecifics( "greenstone collections in fedora repository"); } // make the body of the responseMessage: // Element responseMsg = createResponseMessage(doc, collectionList, ex, GSXML.REQUEST_TYPE_DESCRIBE, ""); try{ return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** @return a GS3 describe response message for a collection in the * Fedora-Greenstone repository. * @param collectionName - the name of the collection that is to be described. * It will be converted to a fedora collection pid, which is of the form * "greenstone:<collectionName>-collection". */ public String describeCollection(String collectionName) { Document doc = builder.newDocument(); FedoraGS3RunException ex = null; Element collection = doc.createElement(GSXML.COLLECTION_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(collectionName); collection.setAttributeNode(attribute); // //"some display name" Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM); attribute = doc.createAttribute(GSXML.LANG_ATT); attribute.setValue(this.lang); displayItem.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(GSXML.DISPLAY_TEXT_NAME); displayItem.setAttributeNode(attribute); try{ Text textNode = doc.createTextNode( this.getCollectionTitle(getCollectionPID(collectionName))); displayItem.appendChild(textNode); } catch(Exception e) { // can't find Greenstone collections in fedora repository or problem // getting their titles from their metadata datastream? ex = new FedoraGS3RunException(e); ex.setSpecifics("greenstone collections or their metadata" + "in the fedora repository"); } // now append the displayItem element as child of the collection element collection.appendChild(displayItem); // get the and add it into the collection description. 
// Services for all collections in the FedoraGS3 repository are the // same, offering a ClassifierBrowse to browse titles by starting letter // and DocRetrieve services: Content, Metadata and Structure. Element serviceList = createServiceList(doc); collection.appendChild(serviceList); Element responseMsg = createResponseMessage(doc, collection, ex, GSXML.REQUEST_TYPE_DESCRIBE, collectionName); try{ return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** @return a GS3 describe response message for the services of a collection * in the Fedora-Greenstone repository. So far, these services are the same for * all fedora collections: they are the services given in member variable * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse, * ClassifierBrowseMetadataRetrieve. * All collections in this Digital Library (Fedora Repository) share the * same services, so this method returns the same services as getServiceList(); * @param collectionName - the name of the collection whose services are to * be described. It will be converted to a fedora collection pid, which is of * the form "greenstone:<collectionName>-collection". 
*/
    public String describeCollectionServices(String collectionName) {
        Document responseDoc = builder.newDocument();

        // A collection element, identified by name, holding the service list
        Element collectionEl = responseDoc.createElement(GSXML.COLLECTION_ELEM);
        collectionEl.setAttribute(GSXML.NAME_ATT, collectionName);
        collectionEl.appendChild(createServiceList(responseDoc));

        Element message = createResponseMessage(responseDoc, collectionEl, null,
            GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
        try {
            return FedoraCommons.elementToString(message);
        } catch (TransformerException e) {
            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
                + " " + e;
        }
    }

    /** All collections in this Digital Library (Fedora Repository) share
     * the same services, so this method returns the same as
     * describeCollectionService(collName, serviceName).
     * @return a GS3 describe response message for the requested service
     * of the given collection. DocumentContent/Metadata/StructureRetrieve
     * return nothing special except their names; browse (and any query)
     * return more complex XML responses.
* @param serviceName - the name of the service in the collection which is to * be described.*/ public String describeService(String serviceName) { // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve) // we return: // // // But for browse (and any query) service, we return the data necessary // for displaying it Document doc = this.builder.newDocument(); Element service = doc.createElement(GSXML.SERVICE_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(serviceName); service.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); if(serviceName.toLowerCase().endsWith("retrieve")) { attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE); } else if(serviceName.toLowerCase().contains("browse")) { attribute.setValue(GSXML.SERVICE_TYPE_BROWSE); // we need name and description elements Element displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse"); service.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION, "Browse pre-defined classification hierarchies"); service.appendChild(displayItem); // now need a classifierList Element classifierList = doc.createElement( GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER); int classifierNum = 1; // append a // for each letter of the alphabet: Element classifier = createClassifierElement(doc, "TitleByLetter", classifierNum++, "titles by letter", "Browse titles by letter"); // now add this to the classifierList.appendChild(classifier); // ANY MORE CLASSIFIERS? 
ADD THEM HERE service.appendChild(classifierList); } // ELSE check for whether it is a query service else if(serviceName.toLowerCase().contains("query")) { attribute.setValue(GSXML.SERVICE_TYPE_QUERY); if(serviceName.equals("TextQuery")) { describeTextQueryService(service); } else if(serviceName.equals("FieldQuery")) { describeFieldQueryService(service); } } // don't forget to add the type attribute to the service! service.setAttributeNode(attribute); String from = serviceName; Element responseMsg = createResponseMessage(doc, service, null, GSXML.REQUEST_TYPE_DESCRIBE, from); try{ return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** Appends children to the parameter service Element that make the * final service Element into a describe response XML for FedoraGS3's * TextQuery service. * @param service is the service Element that is being filled out. */ protected void describeTextQueryService(Element service) { Document doc = service.getOwnerDocument(); // we need name, submit (button) and description elements Element displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Text Search"); service.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search"); service.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION, "Title and full-text search service"); service.appendChild(displayItem); //create the Element paramList = doc.createElement( GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); // we ignore granularity to search at: it will always be // document and section level // we ignore casefolding: always on (that is, case is irrelevant) // we ignore document display order: always ranked // Constructing the following: // // Maximum hits to return // Element param = 
doc.createElement(GSXML.PARAM_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(MAXDOCS); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.DEFAULT_ATT); attribute.setValue("100"); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(GSXML.PARAM_TYPE_INTEGER); param.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Maximum hits to return"); param.appendChild(displayItem); paramList.appendChild(param); // Constructing the following: // // Query string // param = doc.createElement(GSXML.PARAM_ELEM); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(QUERY); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(GSXML.PARAM_TYPE_STRING); param.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Query string"); param.appendChild(displayItem); paramList.appendChild(param); service.appendChild(paramList); } /** Appends children to the parameter service Element that make the * final service Element into a describe response XML for FedoraGS3's * FieldQuery service. * @param service is the service Element that is being filled out. 
*/ protected void describeFieldQueryService(Element service) { Document doc = service.getOwnerDocument(); // we need name, submit (button) and description elements Element displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Form Search"); service.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search"); service.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION, "Simple fielded search"); service.appendChild(displayItem); //create the Element paramList = doc.createElement( GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER); // we ignore granularity to search at: it will always be // document and section level // we ignore casefolding: always on (that is, case is irrelevant) // we ignore document display order: always ranked // Constructing the following: // // Maximum hits to return // Element param = doc.createElement(GSXML.PARAM_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(MAXDOCS); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.DEFAULT_ATT); attribute.setValue("100"); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(GSXML.PARAM_TYPE_INTEGER); param.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Maximum hits to return"); param.appendChild(displayItem); paramList.appendChild(param); // Constructing the following: // // // // // Word or phrase // // // // in field // // // // // // // // Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(SIMPLEFIELD_ATT); rowOfParams.setAttributeNode(attribute); // we want the row of controls to occur multiple times attribute = doc.createAttribute(GSXML.TYPE_ATT); 
attribute.setValue(GSXML.PARAM_TYPE_MULTI); rowOfParams.setAttributeNode(attribute); attribute = doc.createAttribute(OCCURS_ATT); attribute.setValue("4"); // we want this row to occur 4 times rowOfParams.setAttributeNode(attribute); // // Word or phrase // param = doc.createElement(GSXML.PARAM_ELEM); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(QUERY); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(GSXML.PARAM_TYPE_STRING); param.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Word or phrase"); param.appendChild(displayItem); rowOfParams.appendChild(param); // // in field param = doc.createElement(GSXML.PARAM_ELEM); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(FIELDNAME_ATT); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.TYPE_ATT); attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE); param.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.DEFAULT_ATT); attribute.setValue(ALL_FIELDS); param.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "in field"); param.appendChild(displayItem); String[] searchFieldNames = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT}; String[] searchFieldDisplay = {"all titles and full-text", "document titles only", "document and section titles", "full-text only"}; // for each fieldName create an option element and insert // the option into the enum_multi drop-down param: // for(int i = 0; i < searchFieldNames.length; i++) { Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM); attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(searchFieldNames[i]); option.setAttributeNode(attribute); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, searchFieldDisplay[i]); 
option.appendChild(displayItem); param.appendChild(option); // add option to the drop-down box } rowOfParams.appendChild(param); paramList.appendChild(rowOfParams); service.appendChild(paramList); } /** * @return a GS3 describe response message for the requested service * of the given collection. DocumentContent/Metadata/StructureRetrieve * return nothing special except their names; browse (and any query) * return more complex XML responses. * All collections in this Digital Library (Fedora Repository) share * the same services, so this method returns the same as * describeService(serviceName). * @param collectionName - the name of the collection whose service is to * be described. It will be converted to a fedora collection pid, which is of * the form "greenstone:<collectionName>-collection". * @param serviceName - the name of the service in the collection which is to * be described. */ public String describeCollectionService(String collectionName, String serviceName) { // collectionName can be ignored, because all services are FedoraGS3 // services and are not unique to any particular (greenstone) collection. return describeService(serviceName); } /** This method performs the implemented browse operation: allowing the * user to browse the titles of documents in the given collection by letter * and returning the results. * @param collectionName is the name of the collection whose documents * starting with the given letter will be returned. * @param classifierIDs are the ids of the classifiers on which to browse. In * this case, the classifier indicates whether we browse titles by letter, or * browse (documents) by collection; and it is of the form <CL(letter)>. * @param structures - the requested browse substructure. Can be any combination * of ancestors, parent, siblings, children, descendants. * @param infos - the requested structural info. Can be numSiblings, * siblingPosition, numChildren. 
* @return a GS3 ClassifierBrowse response message which lists all * the documents that start with the letter indicated by parameter classifier. */ public String browse(String collectionName, String[] classifierIDs, String[] structures, String[] infos) { // Construct one string from the structures and structural info arrays String structure = ""; String info = ""; for(int i = 0; i < structures.length; i++) { structure = structure + structures[i] + "|"; } for(int i = 0; i < infos.length; i++) { info = info + infos[i] + "|"; } Document doc = builder.newDocument(); FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException // Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER); for(int i = 0; i < classifierIDs.length; i++) { if(classifierIDs[i].startsWith("CL1")) { // browse by titles browseTitlesByLetterClassifier(doc, classifierNodeList, collectionName, classifierIDs[i], structure, info); } } Element responseMsg = createResponseMessage(doc, classifierNodeList, ex, GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse"); try { return FedoraCommons.elementToString(responseMsg); } catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** CL1 browsing classifier: browsing titles by starting letter. * The browsing structure is retrieved. * @param doc - the document object that will contain the CL1 browsing structure. * @param classifierNodeList - the classifiers will be added to this nodeList. * @param collectionName - name of the collection through which we are browsing CL1. * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is * a letter. * @param structure - the requested browse substructure. Can be any combination of * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented. * @param info - the requested structural info. Can be numSiblings, siblingPosition, * numChildren. 
* @return the classifierNodeList with the CL1 classifier browse structure. */ public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList, String collectionName, String classifierID, String structure, String info) { FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException if(structure.indexOf("entire") != -1) { structure = structure + "ancestors|descendants"; } // Structure of ancestors and children only at this stage int firstLevel = classifierID.indexOf('.'); int secondLevel = classifierID.lastIndexOf('.'); // Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM); // requested classifier node Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(classifierID); classNode.setAttributeNode(attribute); Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT); typeAttribute.setValue(GSXML.VLIST); classNode.setAttributeNode(typeAttribute); if(firstLevel == -1) { // CL1 - toplevel node Element root = (Element)classNode.cloneNode(true); // clone the node before appending children classifierNodeList.appendChild(classNode); classNode.appendChild(nodeStructure); nodeStructure.appendChild(root); if(structure.indexOf("descendants") != -1) { getTitlesByLetterStructure(collectionName, root, classifierID, true, null); } else if(structure.indexOf("children") != -1) { getTitlesByLetterStructure(collectionName, root, classifierID, false, null); } // nothing to be done for siblings } else if(firstLevel == secondLevel) { // CL1.x, where x is a number if(structure.indexOf("parent") != -1 || structure.indexOf("ancestors") != -1 || structure.indexOf("siblings") != -1) { String toplevelID = classifierID.substring(0, firstLevel); Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM); attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(toplevelID); toplevelNode.setAttributeNode(attribute); 
typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT); typeAttribute.setValue(GSXML.VLIST); toplevelNode.setAttributeNode(typeAttribute); Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children classifierNodeList.appendChild(toplevelNode); toplevelNode.appendChild(nodeStructure); nodeStructure.appendChild(node); if(structure.indexOf("siblings") != -1) { // get the children of the parents too getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode); // pass the requested node (classNode) so that it is attached in the correct // location among its siblings, and to ensure that it is not recreated. // getTitlesByLetterStructure() will append classNode to node } else { node.appendChild(classNode); } } else { Element node = (Element)classNode.cloneNode(true); classifierNodeList.appendChild(node); node.appendChild(nodeStructure); nodeStructure.appendChild(classNode); } int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x char ch = (char)(num - 1 + 'A'); if(structure.indexOf("descendants") != -1) { getTitlesForLetter(ch, collectionName, classNode, "descendants"); } else if(structure.indexOf("children") != -1) { getTitlesForLetter(ch, collectionName, classNode, "children"); } } else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method"); } return classifierNodeList; } /** Creates a (CL1) subclassifier element for the docs whose titles start with * the given letter. * @param ch - the starting letter of the document titles to retrieve. * @param collectionName - name of the collection through which we are browsing CL1. * @param classifierNode - the docNodes found will be appended to this node. * @param depthStructure - can be descendants or children. Specifies what to retrieve: * gets descendants of any documents found, otherwise gets just the children. 
* @return the given classifierNode which will have the child (or descendant) documents * appended to it. */ public Element getTitlesForLetter(char ch, String collectionName, Element classifierNode, String depthStructure) { Document doc = classifierNode.getOwnerDocument(); FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException // Retrieve the document structure for each subClassifierID: // all the documents that begin with its letter. String letter = String.valueOf(ch); try { String[] docPIDs = this.browseTitlesByLetter(collectionName, letter); if(docPIDs.length == 0) { return classifierNode; // skip letters that don't have any kids } for(int i = 0; i < docPIDs.length; i++) { // work out the document's fedora PID and section ID String sectionID = getSectionIDFromDocID(docPIDs[i]); String docPID = getDocPIDFromDocID(docPIDs[i]); // get the required section, along with children or descendants Element section = getSectionStructureXML(docPID, sectionID, depthStructure, ""); // Element docRootNode = createDocNodeFromSubsection(doc, section, docPID); // fills in the subtree of the rootNode in our nodeStructure element createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section classifierNode.appendChild(docRootNode); } } catch(Exception e) { ex = new FedoraGS3RunException(e); ex.setSpecifics("requested portion of TOC file or trouble with fielded search "); } return classifierNode; } /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the * starting letter of the alphabet. X is each letter of the alphabet for which there * are matching document titles. * @param collectionName - name of the collection through which we are browsing CL1. * @param classifierNode - the docNodes found will be appended to this node. * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create * the IDs for the subclassifiers (CL.x). 
* @param getDescendants - if true, get descendants of any documents found, otherwise * get just the children. * @param wantedSibling - the node (already created) whose siblings are requested. We * need to make sure not to recreate this node when creating its sibling nodes. * @return the given classifierNode, with the CL.x subclassifiers for the letters of * the alphabet that are represented in the document titles. */ public Element getTitlesByLetterStructure(String collectionName, Element classifierNode, String classifierID, boolean getDescendants, Element wantedSibling) { String ID = ""; if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT); } Document doc = classifierNode.getOwnerDocument(); FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException // We're going to loop to the end of the alphabet int count = 1; for(char ch = 'A'; ch <= 'Z'; ch++, count++) { // Retrieve the document structure for each subClassifierID: // all the documents that begin with its letter. 
String letter = String.valueOf(ch); try { String[] docPIDs = this.browseTitlesByLetter(collectionName, letter); if(docPIDs.length == 0) { continue; // skip letters that don't have any kids } Element subClassifier = null; if(wantedSibling != null && ID.equals(classifierID+"."+count)) { // already have the requested node, don't recreate it subClassifier = wantedSibling; } else { // subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM); Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT); typeAttribute.setValue(GSXML.VLIST); subClassifier.setAttributeNode(typeAttribute); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(classifierID+"."+count); subClassifier.setAttributeNode(attribute); } classifierNode.appendChild(subClassifier); // either way, append the subClassifier node if(getDescendants) { // get the documents // append the for the docPIDs found as children // of subclassifier for(int i = 0; i < docPIDs.length; i++) { // work out the document's fedora PID and section ID String sectionID = getSectionIDFromDocID(docPIDs[i]); String docPID = getDocPIDFromDocID(docPIDs[i]); // get the required section, along with children or descendants Element section = getSectionStructureXML(docPID, sectionID, "descendants", ""); // Element rootNode = createDocNodeFromSubsection(doc, section, docPID); // fills in the subtree of the rootNode in our nodeStructure element createDocStructure(doc, section, rootNode, docPID); //where section represents the root section subClassifier.appendChild(rootNode); } } } catch(Exception e) { ex = new FedoraGS3RunException(e); ex.setSpecifics("requested portion of TOC file or " + "trouble with fielded search "); } } return classifierNode; } /** This method performs something equivalent to a greenstone3 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs * @param classNodeIDs array of classifierNode IDs for which the metadata * needs to be returned. 
* @param metafields are the classifier metadata fields that are to be returned. * At present this method ignores them/pretends the requested metafields are * "all" and always returns the Title meta for the requested classifier nodes * (because that is all the metadata this Fedora classifier has at present). * @return a GS3 ClassifierBrowseMetadataRetrieve response message which * lists the metadata for all the classifierNodes passed as parameter.*/ public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields) { Document doc = this.builder.newDocument(); // Element classifierNodeList = doc.createElement( GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER); // create s // for all letters of the alphabet for(int i = 0; i < classNodeIDs.length; i++) { // strip ID of everything before the first '.' (i.e. remove "CL#.") int index = classNodeIDs[i].indexOf('.'); String subClassifierNumber = classNodeIDs[i].substring(index+1); index = subClassifierNumber.indexOf('.'); // find next decimal point, if any if(index != -1) { subClassifierNumber = subClassifierNumber.substring(0, index); } int subClassifierNum = Integer.parseInt(subClassifierNumber); String classifierName = ""; if(subClassifierNum == 0) { // no document titles started with a letter classifierName = "A-Z"; } else { char letter = (char)('A' + subClassifierNum - 1); // A = 1 classifierName = String.valueOf(letter); } // Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM); Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(classNodeIDs[i]); classifierNode.setAttributeNode(attribute); // Element metadataList = doc.createElement( GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); // at least one metadata element: that of the title of this // classifierNode: // letter Element metadata = this.createNameValuePairElement(doc, GSXML.METADATA_ELEM, "Title", classifierName); // now connect up everything metadataList.appendChild(metadata); classifierNode.appendChild(metadataList); 
classifierNodeList.appendChild(classifierNode); } Element responseMsg = createResponseMessage(doc, classifierNodeList, null, GSXML.REQUEST_TYPE_PROCESS, //collName + "ClassifierBrowseMetadataRetrieve"); try{ return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } /** @return a newly created element of the following format: * <classifier content="somecontent" name="CL+num"> * <displayItem name="name">someClassifierName</displayItem> * <displayItem name="description">Browse by classifier name</displayItem> * </classifier> * @param doc - the document used to create the element * @param content - value of the content attribute * @param classifierNum - the number suffixed to the CL, together forming * the classifier Node's ID * @param displayNameVal is the bodytext of a named displayItem element * @param displayDescrVal is the bodytext of a displayItem element with * description */ protected Element createClassifierElement(Document doc, String content, int classifierNum, String displayNameVal, String displayDescrVal) { final String CL = "CL"; Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM); // content attribute Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT); att.setValue(content); classifier.setAttributeNode(att); // name attribute att = doc.createAttribute(GSXML.NAME_ATT); att.setValue(CL + classifierNum); classifier.setAttributeNode(att); // now create the displayItem children for classifier: // #letter // Browse titles starting with #letter Element displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal); classifier.appendChild(displayItem); displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal); classifier.appendChild(displayItem); return classifier; } /** @return a newly created element of the following 
format: * <elementName name="somename">"some display value"</elementName> * @param doc - the document used to create the element * @param elementName - the tag name * @param name - value of attribute name * @param value - the body text of the element */ protected Element createNameValuePairElement(Document doc, String elementName, String name, String value) { // "some display value" Element element = doc.createElement(elementName); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(name); element.setAttributeNode(attribute); element.appendChild(doc.createTextNode(value)); return element; } /** * @param collection is the collection to search in * @param query is the query term to search for. It won't specify the * indexed field to search in, which will mean that GSearch will * search all default indexed fields. * @param maxDocs is the maximum number of results to return (which * at present we consider equivalent to FedoraGSearch's hitpageSize). */ public String[] textQuery(String collection, String query, int maxDocs) throws Exception { // no need to search there is no query or query is empty spaces if(query.trim().equals("")) return new String[]{}; // QUERY value won't specify indexed field to search, Fedora // Gsearch will take that as meaning all default indexed fields. // Params to search() method below: string of fielded query terms; // hitpageStart, hitpageEnd, snippetsMax (leave that 0) query = query + " " + "PID" + COLON + GREENSTONE; String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0); // now we have the XML returned by FedoraGSearch, get the pids // of the documents returned (if any) String[] pids = this.fedoraGSearch.getPIDsFromSearchResult( collection, searchResult); return pids; } /** * This method performs a fieldquery, searching for x number of phrases * in each of the 4 indexed fields. 
* @param collection is the collection to search in * @param nameValParamsMap is a Map of several(key, value) entries, * 4 of which we're concerned with here: * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT * - the values are a comma separated list of terms (phrases or single * words) to search that field in. There may be more than 1 or * there may be none (in which case there may be N empty values or * spaces separated by commas). * @param maxDocs is the maximum number of results to return (which * at present we consider equivalent to FedoraGSearch's hitpageSize). * */ public String[] fieldQuery(String collection, Map nameValParamsMap, int maxDocs) throws Exception { // we're going to maintain a list of UNIQUE pids that were returned // in search results. Hence we use Set: java.util.Set set = new java.util.HashSet(); // (1) Use Fedora's search to search document titles, if they were // specified: String[] docTitlepids = {}; String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES); if(docTitleTerms != null) { // no doc titles may have been specified String[] phrases = docTitleTerms.split(COMMA); // search the individual phrases first: for(int i = 0; i < phrases.length; i++) { if(phrases.equals("") || phrases.equals(" ")) continue; //skip when there are no terms docTitlepids = this.searchDocumentTitles( collection, phrases[i], false); for(int j = 0; j < docTitlepids.length; j++) set.add(docTitlepids[j]); } } // (2) use FedoraGSearch to search doc AND section titles, and // fulltext (in case these were specified in nameValParamsMap): String searchResult = this.fedoraGSearch.search( nameValParamsMap, 1, maxDocs); String[] pids = this.fedoraGSearch.getPIDsFromSearchResult( collection, searchResult); for(int i = 0; i < pids.length; i++) set.add(pids[i]); pids = null; pids = new String[set.size()]; set.toArray(pids); // unique pids return pids; } /** @return a String representing Greenstone3 XML for a query process * response returning the results for 
the query denoted by parameter * nameValParamsMap. * @param nameValParamsMap is a Hashmap of name and value pairs for all the * query field data values. The names match the field names that * describeCollectionService() would have returned for the query service. * @param collection is the name of the collection * @param service is the name of the query service * This method is only ever called when any of the services in the digital * library described themselves as type=query. Therefore any digital * libraries that have no query services, can just return emtpy message * strings (or even "") since this method will never be called on them * anyway. */ public String query(String collection, String service, Map nameValParamsMap) { FedoraGS3RunException ex = null; // (1) obtain the requested number of maximum result documents int maxDocs = 100; try{ maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS)); } catch(NumberFormatException e) { maxDocs = 100; } String pids[] = {}; // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch if(service.endsWith("TextQuery")) { try { // get the Query field: String query = (String)nameValParamsMap.get(QUERY); pids = textQuery(collection, query, maxDocs); } catch(Exception e) { LOG.error("Error in TextQuery processing: " + e); ex = new FedoraGS3RunException( "When trying to use FedoraGenericSearch for a TextQuery", e); } } else { // (3) FieldQuery // first get the comma-separated lists String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT); String listOfSearchTerms = (String)nameValParamsMap.get(QUERY); // both are comma separated lists, so split both on 'comma' String[] fieldNames = listOfFieldNames.split(COMMA); String[] searchTerms = listOfSearchTerms.split(COMMA); // In the fieldNames and searchTerms lists of nameValParamsMap, // each searchTerm element was matched with its correspondingly // indexed fieldName. 
// A new map is going to reorganise this, by putting all terms // for a particular fieldName together in a comma separated list // and associating that with the fieldName. I.e. (key, value) -> // (fieldName, comma-separated list of all terms in that field) Map map = new HashMap(); for(int i = 0; i < searchTerms.length; i++) { // there may be fewer searchTerms than fieldNames (since some // fieldNames may have been left empty), so loop on searchTerms if(map.containsKey(fieldNames[i])) { // fieldName is already // in the list, so append comma with new value String termsList = (String)map.get(fieldNames[i]); termsList = termsList + COMMA + searchTerms[i]; map.put(fieldNames[i], termsList); } else { // this is the first time this fieldName occurred // just put the fieldName with searchTerm as-is map.put(fieldNames[i], searchTerms[i]); } } try { // For fieldquery, we search on all the fieldNames specified // - if DOC_TITLES is specified then we use Fedora's search // - for all other fieldNames specified, we use FedoraGSearch pids = fieldQuery(collection, map, maxDocs); } catch(Exception e) { LOG.error("Error in FieldQuery processing: " + e); ex = new FedoraGS3RunException( "When trying to use FedoraGenericSearch for a FieldQuery", e); } } // Build Greenstone XML Query response message for from // the pids (which should be document identifiers) Document doc = builder.newDocument(); // // Element metadataList = doc.createElement( GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); Element metadata = doc.createElement(GSXML.METADATA_ELEM); Attr attribute = doc.createAttribute(GSXML.NAME_ATT); attribute.setValue(NUM_DOCS_MATCHED); metadata.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.VALUE_ATT); attribute.setValue(Integer.toString(pids.length)); metadata.setAttributeNode(attribute); metadataList.appendChild(metadata); // // // ... // ... 
// Element docNodeList = doc.createElement( GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER); // for each for(int i = 0; i < pids.length; i++) { Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM); attribute = doc.createAttribute(GSXML.NODE_ID_ATT); attribute.setValue(pids[i]); docNode.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT); attribute.setValue("hierarchy"); docNode.setAttributeNode(attribute); attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT); attribute.setValue("root"); docNode.setAttributeNode(attribute); docNodeList.appendChild(docNode); } Element responseMsg = createResponseMessage(doc, docNodeList, ex, GSXML.REQUEST_TYPE_PROCESS, service); try{ return FedoraCommons.elementToString(responseMsg); }catch(TransformerException e) { return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg + " " + e; } } // FOR NOW, add the new method that converts URLs to document identifiers(PIDs) /** Given a URL that represents a fedoraPID, will look up the object. * If it exists, it will return the contents of the DC:Title of its datastream. * If it doesn't exist, it will return the URL as-is. * @param URL: the URL that (after modification) represents a fedoraPID to look up. * @param collection: the name of collection in which to search for the URL * representing a fedoraPID. * @return the string (representing a fedoraPID) stored in the DC:Title of the * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection, * then the parameter URL is returned. 
*/
    public String getPIDforURL(String url, String collection) {
        FedoraGS3RunException ex = null; // any RemoteException

        // (1) convert url to the fedorapid: / -> _ and : -> -
        // and then prefix the greenstone-http namespace and the collection
        String escapedURL = url.replace('/', '_').replace(':', '-');
        String fedoraPID
            = GREENSTONE + _HTTP + COLON + collection + HYPHEN + escapedURL;

        // (2) Look up the dc:title for the fedorapid. It stays empty if
        // the lookup fails.
        String dcTitle = "";
        try {
            dcTitle = getDCTitle(fedoraPID);
        } catch(Exception e) {
            LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
            ex = new FedoraGS3RunException(
                "When trying to retrieve dc:title for URL: " + url, e);
        }

        // (3) without a dc:title, hand back the url unchanged
        if(dcTitle.equals("")) {
            return url;
        }
        // Otherwise the dc:title represents a fedoraPID of its own. The
        // fedora namespace prefix is not added here (the original author
        // notes that this is handled in g2f-buildcol.pl); only "-1" is
        // suffixed.
        return dcTitle + "-1";
    }

    /** Manual test driver: connects using the values of the file
     * fedoraGS3.properties, exercises the describe, retrieve, browse and
     * query methods and prints every response to standard output. Any
     * exception is shown in a dialog and its stack trace is printed.
     * @param args is not used. */
    public static void main(String args[]) {
        try {
            // use the constructor that takes a properties file to show
            // the initial fedora server values
            File propertyFilename = new File("fedoraGS3.properties");
            FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);

            // DESCRIBE: serviceList, collectionList
            System.out.println("serviceList:\n" + con.getServiceList());
            System.out.println("collectionList:\n" + con.getCollectionList());

            String[] colPIDs = con.getCollections();
            String[] collectionNames
                = con.getCollectionNames(con.getCollections());
            for(int c = 0; c < collectionNames.length; c++) {
                String collectionName = collectionNames[c];
                System.out.println("Describing collections:\n");
                System.out.println(con.describeCollection(collectionName));
                System.out.println("Describing collection services:\n"
                        + con.describeCollectionServices(collectionName));
            }

            String[] serviceNames = con.getServiceNames();
            for(int s = 0; s < serviceNames.length; s++) {
                String serviceName = serviceNames[s];
                System.out.println("Describing " + serviceName + ":\n"
                        + con.describeCollectionService("demo", serviceName));
            }

            // TRYING OUT SPECIAL top-level document metadata retrieval
            // (DLS, DC) along with EX of the top-level document:
            System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
            String[] topLevelDoc
                = {"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"};
            System.out.println(
                    con.getDocumentMetadata(topLevelDoc, new String[]{"all"}));

            String[] docIDs = con.getCollectionDocs(colPIDs[0]);

            System.out.println("\nGET CONTENT:");
            for(int d = 0; d < docIDs.length; d++) {
                System.out.println(con.getContent(docIDs[d]));
            }

            System.out.println("\nGET META:");
            for(int d = 0; d < docIDs.length; d++) {
                System.out.println(
                        con.getDocumentMetadata(docIDs[d], new String[]{"all"}));
            }

            String[] getTitlesFor = {
                "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
                "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
                "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
                "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
                "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
            };

            // first let's display the regular meta for top-level docs and
            // their sections
            for(int t = 0; t < getTitlesFor.length; t++) {
                System.out.println(con.getDocumentMetadata(
                        getTitlesFor[t], new String[]{"all"}));
            }

            System.out.println("\nTitles are:");
            System.out.println(con.getTitleMetadata(getTitlesFor));

            System.out.println("\nGET STRUCTURE:");
            for(int d = 0; d < docIDs.length; d++) {
                String withDescendants = con.getDocumentStructure(docIDs[d],
                        new String[] {"descendants"},
                        new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN});
                System.out.println(
                        "Descendents and numChildren:\n" + withDescendants);

                String withParent = con.getDocumentStructure(docIDs[d],
                        new String[] {"parent"},
                        new String[] {AbstractBasicDocument.INFO_NUM_SIBS});
                System.out.println("Parent and numSiblings:\n" + withParent);
            }

            // TEST ERROR CASES:
            System.out.println("\nTESTING ERROR CASES");
            System.out.println(con.getContent("greenstone:demo-pinky"));
            String[] errorCases = {
                "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
                "greenstone:demo-pinky"
            };
            System.out.println(con.getContent(errorCases));
            System.out.println(
                    con.getDocumentMetadata(errorCases, new String[]{"all"}));
            System.out.println(con.getDocumentStructure(errorCases,
                    new String[] {"descendants"},
                    new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));

            System.out.println("\nCLASSIFIER BROWSE");
            System.out.println(con.browse("gs2mgdemo",
                    new String[]{"CL1"},
                    new String[] {""},
                    new String[] {""}));

            System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
            // CL1.1 up to CL1.26: one subclassifier per letter
            String[] classNodeIDs = new String[26];
            for(int k = 1; k <= classNodeIDs.length; k++) {
                classNodeIDs[k - 1] = "CL1." + k;
            }
            System.out.println(con.browseMetadataRetrieve(
                    classNodeIDs, new String[]{"all"}));

            System.out.println("Testing query services");

            System.out.println("TEXT QUERY:");
            Map textParams = new HashMap();
            textParams.put(MAXDOCS, "100");
            textParams.put(QUERY, "snails");
            System.out.println(con.query("gs2mgdemo", "TextQuery", textParams));

            System.out.println("FIELD QUERY:");
            Map fieldParams = new HashMap();
            fieldParams.put(MAXDOCS, "100");
            fieldParams.put(QUERY, "interview,Gender equality,cyclone");
            fieldParams.put(FIELDNAME_ATT,
                    "allFields,docTitles,allFields,allFields");
            System.out.println(con.query("gs2mgdemo", "FieldQuery", fieldParams));

            System.exit(0);
        } catch(Exception e) {
            JOptionPane.showMessageDialog(
                    null, e, "Error", JOptionPane.ERROR_MESSAGE);
            e.printStackTrace();
        }
    }
}