[15222] | 1 | /**
|
---|
| 2 | *#########################################################################
|
---|
| 3 | * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
|
---|
| 4 | * of the Greenstone digital library suite from the New Zealand Digital
|
---|
| 5 | * Library Project at the University of Waikato, New Zealand.
|
---|
| 6 | * <BR><BR>
|
---|
| 7 | * Copyright (C) 2008 New Zealand Digital Library Project
|
---|
| 8 | * <BR><BR>
|
---|
| 9 | * This program is free software; you can redistribute it and/or modify
|
---|
| 10 | * it under the terms of the GNU General Public License as published by
|
---|
| 11 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 12 | * (at your option) any later version.
|
---|
| 13 | * <BR><BR>
|
---|
| 14 | * This program is distributed in the hope that it will be useful,
|
---|
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 17 | * GNU General Public License for more details.
|
---|
| 18 | *########################################################################
|
---|
| 19 | */
|
---|
| 20 |
|
---|
| 21 | package org.greenstone.fedora.services;
|
---|
| 22 |
|
---|
| 23 |
|
---|
| 24 | import java.io.StringReader;
|
---|
| 25 |
|
---|
| 26 | import org.apache.log4j.Logger;
|
---|
| 27 | import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
|
---|
| 28 | import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
|
---|
| 29 | import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
|
---|
| 30 | import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
|
---|
[26270] | 31 | import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
|
---|
[15222] | 32 | import org.greenstone.gsdl3.util.GSXML;
|
---|
| 33 | import org.w3c.dom.Document;
|
---|
| 34 | import org.w3c.dom.Element;
|
---|
| 35 | import org.w3c.dom.Attr;
|
---|
| 36 | import org.w3c.dom.Text;
|
---|
| 37 | import org.w3c.dom.NodeList;
|
---|
| 38 | import org.w3c.dom.Node;
|
---|
| 39 | import org.xml.sax.InputSource;
|
---|
| 40 |
|
---|
| 41 | import java.io.File;
|
---|
| 42 | import java.util.HashMap;
|
---|
| 43 | import java.util.Properties;
|
---|
| 44 | import java.util.Map;
|
---|
| 45 |
|
---|
| 46 | import javax.swing.JOptionPane;
|
---|
| 47 |
|
---|
| 48 | import org.xml.sax.SAXException;
|
---|
| 49 | import java.io.UnsupportedEncodingException;
|
---|
| 50 | import java.io.IOException;
|
---|
| 51 | import javax.net.ssl.SSLHandshakeException;
|
---|
| 52 | import java.net.ConnectException;
|
---|
| 53 | import java.net.MalformedURLException;
|
---|
| 54 | import java.rmi.RemoteException;
|
---|
| 55 | import javax.xml.parsers.ParserConfigurationException;
|
---|
| 56 | import javax.xml.transform.TransformerException;
|
---|
| 57 |
|
---|
| 58 | /**
|
---|
| 59 | * Class that extends FedoraConnection in order to be able to use
|
---|
| 60 | * Fedora's web services to retrieve the specific datastreams of
|
---|
| 61 | * Greenstone documents stored in Fedora's repository. This class
|
---|
| 62 | * provides methods that convert those datastreams into Greenstone3
|
---|
| 63 | * XML response messages which are returned.
|
---|
| 64 | * @author ak19
|
---|
| 65 | */
|
---|
| 66 | public class FedoraGS3Connection
|
---|
| 67 | extends FedoraConnection implements FedoraToGS3Interface,
|
---|
| 68 | FedoraToGS3Interface.Constants
|
---|
| 69 | {
|
---|
| 70 | /** The logging instance for this class */
|
---|
| 71 | private static final Logger LOG = Logger.getLogger(
|
---|
| 72 | FedoraGS3Connection.class.getName());
|
---|
[21859] | 73 |
|
---|
| 74 | /** Default name of Fedora index */
|
---|
| 75 | private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
|
---|
| 76 |
|
---|
[15222] | 77 | /** Complete list of services that our FedoraGS3 would
|
---|
| 78 | * support if everything goes well. If a connection to FedoraGSearch
|
---|
| 79 | * cannot be established, the query services will no longer be
|
---|
| 80 | * available. The actual services supported are given by member
|
---|
| 81 | * variable serviceNames. */
|
---|
| 82 | protected static final String[] SERVICES = {
|
---|
| 83 | "DocumentContentRetrieve", "DocumentMetadataRetrieve",
|
---|
| 84 | "DocumentStructureRetrieve",
|
---|
| 85 | "TextQuery", "FieldQuery",
|
---|
| 86 | "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
|
---|
| 87 | };
|
---|
| 88 |
|
---|
| 89 | /** List of services actually supported by our FedoraGS3 repository
|
---|
| 90 | * after construction. If FedoraGenericSearch can't be connected to,
|
---|
| 91 | * then query services will not be offered */
|
---|
| 92 | protected String[] serviceNames;
|
---|
| 93 |
|
---|
| 94 | /** The object used to connect to FedoraGenericSearch, which is used
|
---|
| 95 | * for doing full-text searching */
|
---|
| 96 | protected GSearchConnection fedoraGSearch;
|
---|
| 97 |
|
---|
| 98 | /** The url for the wsdl file of FedoraGSearch's web services
|
---|
| 99 | * by default this will be the Fedora server's base URL
|
---|
| 100 | * concatenated to "gsearch/services/FgsOperations?wsdl" */
|
---|
| 101 | protected String gSearchWSDLURL;
|
---|
[15733] | 102 |
|
---|
| 103 | /** The last part of the gSearchWSDL URL. The first part is
|
---|
| 104 | * the same as the fedora server's base url. */
|
---|
| 105 | protected String gSearchWSDLSuffix;
|
---|
| 106 |
|
---|
[15437] | 107 | /** The name of the index that FedoraGSearch will index the GS3
|
---|
| 108 | * documents into. If no name is specified in the properties file,
|
---|
| 109 | * this will default to DEFAULT_FEDORA_INDEX ("BasicIndex"). */
|
---|
| 110 | protected String gSearchIndexName;
|
---|
| 111 |
|
---|
/**
 * Five-argument constructor with the same signature as the one in the
 * superclass FedoraConnection; every argument is forwarded unchanged to it.
 * <p>
 * The default location assumed for the FedoraGSearch WSDL is the Fedora
 * base URL followed by "gsearch/services/FgsOperations?wsdl". To use a
 * different location, call setGSearchWSDLURL(url) after construction.
 *
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for administrative
 * authentication required to access the fedora server.
 * @param fedoraServerPassword is the password for administrative
 * authentication required to access the fedora server. If no password was
 * set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
        String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
    // NOTE(review): the original author's note says the superclass
    // constructor triggers this class's initialisation hook, which in turn
    // tries to instantiate the GSearchConnection. Which hook is used
    // (setInitialisationProperties(...) or the init(...) override) is
    // decided in FedoraConnection and cannot be confirmed from this file.
}
|
---|
| 137 |
|
---|
/**
 * No-argument constructor with the same signature as the one in the
 * superclass FedoraConnection, to which it simply delegates.
 * <p>
 * As described by the original author, the superclass displays a small
 * dialog requesting the host, port, administrative username and password
 * of the fedora server. If no password was set on the fedora repository
 * when installing it, the password field can be left blank.
 */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    super();
    // Per the original author's note: super() will call
    // setInitialisationProperties(properties), and that will try to
    // instantiate the GSearchConnection.
}
|
---|
| 152 |
|
---|
/**
 * Single-argument constructor with the same signature as the one in the
 * superclass FedoraConnection, to which it simply delegates.
 * <p>
 * As described by the original author, the superclass loads connection
 * initialisation values from the given properties file and then displays
 * a small dialog requesting the host, port, administrative username and
 * password of the fedora server, pre-filled with the values found in the
 * file. Fields whose initialisation values are not present in the file are
 * shown blank. If no password was set on the fedora repository when
 * installing it, the password field can be left blank.
 *
 * @param propertiesFilename is the properties file from which preset
 * connection values may be read
 */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    super(propertiesFilename);
    // Per the original author's note: super() will call
    // setInitialisationProperties(properties), and that will try to
    // instantiate the GSearchConnection.
}
|
---|
| 172 |
|
---|
| 173 | /** The superclass constructor calls this method passing any preset
|
---|
| 174 | * properties loaded from a propertiesFile. This method is overridden
|
---|
| 175 | * here in order to instantiate the gSearchConnection based on the
|
---|
[15733] | 176 | * - gSearchWSDLSuffix that will be appended to the fedora base url.
|
---|
| 177 | * (If one was not provided in the properties file, gSearchWSDLURL defaults
|
---|
| 178 | * to something of the form
|
---|
| 179 | * "http://<fedorahost:port>/fedoragsearch/services/FgsOperations?wsdl"
|
---|
| 180 | * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
|
---|
| 181 | * "gsearch/services/FgsOperations?wsdl".
|
---|
[15437] | 182 | * - name of the index into which the GS3 documents have been indexed
|
---|
| 183 | * and which FedoraGenericSearch should use to perform searches. If none is
|
---|
[21573] | 184 | * given in the properties file, then the index name defaults to "FedoraIndex".
|
---|
[15222] | 185 | * @param properties is the Properties Map loaded from a properties file
|
---|
| 186 | * (if there was any) which specifies such things as host and port of the
|
---|
[15733] | 187 | * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
|
---|
| 188 | * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
|
---|
| 189 | * to whatever the final value of this.gSearchWSDLURL' suffix is, and
|
---|
[15437] | 190 | * "gsearch.indexName" will be set to to whatever the final value of
|
---|
| 191 | * this.gSearchIndexName is.
|
---|
[15222] | 192 | */
|
---|
| 193 | protected void setInitialisationProperties(Properties properties)
|
---|
| 194 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 195 | CancelledException, ConnectException, RemoteException,
|
---|
| 196 | SSLHandshakeException, Exception
|
---|
| 197 | {
|
---|
| 198 | super.setInitialisationProperties(properties);
|
---|
[15733] | 199 | // gsearchWSDL URL suffix, if not specified, defaults to
|
---|
| 200 | // "fedoragsearch/services/FgsOperations?wsdl" which is
|
---|
| 201 | // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
|
---|
| 202 | this.gSearchWSDLSuffix = properties.getProperty(
|
---|
| 203 | "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
|
---|
| 204 | this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
|
---|
[15222] | 205 | // Set the property to whatever this.gSearchWSDLURL is now,
|
---|
| 206 | // so that it will be written out to the properties file again
|
---|
[15733] | 207 | properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
|
---|
[15437] | 208 |
|
---|
| 209 | // Similarly for the name of the index FedoraGenericSearch should use
|
---|
| 210 | // when performing searches for GS3 docs stored in Fedora's repository.
|
---|
| 211 | this.gSearchIndexName = properties.getProperty(
|
---|
[21859] | 212 | "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
|
---|
[15437] | 213 | properties.setProperty("gsearch.indexName", this.gSearchIndexName);
|
---|
[15222] | 214 | // Create a connection to FedoraGSearch's web services:
|
---|
| 215 | initSearchFunctionality();
|
---|
| 216 | }
|
---|
[21859] | 217 |
|
---|
| 218 | /** Overridden init method to work with the 5 argument constructor, so that we can
|
---|
| 219 | * bypass using setInitialisationProperties() which works with a Properties map.
|
---|
| 220 | */
|
---|
| 221 | protected void init(String protocol, String host, String port,
|
---|
| 222 | String fedoraServerUsername, String fedoraServerPassword)
|
---|
| 223 | throws ParserConfigurationException, MalformedURLException,
|
---|
| 224 | AuthenticationFailedException, RemoteException, Exception
|
---|
| 225 | {
|
---|
| 226 | super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
|
---|
| 227 | this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
|
---|
| 228 | this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
|
---|
| 229 | this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
|
---|
| 230 | initSearchFunctionality();
|
---|
| 231 | }
|
---|
[15222] | 232 |
|
---|
[21859] | 233 |
|
---|
[15222] | 234 | /** Init method that instantiates a GSearchConnection object used
|
---|
| 235 | * to work with the separate FedoraGSearch web services.
|
---|
| 236 | * The url of the WSDL for FedoraGSearch's web services is worked out
|
---|
| 237 | * from the baseURL of the Fedora server.
|
---|
| 238 | */
|
---|
| 239 | protected void initSearchFunctionality()
|
---|
| 240 | {
|
---|
| 241 | try {
|
---|
[15437] | 242 | this.fedoraGSearch = null;
|
---|
| 243 | this.fedoraGSearch = new GSearchConnection(
|
---|
| 244 | gSearchWSDLURL, gSearchIndexName);
|
---|
[15222] | 245 | this.serviceNames = SERVICES;
|
---|
| 246 | } catch(Exception e){
|
---|
| 247 | LOG.error("Cannot connect to FedoraGSearch's web services at "
|
---|
| 248 | + gSearchWSDLURL + "\nQuery services will not be available.");
|
---|
| 249 | // If an exception occurs, something has gone wrong when
|
---|
| 250 | // trying to connect to FedoraGSearch's web services. This
|
---|
| 251 | // means, we can't offer query services, as that's provided
|
---|
| 252 | // by FedoraGSearch
|
---|
| 253 | serviceNames = null;
|
---|
| 254 | int countOfNonQueryServices = 0;
|
---|
| 255 | for(int i = 0; i < SERVICES.length; i++) {
|
---|
| 256 | // do not count query services
|
---|
| 257 | if(!SERVICES[i].toLowerCase().contains("query")) {
|
---|
| 258 | countOfNonQueryServices++;
|
---|
| 259 | }
|
---|
| 260 | }
|
---|
| 261 | // Services now supported are everything except Query services
|
---|
| 262 | serviceNames = new String[countOfNonQueryServices];
|
---|
[15331] | 263 | int j = 0;
|
---|
[15222] | 264 | for(int i = 0; i < SERVICES.length; i++) {
|
---|
| 265 | if(!SERVICES[i].toLowerCase().contains("query")) {
|
---|
[15331] | 266 | serviceNames[j] = SERVICES[i];
|
---|
| 267 | j++; // valid serviceName, so increment serviceName counter
|
---|
[15222] | 268 | }
|
---|
| 269 |
|
---|
| 270 | }
|
---|
| 271 | }
|
---|
| 272 | }
|
---|
| 273 |
|
---|
| 274 | /** @return the gSearchWSDLURL, the url of the WSDL for the
|
---|
| 275 | * FedoraGSearch web services */
|
---|
| 276 | public String getGSearchWSDLURL() { return gSearchWSDLURL; }
|
---|
| 277 |
|
---|
| 278 | /** Sets the member variable gSearchWSDLURL that specify the location of
|
---|
| 279 | * the WSDL file of FedoraGSearch's web services. Then it attempts
|
---|
| 280 | * to instantiate a connection to those web services.
|
---|
| 281 | * @param url is the new url of the GSearch web services WSDL file */
|
---|
| 282 | public void setGSearchWSDLURL(String url) {
|
---|
[15437] | 283 | this.gSearchWSDLURL = url;
|
---|
[15222] | 284 | initSearchFunctionality();
|
---|
| 285 | }
|
---|
| 286 |
|
---|
[15437] | 287 | /** @return the gSearchIndexName, the name of the index Fedora Generic
|
---|
| 288 | * Search will search in (where GS3 docs have been indexed into). */
|
---|
| 289 | public String getGSearchIndexName() { return gSearchIndexName; }
|
---|
| 290 |
|
---|
| 291 | /** Sets the member variable gSearchIndexName that specifies the name
|
---|
| 292 | * of the index containing indexed GS3 documents. Then it attempts
|
---|
| 293 | * to instantiate a connection to the Fedora GSearch web services using
|
---|
| 294 | * this changed value for indexName.
|
---|
| 295 | * @param indexName is the new name of the index containing indexed GS3
|
---|
| 296 | * docs that GSearch should search in. */
|
---|
| 297 | public void setGSearchIndexName(String indexName) {
|
---|
| 298 | this.gSearchIndexName = indexName;
|
---|
| 299 | initSearchFunctionality();
|
---|
| 300 | }
|
---|
| 301 |
|
---|
[15222] | 302 | /** @return the array of the services actually supported by FedoraGS3 */
|
---|
| 303 | protected String[] getServiceNames() { return this.serviceNames;}
|
---|
| 304 |
|
---|
| 305 | /**
|
---|
| 306 | * For finding out if the sectionNumber is given as part of the docID.
|
---|
| 307 | * @param docID is the String that contains the docPID and may also
|
---|
| 308 | * contain the section number.
|
---|
| 309 | * @return true if the document identifier docID contains a section-
|
---|
| 310 | * number, and false if it consists solely of the docPID.
|
---|
| 311 | * That is, true is returned if
|
---|
| 312 | * <pre>docID = "greenstone:colName-<docPID>-<sectionNum>"</pre>
|
---|
| 313 | * and false is returned if
|
---|
| 314 | * <pre>docID = "greenstone:colName-<docPID>"</pre>
|
---|
| 315 | * */
|
---|
| 316 | protected boolean containsSectionNumber(String docID) {
|
---|
| 317 | // if there are two hyphens in the docID, then there are sections
|
---|
| 318 | // (and the section number is appended at end of docID)
|
---|
| 319 | // docID = "greenstone:colName-<docPID>-<sectionNum>"
|
---|
| 320 | return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
|
---|
| 321 | }
|
---|
| 322 |
|
---|
| 323 | /** This method will extract the docPID from docID and return it.
|
---|
| 324 | * (If a sectionNumber is suffixed to the docID, the docPID which is
|
---|
| 325 | * the prefix is returned; otherwise the docID is the docPID and is
|
---|
| 326 | * returned)
|
---|
| 327 | * @param docID is the String that contains the docPID and may also
|
---|
| 328 | * contain the section number.
|
---|
| 329 | * @return only the docPID portion of the docID.
|
---|
| 330 | */
|
---|
| 331 | protected String getDocPIDFromDocID(String docID) {
|
---|
| 332 | if(containsSectionNumber(docID))
|
---|
| 333 | return docID.substring(0, docID.lastIndexOf(HYPHEN));
|
---|
| 334 | // else (if there's no sectionNumber), docID is the docPID
|
---|
| 335 | return docID;
|
---|
| 336 | }
|
---|
| 337 |
|
---|
| 338 | /** This method will return the section Number, if there's any
|
---|
| 339 | * suffixed to the docID. Otherwise it will return the empty string
|
---|
| 340 | * @param docID is the String that contains the docPID and may also
|
---|
| 341 | * contain the section number.
|
---|
| 342 | * @return only the sectionID portion of the docID - if any, else "".
|
---|
| 343 | */
|
---|
| 344 | protected String getSectionIDFromDocID(String docID) {
|
---|
| 345 | if(containsSectionNumber(docID))
|
---|
| 346 | return docID.substring(
|
---|
| 347 | docID.lastIndexOf(HYPHEN)+1, docID.length());
|
---|
| 348 | return "";
|
---|
| 349 | }
|
---|
| 350 |
|
---|
| 351 | /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
|
---|
| 352 | * response message that gives the metadata for each collection identified
|
---|
| 353 | * @param collIDs is an array of fedora pids identifying collections in the
|
---|
| 354 | * fedora repository
|
---|
| 355 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
| 356 | * EX metadata for all the requested collections */
|
---|
[22300] | 357 | public String getCollectionMetadata(String[] collIDs) {
|
---|
| 358 | return getMetadata(collIDs, new String[] {"all"});
|
---|
[15222] | 359 | }
|
---|
| 360 |
|
---|
| 361 | /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
|
---|
| 362 | * response message is returned containing the metadata for each document.
|
---|
| 363 | * @param docIDs is an array of document identifiers (docID can either be
|
---|
| 364 | * <pid>s items (documents) in the fedora repository, or
|
---|
| 365 | * "<pid>-sectionNumber".
|
---|
| 366 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
[22300] | 367 | * EX, DC, DLS metadata for all the requested documents
|
---|
| 368 | * @param metadata is the list of metadata elements to be retrieved for each doc */
|
---|
| 369 | public String getDocumentMetadata(String[] docIDs, String[] metadata) {
|
---|
| 370 | return getMetadata(docIDs, metadata);
|
---|
[15222] | 371 | }
|
---|
| 372 |
|
---|
| 373 | /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
|
---|
| 374 | * response message that gives the metadata for the collection identified
|
---|
| 375 | * @param collID is a fedora pid identifying a collection in its repository
|
---|
| 376 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
[22300] | 377 | * EX metadata for the requested collection
|
---|
| 378 | * @param metadata is the list of metadata elements to be retrieved for each doc */
|
---|
[15222] | 379 | public String getCollectionMetadata(String collID) {
|
---|
[22300] | 380 | return getMetadata(new String[] {collID}, new String[] {"all"});
|
---|
[15222] | 381 | }
|
---|
| 382 |
|
---|
| 383 | /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
|
---|
| 384 | * response message containing the metadata for the document.
|
---|
| 385 | * @param docID is a document identifier (docID can either be a <pid>
|
---|
| 386 | * of an item (document) in the fedora repository, or it can be
|
---|
| 387 | * "<pid>-sectionNumber".
|
---|
| 388 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
| 389 | * EX, DC, DLS metadata for the requested document */
|
---|
[22300] | 390 | public String getDocumentMetadata(String docID, String[] metadata) {
|
---|
| 391 | return getMetadata(new String[] {docID}, metadata);
|
---|
[15222] | 392 | }
|
---|
| 393 |
|
---|
| 394 | /** @return a greenstone DocumentMetadataRetrieve response for the
|
---|
| 395 | * documents or collections indicated by the docIDsOrCollIDs.
|
---|
| 396 | * @param docIDsOrCollIDs is an array of identifiers which may be either the
|
---|
| 397 | * fedora pids for collections, or otherwise may be a document identifier.
|
---|
| 398 | * In the last case, the document ID may consist of either
|
---|
[22300] | 399 | * "documentPID-sectionNumber" or may just be just fedora documentPID
|
---|
| 400 | * @param metadata is the list of metadata elements to be retrieved for each doc */
|
---|
| 401 | public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
|
---|
[15222] | 402 | {
|
---|
| 403 | Document doc = builder.newDocument();
|
---|
| 404 | FedoraGS3RunException ex = null;
|
---|
| 405 |
|
---|
| 406 | Element docNodeList = doc.createElement(
|
---|
| 407 | GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 408 |
|
---|
| 409 | try{
|
---|
| 410 | for(int i = 0; i < docIDsOrCollIDs.length; i++) {
|
---|
| 411 | // create the <documentNode> containing the metadata
|
---|
| 412 | // for each document docID
|
---|
[22300] | 413 | Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
|
---|
[15222] | 414 | docNodeList.appendChild(docNode);
|
---|
| 415 | }
|
---|
| 416 | } catch(Exception e) {
|
---|
| 417 | ex = new FedoraGS3RunException(e);
|
---|
| 418 | ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
|
---|
| 419 | }
|
---|
| 420 |
|
---|
| 421 | Element responseMsg = createResponseMessage(doc, docNodeList, ex,
|
---|
| 422 | GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
|
---|
| 423 | try{
|
---|
[22300] | 424 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 425 | } catch(TransformerException e) {
|
---|
| 426 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 427 | + " " + e;
|
---|
| 428 | }
|
---|
| 429 | }
|
---|
| 430 |
|
---|
| 431 | /** Method that takes a new DOM document, as well as an identifier of either
|
---|
| 432 | * a collection or document (which may be a fedora pid for the collection
|
---|
| 433 | * or document, or may be the documentPid-sectionNumber for a document) and
|
---|
| 434 | * returns a documentNode element for it:
|
---|
| 435 | * <documentNode><metadataList>
|
---|
| 436 | * <metadata name="">value</metadata>
|
---|
| 437 | * ...
|
---|
| 438 | * </metadataList></documentNode>
|
---|
| 439 | * @return documentNode containing the metadata for the collection or
|
---|
| 440 | * document given by parameter ID
|
---|
| 441 | * @param id denotes a collection pid, a document pid or a docID of the
|
---|
[22300] | 442 | * form "documentpid-sectionNumber"
|
---|
| 443 | * @param metadata is the list of metadata elements to be retrieved for each doc */
|
---|
| 444 | protected Element getMetadata(Document doc, String id, String[] metadata)
|
---|
[15222] | 445 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 446 | SAXException, IOException
|
---|
| 447 | {
|
---|
| 448 | // We're going to create the documentNode nested inside the following
|
---|
| 449 | // documentNodeList:
|
---|
| 450 | // <documentNodeList>
|
---|
| 451 | // <documentNode nodeID=""><metadataList>
|
---|
| 452 | // <metadata name="">value</metadata>
|
---|
| 453 | // </metadataList></documentNode>
|
---|
| 454 | // <documentNode>...</documentNode>
|
---|
| 455 | // </documentNodeList>
|
---|
| 456 | // <documentNodeList>
|
---|
| 457 |
|
---|
[26171] | 458 | // <documentNode nodeID="docID"> - the docNode on which a metadata
|
---|
[15222] | 459 | // retrieve is being performed
|
---|
| 460 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 461 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 462 | attribute.setValue(id);
|
---|
| 463 | docNode.setAttributeNode(attribute);
|
---|
| 464 |
|
---|
| 465 | // <metadataList>
|
---|
| 466 | Element metadataList = doc.createElement(
|
---|
| 467 | GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 468 |
|
---|
| 469 | String ex = "";
|
---|
| 470 | String dc = "";
|
---|
| 471 | String dls = "";
|
---|
| 472 | if(id.endsWith(_COLLECTION)) { // docID refers to a collection
|
---|
| 473 | // Obtain the "EX" datastream (extracted metadata) for the collection
|
---|
| 474 | ex = this.getEX(id);
|
---|
| 475 | }
|
---|
| 476 | else { // docID refers to a document
|
---|
| 477 | // work out the document's fedora PID and section ID, and then
|
---|
| 478 | // obtain the EX (extracted metadata) and DC datastreams for the doc
|
---|
| 479 |
|
---|
| 480 | // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
|
---|
| 481 | // is the same as for pid="greenstone:<colname>-docPID"
|
---|
| 482 | // That is, <Section id="1"> refers to the toplevel document docPID
|
---|
| 483 | // If requested for top-level document, there may also be DLS meta
|
---|
| 484 | String sectionID = getSectionIDFromDocID(id);
|
---|
| 485 | String docPID = getDocPIDFromDocID(id);
|
---|
| 486 | if(sectionID.equals("") || sectionID.equals("1")) {
|
---|
[21573] | 487 | // metadata of toplevel document is requested
|
---|
[15222] | 488 | ex = this.getEX(docPID); // slightly faster than doing
|
---|
| 489 | //getSectionEXMetadata(docID, "1")
|
---|
| 490 | dc = this.getDC(docPID);
|
---|
| 491 | dls = this.getDLS(docPID);
|
---|
| 492 | }
|
---|
| 493 | else {
|
---|
| 494 | ex = getSectionEXMetadata(docPID, sectionID);
|
---|
| 495 | dc = getSectionDCMetadata(docPID, sectionID);
|
---|
| 496 | }
|
---|
| 497 | }
|
---|
| 498 |
|
---|
[22300] | 499 | String metafields = "";
|
---|
| 500 | for(int i = 0; i < metadata.length; i++) {
|
---|
| 501 | metafields = metafields + metadata[i] + "|";
|
---|
| 502 | }
|
---|
| 503 |
|
---|
[15222] | 504 | // Adding in metadata sets in alphabetical order
|
---|
| 505 | // DC metadata for a top-level document is different from EX, DLS:
|
---|
| 506 | // only the element's namespace prefix is "dc", the rest of a tagname
|
---|
| 507 | // is unknown.
|
---|
| 508 | if(!dc.equals("")) {
|
---|
| 509 | addMetadataWithNamespacedTagNames(doc, metadataList,
|
---|
[22300] | 510 | dc, DC, metafields);
|
---|
[15222] | 511 | }
|
---|
| 512 |
|
---|
| 513 | // Check if we were supposed to process dls and dc metadata
|
---|
| 514 | // as well. We only ever do this for top-level documents,
|
---|
| 515 | // in which case, dls and dc will be non-empty strings
|
---|
| 516 | if(!dls.equals("")) {
|
---|
[22300] | 517 | addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
|
---|
[15222] | 518 | }
|
---|
| 519 |
|
---|
| 520 | // we definitely have an EX metadatastream for each
|
---|
| 521 | // collection object, top-level document object,
|
---|
| 522 | // and document section item
|
---|
[22300] | 523 | addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
|
---|
[15222] | 524 |
|
---|
| 525 | // now the metadataList has been built up
|
---|
| 526 | docNode.appendChild(metadataList);
|
---|
| 527 |
|
---|
| 528 | return docNode; // return <documentNode> containing the metadata
|
---|
| 529 | }
|
---|
| 530 |
|
---|
| 531 | /** This method retrieves all the metadata elements in the metaDataStream
|
---|
| 532 | * parameter of the form <"metadataSetNS:metadata">"value"</metadata> where
|
---|
| 533 | * metadataSetNS is the namespace of each tag, and creates a new element of
|
---|
| 534 | * the form <metadata name="metadataSetNS:metadata">"value"</metadata> for
|
---|
| 535 | * each. Each of these are then appended to the metadataList parameter.
|
---|
| 536 | * @param doc is the Document object using which the new metadata Elements
|
---|
| 537 | * are to be constructed
|
---|
| 538 | * @param metadataList is the <metadataList> Element to which the new
|
---|
| 539 | * metadata Elements are to be appended as children.
|
---|
| 540 | * @param metaDatastream the metadata datastream in string form (e.g. the
|
---|
| 541 | * Dublin Core metadata stored in the Fedora repository).
|
---|
| 542 | * @param metadataSet is the constant datastream identifier, e.g. "DC".
|
---|
[22300] | 543 | * At present this method applies to the DC metadata and any others like it
|
---|
| 544 | * where each tagname is different except for the constant dc: namespace.
|
---|
| 545 | * @param metafields is a | separated string containing the metadatafields to
|
---|
| 546 | * extract or "all" if all fields are requested
|
---|
[15222] | 547 | */
|
---|
[22300] | 548 | protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
|
---|
| 549 | String metaDatastream, String metadataSet, String metafields)
|
---|
[15222] | 550 | throws SAXException, IOException
|
---|
| 551 | {
|
---|
| 552 | Document src = builder.parse(
|
---|
| 553 | new InputSource(new StringReader(metaDatastream)));
|
---|
| 554 |
|
---|
| 555 | // The following doesn't work for some reason: to retrieve all elements
|
---|
| 556 | // whose namespace prefix starts with "dc", we pass "*" for localName
|
---|
[22300] | 557 | //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
|
---|
[15222] | 558 |
|
---|
| 559 | // Longer way: get the children of the root document
|
---|
| 560 | NodeList children = src.getDocumentElement().getChildNodes();
|
---|
| 561 |
|
---|
| 562 | for(int i = 0; i < children.getLength(); i++) {
|
---|
| 563 | String nodeName = children.item(i).getNodeName();
|
---|
[22300] | 564 | // check that the nodename starts with the metadataSet ("dc") namespace,
|
---|
[15222] | 565 | // which simultaneously ensures that the node's an element:
|
---|
[22300] | 566 | if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
|
---|
| 567 | // need to have a period for Greenstone instead of Fedora's colon
|
---|
| 568 | nodeName = nodeName.replace(COLON, PERIOD);
|
---|
| 569 | if(metadataSet.equals(DC)) { // dc:title -> dc.Title
|
---|
| 570 | nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
|
---|
| 571 | + nodeName.substring(4);
|
---|
| 572 | }
|
---|
| 573 |
|
---|
| 574 | // get the requested metadata fields
|
---|
| 575 | if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
|
---|
[15222] | 576 | Element metatag = (Element)children.item(i);
|
---|
| 577 | String value = FedoraCommons.getValue(metatag);
|
---|
| 578 | // <dc:tagname>value</dc:tagname>
|
---|
[22300] | 579 | // we're going to put this in our metadata element as
|
---|
| 580 | // <metadata name="dc.Tagname">value</metadata>
|
---|
[15222] | 581 |
|
---|
| 582 | // create metadata of (name, value) pairs in target DOM (doc)
|
---|
| 583 | Element metadata = doc.createElement(GSXML.METADATA_ELEM);
|
---|
| 584 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
[22300] | 585 |
|
---|
[15222] | 586 | attribute.setValue(nodeName);
|
---|
| 587 | metadata.setAttributeNode(attribute);
|
---|
| 588 | Text content = doc.createTextNode(value);
|
---|
| 589 | metadata.appendChild(content);
|
---|
| 590 | metadataList.appendChild(metadata);
|
---|
[22300] | 591 | }
|
---|
[15222] | 592 | }
|
---|
| 593 | }
|
---|
| 594 | }
|
---|
| 595 |
|
---|
| 596 | /** This method retrieves all the metadata elements in the metaDataStream
|
---|
| 597 | * of the form <"namespace:"metadata name="metadataName">value</metadata>
|
---|
| 598 | * where "namespace" is the namespace prefix of each tag, and metadataName
|
---|
| 599 | * is the name of the metadata (like author, title). For each element
|
---|
| 600 | * it creates a corresponding new element of the form
|
---|
[22300] | 601 | * <metadata name="namespace:metadataName">value</metadata>.
|
---|
| 602 | * Each of these are then appended to the metadataList parameter.
|
---|
[15222] | 603 | * @param doc is the Document object using which the new metadata Elements
|
---|
| 604 | * are to be constructed
|
---|
| 605 | * @param metadataList is the <metadataList> Element to which the new
|
---|
| 606 | * metadata Elements are to be appended as children.
|
---|
| 607 | * @param metaDatastream the metadata datastream in string form (e.g. the
|
---|
| 608 | * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
|
---|
| 609 | * repository).
|
---|
| 610 | * @param metadataSet is the constant datastream identifier,
|
---|
| 611 | * e.g. "DLS" or "EX".
|
---|
| 612 | * At present this method applies to the DLS and EX metadata as they have
|
---|
| 613 | * constant tagnames throughout.
|
---|
[22300] | 614 | * @param metafields is a | separated string containing the metadatafields to
|
---|
| 615 | * extract or "all" if all fields are requested.
|
---|
[15222] | 616 | */
|
---|
[22300] | 617 | protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
|
---|
| 618 | String metaDatastream, String metadataSet, String metafields)
|
---|
[15222] | 619 | throws SAXException, IOException
|
---|
| 620 | {
|
---|
| 621 | // Namespace prefix can be "ex:" or "dls:"
|
---|
| 622 | String namespacePrefix = "";
|
---|
| 623 | if(!metadataSet.equals(EX)) {
|
---|
| 624 | // need to have a period for Greenstone instead of Fedora's colon
|
---|
| 625 | namespacePrefix = metadataSet.toLowerCase() + PERIOD;
|
---|
| 626 | }
|
---|
| 627 |
|
---|
| 628 | Document src = builder.parse(
|
---|
| 629 | new InputSource(new StringReader(metaDatastream)));
|
---|
| 630 | NodeList metaTags = src.getElementsByTagName(
|
---|
| 631 | metadataSet.toLowerCase()+COLON+METADATA);
|
---|
| 632 | // Looking for tagnames: <ex:metadata> or <dls:metadata>
|
---|
| 633 |
|
---|
| 634 | for(int i = 0; i < metaTags.getLength(); i++) {
|
---|
| 635 | Element metatag = (Element)metaTags.item(i);
|
---|
| 636 |
|
---|
| 637 | // extract the metadata of (name, value) pairs from src DOM
|
---|
| 638 | // look for <metadata name="name">value</metadata>
|
---|
| 639 | String name = metatag.hasAttribute(NAME) ?
|
---|
| 640 | metatag.getAttribute(NAME) : "";
|
---|
| 641 | // sometimes, there are several metadata for the same name, in this
|
---|
| 642 | // case, look for a qualifier and append its value to the name to
|
---|
| 643 | // distinguish it uniquely:
|
---|
| 644 | if(metatag.hasAttribute(QUALIFIER)) {
|
---|
| 645 | name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
|
---|
| 646 | }
|
---|
[22300] | 647 | name = namespacePrefix + name; // prefix with namespace, if any
|
---|
| 648 | if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
|
---|
| 649 | String value = FedoraCommons.getValue(metatag);
|
---|
| 650 |
|
---|
| 651 | // create metadata of (name, value) pairs in target DOM (doc)
|
---|
| 652 | Element metadata = doc.createElement(GSXML.METADATA_ELEM);
|
---|
| 653 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 654 | attribute.setValue(name);
|
---|
| 655 | metadata.setAttributeNode(attribute);
|
---|
| 656 | Text content = doc.createTextNode(value);
|
---|
| 657 | metadata.appendChild(content);
|
---|
| 658 |
|
---|
| 659 | metadataList.appendChild(metadata);
|
---|
| 660 | }
|
---|
[15222] | 661 | }
|
---|
| 662 | }
|
---|
| 663 |
|
---|
| 664 | /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
|
---|
| 665 | * response message containing ONLY the Title metadata for the document.
|
---|
| 666 | * @param docID is a document identifier (docID can either be a <pid>
|
---|
| 667 | * of an item (document) in the fedora repository, or it can be
|
---|
| 668 | * "<pid>-sectionNumber".
|
---|
| 669 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
| 670 | * Title metadata for the requested document */
|
---|
| 671 | public String getTitleMetadata(String docID) {
|
---|
| 672 | return getTitleMetadata(new String[] { docID });
|
---|
| 673 | }
|
---|
| 674 |
|
---|
| 675 | /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
|
---|
| 676 | * response message containing ONLY the Title metadata for the documents.
|
---|
| 677 | * @param docIDs is a list of document identifiers (where docID can either be
|
---|
| 678 | * a <pid> of an item (document) in the fedora repository, or it can be
|
---|
| 679 | * "<pid>-sectionNumber".
|
---|
| 680 | * @return a GS3 DocumentMetadataRetrieve response message containing the
|
---|
| 681 | * Title metadata for all the requested documents */
|
---|
| 682 | public String getTitleMetadata(String[] docIDs) {
|
---|
| 683 | // Must create message of the following form:
|
---|
| 684 | // <documentNodeList><documentNode nodeID="docID">
|
---|
| 685 | // <metadataList><metadata name="Title">sometitle</metadata>
|
---|
| 686 | // </metadataList></documentNode>
|
---|
| 687 |
|
---|
| 688 | Document doc = builder.newDocument();
|
---|
| 689 | FedoraGS3RunException ex = null;
|
---|
| 690 |
|
---|
| 691 | Element docNodeList = doc.createElement(
|
---|
| 692 | GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 693 | try{
|
---|
| 694 | for(int i = 0; i < docIDs.length; i++) {
|
---|
| 695 | Element docNode = getTitleMetadata(doc, docIDs[i]);
|
---|
| 696 | docNodeList.appendChild(docNode);
|
---|
| 697 | }
|
---|
| 698 | }catch(Exception e) {
|
---|
| 699 | ex = new FedoraGS3RunException(e);
|
---|
[21573] | 700 | //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
|
---|
[15222] | 701 | ex.setSpecifics("EX metadata datastream");
|
---|
| 702 | }
|
---|
| 703 |
|
---|
| 704 | Element responseMsg = createResponseMessage(doc, docNodeList, ex,
|
---|
| 705 | GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
|
---|
| 706 | try{
|
---|
[22300] | 707 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 708 | } catch(TransformerException e) {
|
---|
| 709 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 710 | + " " + e;
|
---|
| 711 | }
|
---|
| 712 | }
|
---|
| 713 |
|
---|
| 714 | /** Method that takes a new DOM document, as well as an identifier of either
|
---|
| 715 | * a document or document section and returns a documentNode element containing
|
---|
| 716 | * the title metadata for it:
|
---|
| 717 | * <documentNode nodeID="docID"><metadataList>
|
---|
| 718 | * <metadata name="Title">sometitle</metadata>
|
---|
| 719 | * </metadataList></documentNode>
|
---|
| 720 | * @return documentNode containing the metadata for the collection or
|
---|
| 721 | * document given by parameter ID
|
---|
| 722 | * @param docID denotes the id of a document or a document section, so id
|
---|
| 723 | * is either a document-pid or it's of the form documentpid-sectionNumber */
|
---|
| 724 | protected Element getTitleMetadata(Document doc, String docID)
|
---|
| 725 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 726 | SAXException, IOException
|
---|
| 727 | {
|
---|
| 728 | // Returns a docNode element of the following form:
|
---|
| 729 | // <documentNode nodeID="docID">
|
---|
| 730 | // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
|
---|
| 731 | // </documentNode>
|
---|
| 732 |
|
---|
| 733 | // <documentNode nodeID="docID">
|
---|
| 734 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 735 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 736 | attribute.setValue(docID);
|
---|
| 737 | docNode.setAttributeNode(attribute);
|
---|
| 738 |
|
---|
| 739 | // <metadataList>
|
---|
| 740 | Element metaList = doc.createElement(
|
---|
| 741 | GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 742 | // <metadata name="Title">
|
---|
| 743 | Element metadata = doc.createElement(GSXML.METADATA_ELEM);
|
---|
| 744 | // if we connect it all up (append children), we can immediately add
|
---|
| 745 | // the name attribute into the metadata element:
|
---|
| 746 | metaList.appendChild(metadata);
|
---|
| 747 | docNode.appendChild(metaList);
|
---|
| 748 | metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
|
---|
| 749 |
|
---|
| 750 | String title = "";
|
---|
| 751 | String sectionID = getSectionIDFromDocID(docID);
|
---|
| 752 | String docPID = getDocPIDFromDocID(docID);
|
---|
| 753 |
|
---|
| 754 | // check if title of toplevel document is requested
|
---|
| 755 | if(sectionID.equals(""))
|
---|
| 756 | title = this.getDocTitle(docPID);
|
---|
| 757 | else { // title of document section
|
---|
| 758 | title = this.getSectionTitle(docPID, sectionID);
|
---|
| 759 | }
|
---|
| 760 |
|
---|
| 761 | metadata.appendChild(doc.createTextNode(title));
|
---|
| 762 |
|
---|
| 763 | return docNode;
|
---|
| 764 | }
|
---|
| 765 |
|
---|
[22300] | 766 | /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
|
---|
| 767 | * containing the requested portion of the document structure of the documents
|
---|
| 768 | * indicated by docIDs:
|
---|
| 769 | * @param docID is the document identifier of the document whose hierarchical
|
---|
| 770 | * structure is requested. The name of the collection is already included in the
|
---|
| 771 | * docID for a Fedora DL.
|
---|
| 772 | * @param structure - strings specifying the required structure of the document.
|
---|
| 773 | * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
|
---|
| 774 | * @param info - strings specifying the required structural info of the document.
|
---|
| 775 | * It can be any combination of: siblingPosition, numSiblings, numChildren.
|
---|
| 776 | */
|
---|
| 777 | public String getDocumentStructure(String docID, String[] structure, String[] info) {
|
---|
| 778 | return getStructure(new String[]{docID}, structure, info);
|
---|
| 779 | }
|
---|
| 780 |
|
---|
| 781 |
|
---|
| 782 | /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
|
---|
| 783 | * containing the requested portion of the document structure of the documents
|
---|
| 784 | * indicated by docIDs:
|
---|
| 785 | * @param docIDs is an array of document identifiers of documents whose
|
---|
| 786 | * hierarchical structures are requested. The name of the collection is already
|
---|
| 787 | * included in the docID for a Fedora DL.
|
---|
| 788 | * @param structure - strings specifying the required structure of each document.
|
---|
| 789 | * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
|
---|
| 790 | * @param info - strings specifying the required structural info of each document.
|
---|
| 791 | * It can be any combination of: siblingPosition, numSiblings, numChildren.
|
---|
| 792 | */
|
---|
| 793 | public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
|
---|
| 794 | return getStructure(docIDs, structure, info);
|
---|
| 795 | }
|
---|
| 796 |
|
---|
| 797 | /**
|
---|
[15222] | 798 | * Returns a greenstone3 DocumentStructureRetrieve XML response message
|
---|
| 799 | * containing the document structures for the given docIDs.
|
---|
| 800 | * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
|
---|
| 801 | * greenstone formatted XML is returned. The requested section of the table
|
---|
| 802 | * of contents (TOC) for a document is converted into the greenstone3 xml
|
---|
| 803 | * format that is returned upon DocumentStructureRetrieve requests.
|
---|
| 804 | * @param docIDs the documentIDs for which the section's structure is returned;
|
---|
| 805 | * where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
|
---|
[22300] | 806 | * @param structure - the structure of the sections to return. Can be any combination of:
|
---|
| 807 | * ancestors, parent, siblings, children, descendants, entire.
|
---|
| 808 | * @param infos - strings containing any combination of the values: numChildren, numSiblings,
|
---|
| 809 | * siblingPosition. The requested info gets added as attributes to the returned root element.
|
---|
[15222] | 810 | * @return a greenstone3 DocumentStructureRetrieve XML response message in
|
---|
| 811 | * String format with the structure of the docIDs requested.
|
---|
| 812 | */
|
---|
[22300] | 813 | protected String getStructure(String[] docIDs, String[] structure, String[] infos)
|
---|
[15222] | 814 | {
|
---|
| 815 | Document doc = builder.newDocument();
|
---|
| 816 | FedoraGS3RunException ex = null;
|
---|
| 817 | // <documentNodeList>
|
---|
| 818 | Element docNodeList = doc.createElement(
|
---|
| 819 | GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 820 |
|
---|
| 821 | try{
|
---|
| 822 | // append the <documentNodes> for the docIDs
|
---|
| 823 | // to the docNodeList
|
---|
[22300] | 824 | //getStructureElement(docNodeList, docIDs, levels);
|
---|
| 825 | getStructureElement(docNodeList, docIDs, structure, infos);
|
---|
[15222] | 826 | } catch(Exception e) {
|
---|
| 827 | ex = new FedoraGS3RunException(e);
|
---|
| 828 | ex.setSpecifics("(requested portion of) TOC datastream");
|
---|
| 829 | }
|
---|
| 830 | // insert our <documentNodeList> into a GS3 response message
|
---|
| 831 | Element responseMsg = createResponseMessage(doc, docNodeList, ex,
|
---|
| 832 | GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
|
---|
| 833 | try{
|
---|
[22300] | 834 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 835 | } catch(TransformerException e) {
|
---|
| 836 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 837 | + " " + e;
|
---|
| 838 | }
|
---|
| 839 | }
|
---|
[22300] | 840 |
|
---|
| 841 |
|
---|
| 842 | /** Given a <documentNodeList> portion of a greenstone3
|
---|
[15222] | 843 | * DocumentStructureRetrieve XML response message, this method will populate
|
---|
| 844 | * it with the <documentNodes> that represent the structure of the given docIDs.
|
---|
| 845 | * @param docNodeList is a <documentNodeList> to which <documentNodes> of
|
---|
| 846 | * the doc structures are appended.
|
---|
| 847 | * @param docIDs the documentIDs for which the section's structure is returned;
|
---|
| 848 | * where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
|
---|
[22300] | 849 | * @param structures - the structure of the sections to return. Can be any combination of:
|
---|
| 850 | * ancestors, parent, siblings, children, descendants, entire.
|
---|
| 851 | * @param infos - a string containing any combination of the values: numChildren, numSiblings,
|
---|
| 852 | * siblingPosition. The requested info gets added as attributes to the returned root element.
|
---|
[15222] | 853 | */
|
---|
[22300] | 854 | protected void getStructureElement(Element docNodeList, String[] docIDs,
|
---|
| 855 | String[] structures, String[] infos)
|
---|
[15222] | 856 | throws RemoteException, UnsupportedEncodingException, SAXException,
|
---|
| 857 | IOException
|
---|
| 858 | {
|
---|
[22300] | 859 | // Make one string out of requested structure components, and one string from info components
|
---|
| 860 | String structure = "";
|
---|
| 861 | String info = "";
|
---|
| 862 | for(int i = 0; i < structures.length; i++) {
|
---|
| 863 | structure = structure + structures[i] + "|";
|
---|
| 864 | }
|
---|
| 865 | for(int i = 0; i < infos.length; i++) {
|
---|
| 866 | info = info + infos[i] + "|";
|
---|
| 867 | }
|
---|
| 868 |
|
---|
| 869 | // process each docID
|
---|
| 870 | for(int i = 0; i < docIDs.length; i++) {
|
---|
| 871 | // work out the document's fedora PID and section ID
|
---|
| 872 | String sectionID = getSectionIDFromDocID(docIDs[i]);
|
---|
| 873 | String docPID = getDocPIDFromDocID(docIDs[i]);
|
---|
| 874 | if(sectionID.equals("")) {
|
---|
| 875 | sectionID = "1";
|
---|
[15222] | 876 | }
|
---|
[22300] | 877 |
|
---|
| 878 | // get the required section, along with children or descendants
|
---|
| 879 | Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
|
---|
| 880 | Document doc = docNodeList.getOwnerDocument();
|
---|
| 881 |
|
---|
| 882 | // copy-and-convert that structure into a structure format for GS3
|
---|
| 883 | Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
|
---|
| 884 |
|
---|
| 885 | if(!info.equals("")) {
|
---|
| 886 | // <nodeStructureInfo>
|
---|
| 887 | // <info name="" value="" />
|
---|
| 888 | // <info name="" value="" />
|
---|
| 889 | // ...
|
---|
| 890 | // </nodeStructureInfo>
|
---|
[26270] | 891 | Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
|
---|
[22300] | 892 | Element root = srcDocElement.getOwnerDocument().getDocumentElement();
|
---|
| 893 |
|
---|
[26270] | 894 | if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
|
---|
| 895 | String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
|
---|
[22300] | 896 | Element infoEl = doc.createElement(GSXML.INFO_ATT);
|
---|
[26270] | 897 | infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
|
---|
[22300] | 898 | infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
|
---|
| 899 | nodeStructureInfo.appendChild(infoEl);
|
---|
| 900 | }
|
---|
| 901 |
|
---|
[26270] | 902 | if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
|
---|
| 903 | String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
|
---|
[22300] | 904 | Element infoEl = doc.createElement(GSXML.INFO_ATT);
|
---|
[26270] | 905 | infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
|
---|
[22300] | 906 | infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
|
---|
| 907 | nodeStructureInfo.appendChild(infoEl);
|
---|
| 908 | }
|
---|
| 909 |
|
---|
[26270] | 910 | if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
|
---|
| 911 | String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
|
---|
[22300] | 912 | Element infoEl = doc.createElement(GSXML.INFO_ATT);
|
---|
[26270] | 913 | infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
|
---|
[22300] | 914 | infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
|
---|
| 915 | nodeStructureInfo.appendChild(infoEl);
|
---|
| 916 | }
|
---|
[26270] | 917 |
|
---|
| 918 | if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
|
---|
| 919 | String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
|
---|
| 920 | Element infoEl = doc.createElement(GSXML.INFO_ATT);
|
---|
| 921 | infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
|
---|
| 922 | infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
|
---|
| 923 | nodeStructureInfo.appendChild(infoEl);
|
---|
| 924 | }
|
---|
| 925 |
|
---|
[22300] | 926 | docNode.appendChild(nodeStructureInfo);
|
---|
| 927 | }
|
---|
| 928 |
|
---|
| 929 | // add it to our list of documentNodes
|
---|
| 930 | docNodeList.appendChild(docNode);
|
---|
| 931 | }
|
---|
[15222] | 932 | }
|
---|
[22300] | 933 |
|
---|
[15222] | 934 |
|
---|
| 935 | /**
|
---|
| 936 | * Takes the portion of the XML document outlining the structure of the
|
---|
| 937 | * document (section)--in the format this is stored in Fedora--and returns
|
---|
| 938 | * Greenstone 3 DOM XML format for outlining document structure.
|
---|
| 939 | * @return a <documentNode> element that contains a greenstone3
|
---|
| 940 | * DocumentStructureRetrieve XML corresponding to the parameter Element section
|
---|
| 941 | * (which is in fedora XML), for the document indicated by docID.
|
---|
| 942 | * @param requestingDocID is the identifier of the document for which the
|
---|
| 943 | * structure was requested. It's this document's children or descendants that
|
---|
| 944 | * will be returned. Note that this is not always the same as (clear from)
|
---|
| 945 | * parameter docID.
|
---|
| 946 | * @param docID is the documentID for which the section's structure is
|
---|
| 947 | * returned where docID = "docPID-sectionNumber".
|
---|
| 948 | * @param section - the fedora section XML that is being mirrored in
|
---|
| 949 | * greenstone3 format.
|
---|
| 950 | */
|
---|
| 951 | protected Element getStructure(Document doc, String requestingDocID,
|
---|
| 952 | String docID, Element section)
|
---|
| 953 | {
|
---|
| 954 | // we want to mirror the section's DOM (given in fedora XML) in
|
---|
| 955 | // greenstone3's XML for a DocumentStructureRetrieve response.
|
---|
| 956 |
|
---|
| 957 | // <documentNode nodeID="docID"> - the docNode on which a structure retrieve
|
---|
| 958 | // is being performed
|
---|
| 959 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 960 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 961 | attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
|
---|
[22300] | 962 | docNode.setAttributeNode(attribute);
|
---|
[15222] | 963 |
|
---|
| 964 | // <nodeStructure>
|
---|
| 965 | Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
|
---|
| 966 |
|
---|
| 967 | // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
|
---|
| 968 | Element rootNode = createDocNodeFromSubsection(doc, section, docID);
|
---|
| 969 |
|
---|
| 970 | // fills in the subtree of the rootNode in our nodeStructure element
|
---|
| 971 | createDocStructure(doc, section, rootNode, docID);
|
---|
| 972 | //where section represents the root section
|
---|
| 973 |
|
---|
| 974 | nodeStructure.appendChild(rootNode);
|
---|
| 975 | docNode.appendChild(nodeStructure);
|
---|
| 976 | return docNode;
|
---|
| 977 | }
|
---|
[22300] | 978 |
|
---|
| 979 |
|
---|
[15222] | 980 | /** Recursive method that creates a documentStructure mirroring parameter
|
---|
| 981 | * section, starting from parameter parent down to all descendants
|
---|
| 982 | * @param section is the XML <Section> in the fedora repository's TOC
|
---|
| 983 | * for the docPID whose substructure is to be mirrored
|
---|
| 984 | * @param parent is the XML documentNode in the greenstone repository whose
|
---|
| 985 | * descendants created by this method will correspond to the descendants of
|
---|
| 986 | * parameter section.
|
---|
| 987 | * @param doc is the document containing the parent;
|
---|
| 988 | * @param docPID is the prefix of all nodeIDs in the parent's structure
|
---|
| 989 | */
|
---|
| 990 | protected void createDocStructure(
|
---|
| 991 | Document doc, Element section, Element parent, String docPID)
|
---|
| 992 | {
|
---|
| 993 | // get the section's children (if any)
|
---|
| 994 | NodeList children = section.getChildNodes();
|
---|
| 995 | for(int i = 0; i < children.getLength(); i++) {
|
---|
| 996 | Node n = children.item(i);
|
---|
| 997 |
|
---|
| 998 | if(n.getNodeName().equals(SECTION_ELEMENT)) {
|
---|
| 999 | //then we know it's an element AND that its tagname is "Section"
|
---|
| 1000 | Element subsection = (Element)n;
|
---|
| 1001 | Element child = createDocNodeFromSubsection(doc, subsection, docPID);
|
---|
| 1002 | parent.appendChild(child);
|
---|
| 1003 |
|
---|
| 1004 | // recursion call on newly found child-element and subsection
|
---|
| 1005 | createDocStructure(doc, subsection, child, docPID);
|
---|
| 1006 | }
|
---|
| 1007 | }
|
---|
| 1008 | }
|
---|
| 1009 |
|
---|
| 1010 | /** Given a particular subsection element, this method creates a
|
---|
| 1011 | * Greenstone3 DocumentNode element that mirrors it.
|
---|
| 1012 | * @param doc is the document that will contain the created DocumentNode
|
---|
| 1013 | * @param docID is the prefix of all nodeIDs in the parent's structure
|
---|
| 1014 | * @param subSection is the XML <Section> in the fedora repository's
|
---|
| 1015 | * TOC for the docPID which will be mirrored in the greenstone XML
|
---|
| 1016 | * documentNode that will be returned.
|
---|
| 1017 | * @return a greenstone <documentNode> that represents the fedora TOC's
|
---|
| 1018 | * <Section> element passed as parameter subSection. */
|
---|
| 1019 | protected Element createDocNodeFromSubsection(
|
---|
| 1020 | Document doc, Element subSection, String docID)
|
---|
| 1021 | {
|
---|
| 1022 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 1023 | Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
|
---|
| 1024 | docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
|
---|
| 1025 | docNode.setAttributeNode(docType);
|
---|
| 1026 |
|
---|
| 1027 | Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1028 | String sectionID = subSection.hasAttribute(ID) ?
|
---|
| 1029 | subSection.getAttribute(ID) : "";
|
---|
[22300] | 1030 | if(sectionID.equals("1")
|
---|
| 1031 | && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
|
---|
[22302] | 1032 | // reset the attribute without the section number (just "docID" may be important for democlient?)
|
---|
| 1033 | nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
|
---|
[22300] | 1034 | } else {
|
---|
| 1035 | nodeID.setValue(docID + HYPHEN + sectionID);
|
---|
| 1036 | }
|
---|
| 1037 | //nodeID.setValue(docID + HYPHEN + sectionID);
|
---|
[15222] | 1038 | docNode.setAttributeNode(nodeID);
|
---|
| 1039 |
|
---|
| 1040 | Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
|
---|
[22300] | 1041 | if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
|
---|
| 1042 | nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
|
---|
| 1043 | }
|
---|
[15222] | 1044 | docNode.setAttributeNode(nodeType);
|
---|
| 1045 | return docNode;
|
---|
| 1046 | }
|
---|
| 1047 |
|
---|
| 1048 |
|
---|
| 1049 | /** Given an identifier that is either a docPID or a concatenation of
|
---|
| 1050 | * docPID+sectionID, this method works out the fedora assigned docPID and
|
---|
| 1051 | * sectionID and then calls getContentBody(docPID, sectionID) with those.
|
---|
| 1052 | * @param docID is expected to be of the form
|
---|
| 1053 | * "greenstone:<collectionName>-<docPID>-<sectionNumber>" or
|
---|
| 1054 | * "greenstone:<collectionName>-<docPID>"
|
---|
| 1055 | * If it is "greenstone:<collectionName>-<docPID>", then the content for
|
---|
| 1056 | * "greenstone:<collectionName>-1" ("greenstone:<collectionName>-Section1")
|
---|
| 1057 | * is returned! */
|
---|
| 1058 | public String getContent(String docID) {
|
---|
| 1059 | return this.getContent(new String[]{docID});
|
---|
| 1060 | }
|
---|
| 1061 |
|
---|
| 1062 | /** Given an identifier that is a concatenation of docID+sectionID, this
|
---|
| 1063 | * method works out the fedora assigned docPID and sectionID and then calls
|
---|
| 1064 | * getContentBody(docPID, sectionID) with those.
|
---|
| 1065 | * @param docIDs is an array of document identifiers of the form
|
---|
| 1066 | * "greenstone:<collectionName>-<docPID>-<sectionNumber>"
|
---|
| 1067 | * If it is "greenstone:<collectionName>-<docPID>", then the content for
|
---|
| 1068 | * "greenstone:<collectionName>-Section1" is returned! */
|
---|
| 1069 | public String getContent(String[] docIDs) {
|
---|
| 1070 | Document doc = builder.newDocument();
|
---|
| 1071 | FedoraGS3RunException ex = null;
|
---|
| 1072 |
|
---|
| 1073 | //<documentNodeList>
|
---|
| 1074 | Element docNodeList = doc.createElement(
|
---|
| 1075 | GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1076 |
|
---|
| 1077 | try{
|
---|
| 1078 | for(int i = 0; i < docIDs.length; i++) {
|
---|
| 1079 | // get the sectionID and docPID from the docID
|
---|
| 1080 | String sectionID = this.removePrefix(
|
---|
| 1081 | getSectionIDFromDocID(docIDs[i]), SECTION);
|
---|
| 1082 | String docPID = getDocPIDFromDocID(docIDs[i]);
|
---|
| 1083 | if(sectionID.equals("")) // if no section is specified, get
|
---|
| 1084 | sectionID = "1"; // get the content for Section id="1"
|
---|
| 1085 |
|
---|
| 1086 | // Get the contents for the requested section of document docPID
|
---|
| 1087 | String sectionContent = this.getContentBody(docPID, sectionID);
|
---|
| 1088 |
|
---|
| 1089 | // set the nodeID attribute
|
---|
| 1090 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 1091 | Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1092 |
|
---|
| 1093 | nodeId.setValue(docIDs[i]); // just set the docID which will contain
|
---|
| 1094 | // the docPID (and sectionID if already present)
|
---|
| 1095 |
|
---|
| 1096 | docNode.setAttributeNode(nodeId);
|
---|
| 1097 | // set the text content to what was retrieved
|
---|
| 1098 | Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
|
---|
[21775] | 1099 | Text textNode = doc.createTextNode(sectionContent.trim());
|
---|
[15222] | 1100 |
|
---|
| 1101 | nodeContent.appendChild(textNode);
|
---|
| 1102 | docNode.appendChild(nodeContent);
|
---|
| 1103 | //add the documentNode to the docNodeList
|
---|
| 1104 | docNodeList.appendChild(docNode);
|
---|
| 1105 | }
|
---|
| 1106 | } catch(Exception e) {
|
---|
| 1107 | ex = new FedoraGS3RunException(e);
|
---|
| 1108 | ex.setSpecifics("requested doc Section datastream");
|
---|
| 1109 | }
|
---|
| 1110 | Element responseMsg = createResponseMessage(doc, docNodeList, ex,
|
---|
| 1111 | GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
|
---|
| 1112 | try{
|
---|
[22300] | 1113 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1114 | } catch(TransformerException e) {
|
---|
| 1115 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1116 | + " " + e;
|
---|
| 1117 | }
|
---|
| 1118 | }
|
---|
| 1119 |
|
---|
| 1120 | /** Gets the contents of a textNode from a section.
|
---|
| 1121 | * @return the text content of a section.
|
---|
| 1122 | * @param docPID the pid of the document from which a section's text is to
|
---|
| 1123 | * be retrieved.
|
---|
| 1124 | * @param sectionID is the section identifier of the document denoted by
|
---|
| 1125 | * docPID whose text is to be returned.
|
---|
| 1126 | */
|
---|
| 1127 | protected String getContentBody(String docPID, String sectionID)
|
---|
| 1128 | throws RemoteException, UnsupportedEncodingException,
|
---|
| 1129 | SAXException, IOException
|
---|
| 1130 | {
|
---|
| 1131 | String section = this.getSection(docPID, sectionID);
|
---|
| 1132 |
|
---|
| 1133 | // the content is nested inside a <Section> element,
|
---|
| 1134 | // we extract it from there:
|
---|
| 1135 | InputSource source = new InputSource(new StringReader(section));
|
---|
| 1136 | Document doc = builder.parse(source);
|
---|
| 1137 |
|
---|
| 1138 | // The document Element is the <Section> we want.
|
---|
| 1139 | // Get its text contents:
|
---|
| 1140 | section = FedoraCommons.getValue(doc.getDocumentElement());
|
---|
| 1141 |
|
---|
| 1142 | // we are going to remove all occurrences of "_httpdocimg_/"
|
---|
| 1143 | // that precede associated filenames, because that's a GS3
|
---|
| 1144 | // defined macro for resolving relative urls. It won't help
|
---|
| 1145 | // with documents stored in fedora.
|
---|
| 1146 | section = section.replaceAll(GS3FilePathMacro+"/", "");
|
---|
| 1147 | return section;
|
---|
| 1148 | }
|
---|
| 1149 |
|
---|
| 1150 | /** Here we create the greenstone's response message element:
|
---|
| 1151 | * <message≶<response><content></response></message>
|
---|
| 1152 | * @return a greenstone response-message element.
|
---|
| 1153 | * @param doc - the Document object which should me used to create the
|
---|
| 1154 | * <message> and <response> elements
|
---|
| 1155 | * @param content - the element that is to be nested inside <response>
|
---|
| 1156 | * @param ex - any exception that occurred when trying to create
|
---|
| 1157 | * the content parameter
|
---|
| 1158 | * @param responseType - the value for the type attribute of <response>,
|
---|
| 1159 | * such as "describe", "retrieve", "browse", "query"...
|
---|
| 1160 | * @param originator - indiates the collectionName or service (like
|
---|
| 1161 | * DocumentContentRetrieve) from where this response message originates
|
---|
| 1162 | */
|
---|
| 1163 | protected Element createResponseMessage(Document doc, Element content,
|
---|
| 1164 | Exception ex, String responseType, String originator)
|
---|
| 1165 | {
|
---|
| 1166 | Element response = doc.createElement(GSXML.RESPONSE_ELEM);
|
---|
| 1167 | // from = "FedoraGS3"
|
---|
[21924] | 1168 | Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
|
---|
| 1169 | attribute.setValue(originator);
|
---|
[15222] | 1170 | response.setAttributeNode(attribute);
|
---|
| 1171 |
|
---|
| 1172 | // type = "describe" or "process" - whatever's given in requestType:
|
---|
| 1173 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1174 | attribute.setValue(responseType);
|
---|
| 1175 | response.setAttributeNode(attribute);
|
---|
| 1176 |
|
---|
| 1177 | if(content != null)
|
---|
| 1178 | response.appendChild(content);
|
---|
| 1179 |
|
---|
| 1180 | // we'll create an error element for RemoteExceptions (web service problems)
|
---|
| 1181 | // and UnsupportedEncodingExceptions and
|
---|
| 1182 | if(ex != null) {
|
---|
| 1183 | Element error = doc.createElement(GSXML.ERROR_ELEM);
|
---|
| 1184 | error.appendChild(doc.createTextNode(ex.getMessage()));
|
---|
| 1185 | // now append the error to the <response> element (after
|
---|
| 1186 | // the content element whatever that was)
|
---|
| 1187 | response.appendChild(error);
|
---|
| 1188 | }
|
---|
| 1189 |
|
---|
| 1190 | Element message = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
| 1191 | message.appendChild(response);
|
---|
| 1192 | doc.appendChild(message);
|
---|
| 1193 | return message;
|
---|
| 1194 | }
|
---|
| 1195 |
|
---|
| 1196 | /** @return a <serviceList> Element as defined by GS3: containing all the
|
---|
| 1197 | * services (denoted by <service> elements) that are supported by FedoraGS3.
|
---|
| 1198 | * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
|
---|
| 1199 | * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
|
---|
| 1200 | * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
|
---|
| 1201 | * @param doc - the Document object which should me used to create the
|
---|
| 1202 | * <serviceList> element */
|
---|
| 1203 | protected Element createServiceList(Document doc)
|
---|
| 1204 | {
|
---|
| 1205 | Element serviceList = doc.createElement(
|
---|
| 1206 | GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1207 |
|
---|
| 1208 | for(int i = 0; i < serviceNames.length; i++) {
|
---|
| 1209 | // create the <service name="serviceName[i]" type="servicetype" />
|
---|
| 1210 | Element service = doc.createElement(GSXML.SERVICE_ELEM);
|
---|
| 1211 |
|
---|
| 1212 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1213 | attribute.setValue(serviceNames[i]);
|
---|
| 1214 | service.setAttributeNode(attribute);
|
---|
| 1215 |
|
---|
| 1216 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1217 | if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
|
---|
| 1218 | attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
|
---|
| 1219 | else if(serviceNames[i].contains("Query")) // search services
|
---|
| 1220 | attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
|
---|
| 1221 | else
|
---|
| 1222 | attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
|
---|
| 1223 | service.setAttributeNode(attribute);
|
---|
| 1224 |
|
---|
| 1225 | // add the service element to the serviceList element
|
---|
| 1226 | // <serviceList><service /></serviceList>
|
---|
| 1227 | serviceList.appendChild(service);
|
---|
| 1228 | }
|
---|
| 1229 | return serviceList;
|
---|
| 1230 | }
|
---|
| 1231 |
|
---|
| 1232 | /** @return a GS3 response message for a describe services request:
|
---|
| 1233 | * indicating the list of services supported by the Fedora-Greenstone
|
---|
| 1234 | * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
|
---|
| 1235 | * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
|
---|
| 1236 | * ClassifierBrowseMetadataRetrieve - as indicated by member variable
|
---|
| 1237 | * serviceNames. */
|
---|
| 1238 | public String getServiceList()
|
---|
| 1239 | {
|
---|
| 1240 | Document doc = builder.newDocument();
|
---|
| 1241 | Element serviceList = createServiceList(doc);
|
---|
| 1242 | // make <serviceList> the body of the responseMessage:
|
---|
| 1243 | // <message><response><serviceList></response></message>
|
---|
| 1244 | Element responseMsg = createResponseMessage(doc, serviceList, null,
|
---|
| 1245 | GSXML.REQUEST_TYPE_DESCRIBE, "");
|
---|
| 1246 | try {
|
---|
[22300] | 1247 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1248 | }catch(TransformerException e) {
|
---|
| 1249 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1250 | + " " + e;
|
---|
| 1251 | }
|
---|
| 1252 | }
|
---|
| 1253 |
|
---|
| 1254 | /** @return a GS3 describe response message listing the collections and
|
---|
| 1255 | * collection-specific metadata stored in the Fedora-Greenstone repository. */
|
---|
| 1256 | public String getCollectionList()
|
---|
| 1257 | {
|
---|
| 1258 | Document doc = builder.newDocument();
|
---|
| 1259 | FedoraGS3RunException ex = null; // any RemoteException
|
---|
| 1260 |
|
---|
| 1261 | // create the <collectionList /> element
|
---|
| 1262 | Element collectionList = doc.createElement(
|
---|
| 1263 | GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1264 | try{
|
---|
| 1265 | String[] collectionNames = this.getCollectionNames(
|
---|
| 1266 | this.getCollections()); // this line could throw RemoteException
|
---|
| 1267 | for(int i = 0; i < collectionNames.length; i++) {
|
---|
| 1268 | // create the <collection name="somename" /> element
|
---|
| 1269 | Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
|
---|
| 1270 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1271 | attribute.setValue(collectionNames[i]);
|
---|
| 1272 | collection.setAttributeNode(attribute);
|
---|
| 1273 |
|
---|
| 1274 | // append the <collection> element as child of <collectionList>
|
---|
| 1275 | collectionList.appendChild(collection);
|
---|
| 1276 |
|
---|
| 1277 | //if(collection.hasAttribute(GSXML.NAME_ATT))
|
---|
| 1278 | //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
|
---|
| 1279 | }
|
---|
| 1280 | } catch(RemoteException e) { // if this happens, perhaps it's because it
|
---|
| 1281 | // can't find Greenstone collections in fedora repository?
|
---|
| 1282 | ex = new FedoraGS3RunException(e);
|
---|
| 1283 | ex.setSpecifics(
|
---|
| 1284 | "greenstone collections in fedora repository");
|
---|
| 1285 | }
|
---|
| 1286 |
|
---|
| 1287 | // make <collectionList> the body of the responseMessage:
|
---|
| 1288 | // <message><response><collectionList></response></message>
|
---|
| 1289 | Element responseMsg = createResponseMessage(doc, collectionList, ex,
|
---|
| 1290 | GSXML.REQUEST_TYPE_DESCRIBE, "");
|
---|
| 1291 | try{
|
---|
[22300] | 1292 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1293 | }catch(TransformerException e) {
|
---|
| 1294 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1295 | + " " + e;
|
---|
| 1296 | }
|
---|
| 1297 | }
|
---|
| 1298 |
|
---|
| 1299 | /** @return a GS3 describe response message for a collection in the
|
---|
| 1300 | * Fedora-Greenstone repository.
|
---|
| 1301 | * @param collectionName - the name of the collection that is to be described.
|
---|
| 1302 | * It will be converted to a fedora collection pid, which is of the form
|
---|
| 1303 | * "greenstone:<collectionName>-collection". */
|
---|
| 1304 | public String describeCollection(String collectionName)
|
---|
| 1305 | {
|
---|
| 1306 | Document doc = builder.newDocument();
|
---|
| 1307 | FedoraGS3RunException ex = null;
|
---|
| 1308 |
|
---|
| 1309 | Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
|
---|
| 1310 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1311 | attribute.setValue(collectionName);
|
---|
| 1312 | collection.setAttributeNode(attribute);
|
---|
| 1313 |
|
---|
| 1314 | //<displayItem assigned="true" lang="en" name="name">
|
---|
| 1315 | //"some display name"</displayItem>
|
---|
| 1316 | Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
|
---|
| 1317 |
|
---|
| 1318 | attribute = doc.createAttribute(GSXML.LANG_ATT);
|
---|
| 1319 | attribute.setValue(this.lang);
|
---|
| 1320 | displayItem.setAttributeNode(attribute);
|
---|
| 1321 |
|
---|
| 1322 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1323 | attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
|
---|
| 1324 | displayItem.setAttributeNode(attribute);
|
---|
| 1325 |
|
---|
| 1326 | try{
|
---|
| 1327 | Text textNode = doc.createTextNode(
|
---|
| 1328 | this.getCollectionTitle(getCollectionPID(collectionName)));
|
---|
| 1329 | displayItem.appendChild(textNode);
|
---|
| 1330 | } catch(Exception e) {
|
---|
| 1331 | // can't find Greenstone collections in fedora repository or problem
|
---|
| 1332 | // getting their titles from their metadata datastream?
|
---|
| 1333 | ex = new FedoraGS3RunException(e);
|
---|
| 1334 | ex.setSpecifics("greenstone collections or their metadata"
|
---|
| 1335 | + "in the fedora repository");
|
---|
| 1336 | }
|
---|
| 1337 | // now append the displayItem element as child of the collection element
|
---|
| 1338 | collection.appendChild(displayItem);
|
---|
| 1339 | // get the <serviceList> and add it into the collection description.
|
---|
| 1340 | // Services for all collections in the FedoraGS3 repository are the
|
---|
| 1341 | // same, offering a ClassifierBrowse to browse titles by starting letter
|
---|
| 1342 | // and DocRetrieve services: Content, Metadata and Structure.
|
---|
| 1343 |
|
---|
| 1344 | Element serviceList = createServiceList(doc);
|
---|
| 1345 | collection.appendChild(serviceList);
|
---|
| 1346 |
|
---|
| 1347 | Element responseMsg = createResponseMessage(doc, collection, ex,
|
---|
| 1348 | GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
|
---|
| 1349 | try{
|
---|
[22300] | 1350 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1351 | }catch(TransformerException e) {
|
---|
| 1352 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1353 | + " " + e;
|
---|
| 1354 | }
|
---|
| 1355 | }
|
---|
| 1356 |
|
---|
| 1357 | /** @return a GS3 describe response message for the services of a collection
|
---|
| 1358 | * in the Fedora-Greenstone repository. So far, these services are the same for
|
---|
| 1359 | * all fedora collections: they are the services given in member variable
|
---|
| 1360 | * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
|
---|
| 1361 | * ClassifierBrowseMetadataRetrieve.
|
---|
[21835] | 1362 | * All collections in this Digital Library (Fedora Repository) share the
|
---|
| 1363 | * same services, so this method returns the same services as getServiceList();
|
---|
[15222] | 1364 | * @param collectionName - the name of the collection whose services are to
|
---|
| 1365 | * be described. It will be converted to a fedora collection pid, which is of
|
---|
| 1366 | * the form "greenstone:<collectionName>-collection". */
|
---|
| 1367 | public String describeCollectionServices(String collectionName)
|
---|
| 1368 | {
|
---|
| 1369 | Document doc = builder.newDocument();
|
---|
| 1370 |
|
---|
| 1371 | Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
|
---|
| 1372 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1373 | attribute.setValue(collectionName);
|
---|
| 1374 | collection.setAttributeNode(attribute);
|
---|
| 1375 |
|
---|
| 1376 | Element serviceList = createServiceList(doc);
|
---|
| 1377 | collection.appendChild(serviceList);
|
---|
| 1378 |
|
---|
| 1379 | Element responseMsg = createResponseMessage(doc, collection, null,
|
---|
| 1380 | GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
|
---|
| 1381 | try{
|
---|
[22300] | 1382 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1383 | }catch(TransformerException e) {
|
---|
| 1384 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1385 | + " " + e;
|
---|
| 1386 | }
|
---|
| 1387 | }
|
---|
| 1388 |
|
---|
| 1389 | /** All collections in this Digital Library (Fedora Repository) share
|
---|
| 1390 | * the same services, so this method returns the same as
|
---|
| 1391 | * describeCollectionService(collName, serviceName).
|
---|
| 1392 | * @return a GS3 describe response message for the requested service
|
---|
| 1393 | * of the given collection. DocumentContent/Metadata/StructureRetrieve
|
---|
| 1394 | * return nothing special except their names; browse (and any query)
|
---|
| 1395 | * return more complex XML responses.
|
---|
| 1396 | * @param serviceName - the name of the service in the collection which is to
|
---|
| 1397 | * be described.*/
|
---|
| 1398 | public String describeService(String serviceName)
|
---|
| 1399 | {
|
---|
| 1400 | // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
|
---|
| 1401 | // we return:
|
---|
| 1402 | // <message><response from="<name>Retrieve" type="describe">
|
---|
| 1403 | // <service name="<name>Retrieve" type="retrieve" /></response></message>
|
---|
| 1404 | // But for browse (and any query) service, we return the data necessary
|
---|
| 1405 | // for displaying it
|
---|
| 1406 |
|
---|
| 1407 | Document doc = this.builder.newDocument();
|
---|
| 1408 | Element service = doc.createElement(GSXML.SERVICE_ELEM);
|
---|
| 1409 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1410 | attribute.setValue(serviceName);
|
---|
| 1411 | service.setAttributeNode(attribute);
|
---|
| 1412 |
|
---|
| 1413 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1414 |
|
---|
[21775] | 1415 | if(serviceName.toLowerCase().endsWith("retrieve")) {
|
---|
[15222] | 1416 | attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
|
---|
[21775] | 1417 | }
|
---|
[15222] | 1418 | else if(serviceName.toLowerCase().contains("browse")) {
|
---|
| 1419 | attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
|
---|
| 1420 |
|
---|
| 1421 | // we need name and description <displayItem> elements
|
---|
| 1422 | Element displayItem
|
---|
| 1423 | = createNameValuePairElement(doc,
|
---|
| 1424 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
|
---|
| 1425 | service.appendChild(displayItem);
|
---|
| 1426 |
|
---|
| 1427 | displayItem = createNameValuePairElement(doc,
|
---|
| 1428 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
|
---|
| 1429 | "Browse pre-defined classification hierarchies");
|
---|
| 1430 | service.appendChild(displayItem);
|
---|
| 1431 |
|
---|
| 1432 | // now need a classifierList
|
---|
| 1433 | Element classifierList = doc.createElement(
|
---|
| 1434 | GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1435 |
|
---|
| 1436 | int classifierNum = 1;
|
---|
| 1437 | // append a <classifier content="some letter" name="CL#">
|
---|
| 1438 | // for each letter of the alphabet:
|
---|
| 1439 | Element classifier = createClassifierElement(doc, "TitleByLetter",
|
---|
[15670] | 1440 | classifierNum++, "titles by letter", "Browse titles by letter");
|
---|
[15222] | 1441 | // now add this <classifier> to the <classifierList>
|
---|
| 1442 | classifierList.appendChild(classifier);
|
---|
| 1443 |
|
---|
| 1444 | // ANY MORE CLASSIFIERS? ADD THEM HERE
|
---|
| 1445 |
|
---|
| 1446 | service.appendChild(classifierList);
|
---|
| 1447 | } // ELSE check for whether it is a query service
|
---|
| 1448 | else if(serviceName.toLowerCase().contains("query")) {
|
---|
| 1449 | attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
|
---|
[21775] | 1450 | if(serviceName.equals("TextQuery")) {
|
---|
[15222] | 1451 | describeTextQueryService(service);
|
---|
[21775] | 1452 | } else if(serviceName.equals("FieldQuery")) {
|
---|
[15222] | 1453 | describeFieldQueryService(service);
|
---|
[21775] | 1454 | }
|
---|
[15222] | 1455 | }
|
---|
| 1456 |
|
---|
| 1457 | // don't forget to add the type attribute to the service!
|
---|
| 1458 | service.setAttributeNode(attribute);
|
---|
| 1459 |
|
---|
| 1460 | String from = serviceName;
|
---|
| 1461 |
|
---|
| 1462 | Element responseMsg = createResponseMessage(doc, service, null,
|
---|
| 1463 | GSXML.REQUEST_TYPE_DESCRIBE, from);
|
---|
| 1464 | try{
|
---|
[22300] | 1465 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 1466 | }catch(TransformerException e) {
|
---|
| 1467 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1468 | + " " + e;
|
---|
| 1469 | }
|
---|
| 1470 | }
|
---|
| 1471 |
|
---|
| 1472 | /** Appends children to the parameter service Element that make the
|
---|
| 1473 | * final service Element into a describe response XML for FedoraGS3's
|
---|
| 1474 | * TextQuery service.
|
---|
| 1475 | * @param service is the service Element that is being filled out. */
|
---|
| 1476 | protected void describeTextQueryService(Element service) {
|
---|
| 1477 | Document doc = service.getOwnerDocument();
|
---|
| 1478 | // we need name, submit (button) and description <displayItem> elements
|
---|
| 1479 | Element displayItem = createNameValuePairElement(doc,
|
---|
| 1480 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1481 | "Text Search");
|
---|
| 1482 | service.appendChild(displayItem);
|
---|
| 1483 |
|
---|
| 1484 | displayItem = createNameValuePairElement(doc,
|
---|
| 1485 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
|
---|
| 1486 | service.appendChild(displayItem);
|
---|
| 1487 |
|
---|
| 1488 | displayItem = createNameValuePairElement(doc,
|
---|
| 1489 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
|
---|
| 1490 | "Title and full-text search service");
|
---|
| 1491 | service.appendChild(displayItem);
|
---|
| 1492 |
|
---|
| 1493 | //create the <paramList>
|
---|
| 1494 | Element paramList = doc.createElement(
|
---|
| 1495 | GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1496 |
|
---|
| 1497 | // we ignore granularity to search at: it will always be
|
---|
| 1498 | // document and section level
|
---|
| 1499 | // we ignore casefolding: always on (that is, case is irrelevant)
|
---|
| 1500 | // we ignore document display order: always ranked
|
---|
| 1501 |
|
---|
| 1502 | // Constructing the following:
|
---|
| 1503 | // <param default="100" name="maxDocs" type="integer">
|
---|
| 1504 | // <displayItem name="name">Maximum hits to return</displayItem>
|
---|
| 1505 | // </param>
|
---|
| 1506 | Element param = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1507 |
|
---|
| 1508 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1509 | attribute.setValue(MAXDOCS);
|
---|
| 1510 | param.setAttributeNode(attribute);
|
---|
| 1511 |
|
---|
| 1512 | attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
|
---|
| 1513 | attribute.setValue("100");
|
---|
| 1514 | param.setAttributeNode(attribute);
|
---|
| 1515 |
|
---|
| 1516 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1517 | attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
|
---|
| 1518 | param.setAttributeNode(attribute);
|
---|
| 1519 |
|
---|
| 1520 | displayItem = createNameValuePairElement(doc,
|
---|
| 1521 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1522 | "Maximum hits to return");
|
---|
| 1523 | param.appendChild(displayItem);
|
---|
| 1524 |
|
---|
| 1525 | paramList.appendChild(param);
|
---|
| 1526 |
|
---|
| 1527 | // Constructing the following:
|
---|
| 1528 | // <param name="query" type="string">
|
---|
| 1529 | // <displayItem name="name">Query string</displayItem>
|
---|
| 1530 | // </param>
|
---|
| 1531 | param = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1532 |
|
---|
| 1533 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1534 | attribute.setValue(QUERY);
|
---|
| 1535 | param.setAttributeNode(attribute);
|
---|
| 1536 |
|
---|
| 1537 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1538 | attribute.setValue(GSXML.PARAM_TYPE_STRING);
|
---|
| 1539 | param.setAttributeNode(attribute);
|
---|
| 1540 |
|
---|
| 1541 | displayItem = createNameValuePairElement(doc,
|
---|
| 1542 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1543 | "Query string");
|
---|
| 1544 | param.appendChild(displayItem);
|
---|
| 1545 |
|
---|
| 1546 | paramList.appendChild(param);
|
---|
| 1547 |
|
---|
| 1548 | service.appendChild(paramList);
|
---|
| 1549 | }
|
---|
| 1550 |
|
---|
| 1551 | /** Appends children to the parameter service Element that make the
|
---|
| 1552 | * final service Element into a describe response XML for FedoraGS3's
|
---|
| 1553 | * FieldQuery service.
|
---|
| 1554 | * @param service is the service Element that is being filled out. */
|
---|
| 1555 | protected void describeFieldQueryService(Element service) {
|
---|
| 1556 | Document doc = service.getOwnerDocument();
|
---|
| 1557 | // we need name, submit (button) and description <displayItem> elements
|
---|
| 1558 | Element displayItem = createNameValuePairElement(doc,
|
---|
| 1559 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1560 | "Form Search");
|
---|
| 1561 | service.appendChild(displayItem);
|
---|
| 1562 |
|
---|
| 1563 | displayItem = createNameValuePairElement(doc,
|
---|
| 1564 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
|
---|
| 1565 | service.appendChild(displayItem);
|
---|
| 1566 |
|
---|
| 1567 | displayItem = createNameValuePairElement(doc,
|
---|
| 1568 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
|
---|
| 1569 | "Simple fielded search");
|
---|
| 1570 | service.appendChild(displayItem);
|
---|
| 1571 |
|
---|
| 1572 | //create the <paramList>
|
---|
| 1573 | Element paramList = doc.createElement(
|
---|
| 1574 | GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1575 |
|
---|
| 1576 | // we ignore granularity to search at: it will always be
|
---|
| 1577 | // document and section level
|
---|
| 1578 | // we ignore casefolding: always on (that is, case is irrelevant)
|
---|
| 1579 | // we ignore document display order: always ranked
|
---|
| 1580 |
|
---|
| 1581 | // Constructing the following:
|
---|
| 1582 | // <param default="100" name="maxDocs" type="integer">
|
---|
| 1583 | // <displayItem name="name">Maximum hits to return</displayItem>
|
---|
| 1584 | // </param>
|
---|
| 1585 | Element param = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1586 |
|
---|
| 1587 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1588 | attribute.setValue(MAXDOCS);
|
---|
| 1589 | param.setAttributeNode(attribute);
|
---|
| 1590 |
|
---|
| 1591 | attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
|
---|
| 1592 | attribute.setValue("100");
|
---|
| 1593 | param.setAttributeNode(attribute);
|
---|
| 1594 |
|
---|
| 1595 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1596 | attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
|
---|
| 1597 | param.setAttributeNode(attribute);
|
---|
| 1598 |
|
---|
| 1599 | displayItem = createNameValuePairElement(doc,
|
---|
| 1600 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1601 | "Maximum hits to return");
|
---|
| 1602 | param.appendChild(displayItem);
|
---|
| 1603 |
|
---|
| 1604 | paramList.appendChild(param);
|
---|
| 1605 |
|
---|
| 1606 | // Constructing the following:
|
---|
| 1607 | // <param name="simpleField" occurs="4" type="multi">
|
---|
| 1608 | // <displayItem name="name"></displayItem>
|
---|
| 1609 | //
|
---|
| 1610 | // <param name="query" type="string">
|
---|
| 1611 | // <displayItem name="name">Word or phrase </displayItem>
|
---|
| 1612 | // </param>
|
---|
| 1613 | //
|
---|
| 1614 | // <param default="allFields" name="fieldname" type="enum_single">
|
---|
| 1615 | // <displayItem name="name">in field</displayItem>
|
---|
| 1616 | //
|
---|
| 1617 | // <option name="docTitles">
|
---|
| 1618 | // <displayItem name="name">document titles</displayItem>
|
---|
| 1619 | // </option>
|
---|
| 1620 | // <option name="allTitles">
|
---|
| 1621 | // <displayItem name="name">document and section titles</displayItem>
|
---|
| 1622 | // </option>
|
---|
| 1623 | // <option name="fullText">
|
---|
| 1624 | // <displayItem name="name">full text</displayItem>
|
---|
| 1625 | // </option>
|
---|
| 1626 | // <option name="all">
|
---|
| 1627 | // <displayItem name="name">titles and full text</displayItem>
|
---|
| 1628 | // </option>
|
---|
| 1629 | // <option name="">
|
---|
| 1630 | // <displayItem name="name"></displayItem>
|
---|
| 1631 | // </option>
|
---|
| 1632 | // </param>
|
---|
| 1633 | // </param>
|
---|
| 1634 | Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1635 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1636 | attribute.setValue(SIMPLEFIELD_ATT);
|
---|
| 1637 | rowOfParams.setAttributeNode(attribute);
|
---|
| 1638 |
|
---|
| 1639 | // we want the row of controls to occur multiple times
|
---|
| 1640 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1641 | attribute.setValue(GSXML.PARAM_TYPE_MULTI);
|
---|
| 1642 | rowOfParams.setAttributeNode(attribute);
|
---|
| 1643 |
|
---|
| 1644 | attribute = doc.createAttribute(OCCURS_ATT);
|
---|
| 1645 | attribute.setValue("4"); // we want this row to occur 4 times
|
---|
| 1646 | rowOfParams.setAttributeNode(attribute);
|
---|
| 1647 |
|
---|
| 1648 | // <param name="query" type="string">
|
---|
| 1649 | // <displayItem name="name">Word or phrase </displayItem>
|
---|
| 1650 | // </param>
|
---|
| 1651 | param = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1652 |
|
---|
| 1653 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1654 | attribute.setValue(QUERY);
|
---|
| 1655 | param.setAttributeNode(attribute);
|
---|
| 1656 |
|
---|
| 1657 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1658 | attribute.setValue(GSXML.PARAM_TYPE_STRING);
|
---|
| 1659 | param.setAttributeNode(attribute);
|
---|
| 1660 |
|
---|
| 1661 | displayItem = createNameValuePairElement(doc,
|
---|
| 1662 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1663 | "Word or phrase");
|
---|
| 1664 | param.appendChild(displayItem);
|
---|
| 1665 | rowOfParams.appendChild(param);
|
---|
| 1666 |
|
---|
| 1667 | // <param default="allFields" name="fieldName" type="enum_single">
|
---|
| 1668 | // <displayItem name="name">in field</displayItem>
|
---|
| 1669 | param = doc.createElement(GSXML.PARAM_ELEM);
|
---|
| 1670 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1671 | attribute.setValue(FIELDNAME_ATT);
|
---|
| 1672 | param.setAttributeNode(attribute);
|
---|
| 1673 |
|
---|
| 1674 | attribute = doc.createAttribute(GSXML.TYPE_ATT);
|
---|
| 1675 | attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
|
---|
| 1676 | param.setAttributeNode(attribute);
|
---|
| 1677 |
|
---|
| 1678 | attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
|
---|
| 1679 | attribute.setValue(ALL_FIELDS);
|
---|
| 1680 | param.setAttributeNode(attribute);
|
---|
| 1681 |
|
---|
| 1682 | displayItem = createNameValuePairElement(doc,
|
---|
| 1683 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1684 | "in field");
|
---|
| 1685 | param.appendChild(displayItem);
|
---|
| 1686 |
|
---|
| 1687 | String[] searchFieldNames
|
---|
| 1688 | = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
|
---|
| 1689 | String[] searchFieldDisplay = {"all titles and full-text",
|
---|
| 1690 | "document titles only", "document and section titles",
|
---|
| 1691 | "full-text only"};
|
---|
| 1692 |
|
---|
| 1693 | // for each fieldName create an option element and insert
|
---|
| 1694 | // the option into the enum_multi drop-down param:
|
---|
| 1695 | // <option name="fieldName">
|
---|
| 1696 | // <displayItem name="name">fieldName</displayItem>
|
---|
| 1697 | // </option>
|
---|
| 1698 | for(int i = 0; i < searchFieldNames.length; i++) {
|
---|
| 1699 | Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
|
---|
| 1700 | attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 1701 | attribute.setValue(searchFieldNames[i]);
|
---|
| 1702 | option.setAttributeNode(attribute);
|
---|
| 1703 |
|
---|
| 1704 | displayItem = createNameValuePairElement(doc,
|
---|
| 1705 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
|
---|
| 1706 | searchFieldDisplay[i]);
|
---|
| 1707 | option.appendChild(displayItem);
|
---|
| 1708 | param.appendChild(option); // add option to the drop-down box
|
---|
| 1709 | }
|
---|
| 1710 |
|
---|
| 1711 | rowOfParams.appendChild(param);
|
---|
| 1712 | paramList.appendChild(rowOfParams);
|
---|
| 1713 | service.appendChild(paramList);
|
---|
| 1714 | }
|
---|
| 1715 |
|
---|
| 1716 | /**
|
---|
| 1717 | * @return a GS3 describe response message for the requested service
|
---|
| 1718 | * of the given collection. DocumentContent/Metadata/StructureRetrieve
|
---|
| 1719 | * return nothing special except their names; browse (and any query)
|
---|
| 1720 | * return more complex XML responses.
|
---|
| 1721 | * All collections in this Digital Library (Fedora Repository) share
|
---|
| 1722 | * the same services, so this method returns the same as
|
---|
| 1723 | * describeService(serviceName).
|
---|
| 1724 | * @param collectionName - the name of the collection whose service is to
|
---|
| 1725 | * be described. It will be converted to a fedora collection pid, which is of
|
---|
| 1726 | * the form "greenstone:<collectionName>-collection".
|
---|
| 1727 | * @param serviceName - the name of the service in the collection which is to
|
---|
| 1728 | * be described. */
|
---|
| 1729 | public String describeCollectionService(String collectionName,
|
---|
| 1730 | String serviceName) {
|
---|
| 1731 | // collectionName can be ignored, because all services are FedoraGS3
|
---|
| 1732 | // services and are not unique to any particular (greenstone) collection.
|
---|
| 1733 | return describeService(serviceName);
|
---|
| 1734 | }
|
---|
| 1735 |
|
---|
| 1736 | /** This method performs the implemented browse operation: allowing the
|
---|
| 1737 | * user to browse the titles of documents in the given collection by letter
|
---|
| 1738 | * and returning the results.
|
---|
[22300] | 1739 | * @param collectionName is the name of the collection whose documents
|
---|
| 1740 | * starting with the given letter will be returned.
|
---|
[21835] | 1741 | * @param classifierIDs are the ids of the classifiers on which to browse. In
|
---|
[15222] | 1742 | * this case, the classifier indicates whether we browse titles by letter, or
|
---|
| 1743 | * browse (documents) by collection; and it is of the form <CL(letter)>.
|
---|
[22300] | 1744 | * @param structures - the requested browse substructure. Can be any combination
|
---|
| 1745 | * of ancestors, parent, siblings, children, descendants.
|
---|
| 1746 | * @param infos - the requested structural info. Can be numSiblings,
|
---|
| 1747 | * siblingPosition, numChildren.
|
---|
| 1748 | * @return a GS3 ClassifierBrowse response message which lists all
|
---|
[15222] | 1749 | * the documents that start with the letter indicated by parameter classifier.
|
---|
| 1750 | */
|
---|
[22300] | 1751 | public String browse(String collectionName, String[] classifierIDs,
|
---|
| 1752 | String[] structures, String[] infos)
|
---|
[15222] | 1753 | {
|
---|
[22300] | 1754 | // Construct one string from the structures and structural info arrays
|
---|
| 1755 | String structure = "";
|
---|
| 1756 | String info = "";
|
---|
| 1757 | for(int i = 0; i < structures.length; i++) {
|
---|
| 1758 | structure = structure + structures[i] + "|";
|
---|
| 1759 | }
|
---|
| 1760 | for(int i = 0; i < infos.length; i++) {
|
---|
| 1761 | info = info + infos[i] + "|";
|
---|
| 1762 | }
|
---|
| 1763 |
|
---|
| 1764 | Document doc = builder.newDocument();
|
---|
| 1765 | FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
|
---|
| 1766 |
|
---|
| 1767 | // <classifierNodeList>
|
---|
| 1768 | Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 1769 |
|
---|
| 1770 | for(int i = 0; i < classifierIDs.length; i++) {
|
---|
| 1771 | if(classifierIDs[i].startsWith("CL1")) { // browse by titles
|
---|
| 1772 | browseTitlesByLetterClassifier(doc, classifierNodeList,
|
---|
| 1773 | collectionName, classifierIDs[i],
|
---|
| 1774 | structure, info);
|
---|
| 1775 | }
|
---|
| 1776 | }
|
---|
| 1777 |
|
---|
| 1778 | Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
|
---|
| 1779 | GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
|
---|
| 1780 | try {
|
---|
| 1781 | return FedoraCommons.elementToString(responseMsg);
|
---|
| 1782 | } catch(TransformerException e) {
|
---|
| 1783 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 1784 | + " " + e;
|
---|
| 1785 | }
|
---|
| 1786 | }
|
---|
| 1787 |
|
---|
| 1788 | /** CL1 browsing classifier: browsing titles by starting letter.
|
---|
| 1789 | * The browsing structure is retrieved.
|
---|
| 1790 | * @param doc - the document object that will contain the CL1 browsing structure.
|
---|
| 1791 | * @param classifierNodeList - the classifiers will be added to this nodeList.
|
---|
| 1792 | * @param collectionName - name of the collection through which we are browsing CL1.
|
---|
| 1793 | * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
|
---|
| 1794 | * a letter.
|
---|
[22308] | 1795 | * @param structure - the requested browse substructure. Can be any combination of
|
---|
| 1796 | * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
|
---|
[22300] | 1797 | * @param info - the requested structural info. Can be numSiblings, siblingPosition,
|
---|
| 1798 | * numChildren.
|
---|
| 1799 | * @return the classifierNodeList with the CL1 classifier browse structure.
|
---|
| 1800 | */
|
---|
| 1801 | public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
|
---|
| 1802 | String collectionName, String classifierID,
|
---|
| 1803 | String structure, String info)
|
---|
| 1804 | {
|
---|
| 1805 | FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
|
---|
| 1806 |
|
---|
| 1807 | if(structure.indexOf("entire") != -1) {
|
---|
| 1808 | structure = structure + "ancestors|descendants";
|
---|
| 1809 | }
|
---|
| 1810 |
|
---|
| 1811 | // Structure of ancestors and children only at this stage
|
---|
| 1812 | int firstLevel = classifierID.indexOf('.');
|
---|
| 1813 | int secondLevel = classifierID.lastIndexOf('.');
|
---|
| 1814 |
|
---|
| 1815 | // <nodeStructure>
|
---|
| 1816 | Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
|
---|
| 1817 |
|
---|
| 1818 | // requested classifier node
|
---|
| 1819 | Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
|
---|
| 1820 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1821 | attribute.setValue(classifierID);
|
---|
| 1822 | classNode.setAttributeNode(attribute);
|
---|
[26262] | 1823 | Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
|
---|
| 1824 | typeAttribute.setValue(GSXML.VLIST);
|
---|
| 1825 | classNode.setAttributeNode(typeAttribute);
|
---|
[22300] | 1826 |
|
---|
| 1827 | if(firstLevel == -1) { // CL1 - toplevel node
|
---|
| 1828 | Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
|
---|
| 1829 |
|
---|
| 1830 | classifierNodeList.appendChild(classNode);
|
---|
| 1831 | classNode.appendChild(nodeStructure);
|
---|
| 1832 |
|
---|
| 1833 | nodeStructure.appendChild(root);
|
---|
| 1834 | if(structure.indexOf("descendants") != -1) {
|
---|
[22308] | 1835 | getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
|
---|
[22300] | 1836 | } else if(structure.indexOf("children") != -1) {
|
---|
[22308] | 1837 | getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
|
---|
[22300] | 1838 | }
|
---|
[22308] | 1839 | // nothing to be done for siblings
|
---|
[22300] | 1840 | }
|
---|
| 1841 | else if(firstLevel == secondLevel) { // CL1.x, where x is a number
|
---|
| 1842 |
|
---|
[22308] | 1843 | if(structure.indexOf("parent") != -1
|
---|
| 1844 | || structure.indexOf("ancestors") != -1
|
---|
| 1845 | || structure.indexOf("siblings") != -1) {
|
---|
[22300] | 1846 | String toplevelID = classifierID.substring(0, firstLevel);
|
---|
| 1847 | Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
|
---|
| 1848 | attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1849 | attribute.setValue(toplevelID);
|
---|
| 1850 | toplevelNode.setAttributeNode(attribute);
|
---|
[26262] | 1851 | typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
|
---|
| 1852 | typeAttribute.setValue(GSXML.VLIST);
|
---|
| 1853 | toplevelNode.setAttributeNode(typeAttribute);
|
---|
[22300] | 1854 | Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
|
---|
| 1855 |
|
---|
| 1856 | classifierNodeList.appendChild(toplevelNode);
|
---|
| 1857 | toplevelNode.appendChild(nodeStructure);
|
---|
| 1858 | nodeStructure.appendChild(node);
|
---|
[22308] | 1859 |
|
---|
| 1860 | if(structure.indexOf("siblings") != -1) { // get the children of the parents too
|
---|
| 1861 | getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
|
---|
| 1862 | // pass the requested node (classNode) so that it is attached in the correct
|
---|
| 1863 | // location among its siblings, and to ensure that it is not recreated.
|
---|
| 1864 | // getTitlesByLetterStructure() will append classNode to node
|
---|
| 1865 | } else {
|
---|
| 1866 | node.appendChild(classNode);
|
---|
| 1867 | }
|
---|
[22300] | 1868 | } else {
|
---|
| 1869 | Element node = (Element)classNode.cloneNode(true);
|
---|
| 1870 | classifierNodeList.appendChild(node);
|
---|
| 1871 | node.appendChild(nodeStructure);
|
---|
| 1872 | nodeStructure.appendChild(classNode);
|
---|
| 1873 | }
|
---|
| 1874 |
|
---|
| 1875 | int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
|
---|
| 1876 | char ch = (char)(num - 1 + 'A');
|
---|
| 1877 | if(structure.indexOf("descendants") != -1) {
|
---|
| 1878 | getTitlesForLetter(ch, collectionName, classNode, "descendants");
|
---|
| 1879 | } else if(structure.indexOf("children") != -1) {
|
---|
| 1880 | getTitlesForLetter(ch, collectionName, classNode, "children");
|
---|
| 1881 | }
|
---|
| 1882 | }
|
---|
| 1883 | else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
|
---|
| 1884 | LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
|
---|
| 1885 | }
|
---|
| 1886 |
|
---|
| 1887 | return classifierNodeList;
|
---|
| 1888 | }
|
---|
| 1889 |
|
---|
| 1890 | /** Creates a (CL1) subclassifier element for the docs whose titles start with
|
---|
| 1891 | * the given letter.
|
---|
| 1892 | * @param ch - the starting letter of the document titles to retrieve.
|
---|
| 1893 | * @param collectionName - name of the collection through which we are browsing CL1.
|
---|
| 1894 | * @param classifierNode - the docNodes found will be appended to this node.
|
---|
| 1895 | * @param depthStructure - can be descendants or children. Specifies what to retrieve:
|
---|
| 1896 | * gets descendants of any documents found, otherwise gets just the children.
|
---|
| 1897 | * @return the given classifierNode which will have the child (or descendant) documents
|
---|
| 1898 | * appended to it.
|
---|
| 1899 | */
|
---|
| 1900 | public Element getTitlesForLetter(char ch, String collectionName,
|
---|
| 1901 | Element classifierNode, String depthStructure)
|
---|
| 1902 | {
|
---|
| 1903 | Document doc = classifierNode.getOwnerDocument();
|
---|
| 1904 | FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
|
---|
| 1905 |
|
---|
| 1906 |
|
---|
| 1907 | // Retrieve the document structure for each subClassifierID:
|
---|
| 1908 | // all the documents that begin with its letter.
|
---|
| 1909 | String letter = String.valueOf(ch);
|
---|
| 1910 | try {
|
---|
| 1911 | String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
|
---|
| 1912 | if(docPIDs.length == 0) {
|
---|
| 1913 | return classifierNode; // skip letters that don't have any kids
|
---|
| 1914 | }
|
---|
| 1915 |
|
---|
| 1916 | for(int i = 0; i < docPIDs.length; i++) {
|
---|
| 1917 | // work out the document's fedora PID and section ID
|
---|
| 1918 | String sectionID = getSectionIDFromDocID(docPIDs[i]);
|
---|
| 1919 | String docPID = getDocPIDFromDocID(docPIDs[i]);
|
---|
[15222] | 1920 |
|
---|
[22300] | 1921 | // get the required section, along with children or descendants
|
---|
| 1922 | Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
|
---|
[15222] | 1923 |
|
---|
[22300] | 1924 | // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
|
---|
| 1925 | Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
|
---|
| 1926 |
|
---|
| 1927 | // fills in the subtree of the rootNode in our nodeStructure element
|
---|
| 1928 | createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
|
---|
| 1929 | classifierNode.appendChild(docRootNode);
|
---|
| 1930 | }
|
---|
| 1931 | } catch(Exception e) {
|
---|
| 1932 | ex = new FedoraGS3RunException(e);
|
---|
| 1933 | ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
|
---|
| 1934 | }
|
---|
| 1935 |
|
---|
| 1936 | return classifierNode;
|
---|
| 1937 | }
|
---|
| 1938 |
|
---|
| 1939 |
|
---|
| 1940 | /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
|
---|
| 1941 | * starting letter of the alphabet. X is each letter of the alphabet for which there
|
---|
| 1942 | * are matching document titles.
|
---|
| 1943 | * @param collectionName - name of the collection through which we are browsing CL1.
|
---|
| 1944 | * @param classifierNode - the docNodes found will be appended to this node.
|
---|
| 1945 | * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
|
---|
| 1946 | * the IDs for the subclassifiers (CL.x).
|
---|
| 1947 | * @param getDescendants - if true, get descendants of any documents found, otherwise
|
---|
| 1948 | * get just the children.
|
---|
[22308] | 1949 | * @param wantedSibling - the node (already created) whose siblings are requested. We
|
---|
| 1950 | * need to make sure not to recreate this node when creating its sibling nodes.
|
---|
[22300] | 1951 | * @return the given classifierNode, with the CL.x subclassifiers for the letters of
|
---|
| 1952 | * the alphabet that are represented in the document titles.
|
---|
| 1953 | */
|
---|
| 1954 | public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
|
---|
[22308] | 1955 | String classifierID, boolean getDescendants,
|
---|
| 1956 | Element wantedSibling)
|
---|
| 1957 | {
|
---|
| 1958 | String ID = "";
|
---|
| 1959 | if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
|
---|
| 1960 | ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1961 | }
|
---|
| 1962 |
|
---|
[22300] | 1963 | Document doc = classifierNode.getOwnerDocument();
|
---|
| 1964 | FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
|
---|
| 1965 |
|
---|
| 1966 | // We're going to loop to the end of the alphabet
|
---|
| 1967 | int count = 1;
|
---|
| 1968 | for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
|
---|
| 1969 | // Retrieve the document structure for each subClassifierID:
|
---|
| 1970 | // all the documents that begin with its letter.
|
---|
| 1971 | String letter = String.valueOf(ch);
|
---|
| 1972 | try {
|
---|
| 1973 | String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
|
---|
| 1974 | if(docPIDs.length == 0) {
|
---|
| 1975 | continue; // skip letters that don't have any kids
|
---|
| 1976 | }
|
---|
[22308] | 1977 | Element subClassifier = null;
|
---|
| 1978 | if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
|
---|
| 1979 | // already have the requested node, don't recreate it
|
---|
| 1980 | subClassifier = wantedSibling;
|
---|
| 1981 | } else {
|
---|
[26262] | 1982 | // <classifierNode childType="VList" nodeID="CL1.x">
|
---|
[22308] | 1983 | subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
|
---|
[26262] | 1984 | Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
|
---|
| 1985 | typeAttribute.setValue(GSXML.VLIST);
|
---|
| 1986 | subClassifier.setAttributeNode(typeAttribute);
|
---|
[22308] | 1987 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 1988 | attribute.setValue(classifierID+"."+count);
|
---|
| 1989 | subClassifier.setAttributeNode(attribute);
|
---|
| 1990 | }
|
---|
| 1991 | classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
|
---|
[22300] | 1992 |
|
---|
| 1993 | if(getDescendants) { // get the documents
|
---|
| 1994 |
|
---|
| 1995 | // append the <docNodes> for the docPIDs found as children
|
---|
| 1996 | // of subclassifier
|
---|
| 1997 |
|
---|
| 1998 | for(int i = 0; i < docPIDs.length; i++) {
|
---|
| 1999 | // work out the document's fedora PID and section ID
|
---|
| 2000 | String sectionID = getSectionIDFromDocID(docPIDs[i]);
|
---|
| 2001 | String docPID = getDocPIDFromDocID(docPIDs[i]);
|
---|
[15222] | 2002 |
|
---|
[22300] | 2003 | // get the required section, along with children or descendants
|
---|
| 2004 | Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
|
---|
| 2005 |
|
---|
| 2006 | // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
|
---|
| 2007 | Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
|
---|
| 2008 |
|
---|
| 2009 | // fills in the subtree of the rootNode in our nodeStructure element
|
---|
| 2010 | createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
|
---|
| 2011 | subClassifier.appendChild(rootNode);
|
---|
| 2012 | }
|
---|
[21863] | 2013 | }
|
---|
[22300] | 2014 | } catch(Exception e) {
|
---|
| 2015 | ex = new FedoraGS3RunException(e);
|
---|
| 2016 | ex.setSpecifics("requested portion of TOC file or "
|
---|
| 2017 | + "trouble with fielded search ");
|
---|
| 2018 | }
|
---|
[21835] | 2019 | }
|
---|
[22300] | 2020 | return classifierNode;
|
---|
| 2021 | }
|
---|
| 2022 |
|
---|
[15222] | 2023 |
|
---|
| 2024 | /** This method performs something equivalent to a greenstone3
|
---|
| 2025 | * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
|
---|
[22300] | 2026 | * @param classNodeIDs array of classifierNode IDs for which the metadata
|
---|
[15222] | 2027 | * needs to be returned.
|
---|
[22300] | 2028 | * @param metafields are the classifier metadata fields that are to be returned.
|
---|
| 2029 | * At present this method ignores them/pretends the requested metafields are
|
---|
| 2030 | * "all" and always returns the Title meta for the requested classifier nodes
|
---|
| 2031 | * (because that is all the metadata this Fedora classifier has at present).
|
---|
[15222] | 2032 | * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
|
---|
| 2033 | * lists the metadata for all the classifierNodes passed as parameter.*/
|
---|
[22300] | 2034 | public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
|
---|
[15222] | 2035 | {
|
---|
| 2036 | Document doc = this.builder.newDocument();
|
---|
| 2037 | // <classifierNodeList>
|
---|
| 2038 | Element classifierNodeList = doc.createElement(
|
---|
| 2039 | GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 2040 |
|
---|
| 2041 | // create <classifierNode><metadataList><metadata>s
|
---|
[21864] | 2042 | // </metadataList></classifierNode> for all letters of the alphabet
|
---|
[15222] | 2043 | for(int i = 0; i < classNodeIDs.length; i++) {
|
---|
| 2044 | // strip ID of everything before the first '.' (i.e. remove "CL#.")
|
---|
| 2045 | int index = classNodeIDs[i].indexOf('.');
|
---|
| 2046 | String subClassifierNumber = classNodeIDs[i].substring(index+1);
|
---|
[21864] | 2047 | index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
|
---|
| 2048 | if(index != -1) {
|
---|
| 2049 | subClassifierNumber = subClassifierNumber.substring(0, index);
|
---|
| 2050 | }
|
---|
[15222] | 2051 | int subClassifierNum = Integer.parseInt(subClassifierNumber);
|
---|
[15672] | 2052 | String classifierName = "";
|
---|
| 2053 | if(subClassifierNum == 0) { // no document titles started with a letter
|
---|
| 2054 | classifierName = "A-Z";
|
---|
| 2055 | } else {
|
---|
| 2056 | char letter = (char)('A' + subClassifierNum - 1); // A = 1
|
---|
| 2057 | classifierName = String.valueOf(letter);
|
---|
| 2058 | }
|
---|
[15222] | 2059 |
|
---|
| 2060 | // <classifierNode nodeID="CL#.subNum">
|
---|
| 2061 | Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
|
---|
| 2062 | Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 2063 | attribute.setValue(classNodeIDs[i]);
|
---|
| 2064 | classifierNode.setAttributeNode(attribute);
|
---|
| 2065 |
|
---|
| 2066 | // <metadataList>
|
---|
| 2067 | Element metadataList = doc.createElement(
|
---|
| 2068 | GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 2069 |
|
---|
| 2070 | // at least one metadata element: that of the title of this
|
---|
| 2071 | // classifierNode:
|
---|
| 2072 | // <metadata name="Title">letter</metadata>
|
---|
| 2073 | Element metadata = this.createNameValuePairElement(doc,
|
---|
[15672] | 2074 | GSXML.METADATA_ELEM, "Title", classifierName);
|
---|
[15222] | 2075 |
|
---|
| 2076 | // now connect up everything
|
---|
| 2077 | metadataList.appendChild(metadata);
|
---|
| 2078 | classifierNode.appendChild(metadataList);
|
---|
| 2079 | classifierNodeList.appendChild(classifierNode);
|
---|
| 2080 | }
|
---|
| 2081 |
|
---|
| 2082 | Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
|
---|
| 2083 | GSXML.REQUEST_TYPE_PROCESS, //collName +
|
---|
| 2084 | "ClassifierBrowseMetadataRetrieve");
|
---|
| 2085 | try{
|
---|
[22300] | 2086 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 2087 | }catch(TransformerException e) {
|
---|
| 2088 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 2089 | + " " + e;
|
---|
| 2090 | }
|
---|
| 2091 | }
|
---|
| 2092 |
|
---|
| 2093 | /** @return a newly created element of the following format:
|
---|
| 2094 | * <classifier content="somecontent" name="CL+num">
|
---|
| 2095 | * <displayItem name="name">someClassifierName</displayItem>
|
---|
| 2096 | * <displayItem name="description">Browse by classifier name</displayItem>
|
---|
| 2097 | * </classifier>
|
---|
| 2098 | * @param doc - the document used to create the element
|
---|
| 2099 | * @param content - value of the content attribute
|
---|
| 2100 | * @param classifierNum - the number suffixed to the CL, together forming
|
---|
| 2101 | * the classifier Node's ID
|
---|
| 2102 | * @param displayNameVal is the bodytext of a named displayItem element
|
---|
| 2103 | * @param displayDescrVal is the bodytext of a displayItem element with
|
---|
| 2104 | * description */
|
---|
| 2105 | protected Element createClassifierElement(Document doc, String content,
|
---|
| 2106 | int classifierNum, String displayNameVal, String displayDescrVal)
|
---|
| 2107 | {
|
---|
| 2108 | final String CL = "CL";
|
---|
| 2109 | Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
|
---|
| 2110 | // content attribute
|
---|
| 2111 | Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
|
---|
| 2112 | att.setValue(content);
|
---|
| 2113 | classifier.setAttributeNode(att);
|
---|
| 2114 | // name attribute
|
---|
| 2115 | att = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 2116 | att.setValue(CL + classifierNum);
|
---|
| 2117 | classifier.setAttributeNode(att);
|
---|
| 2118 |
|
---|
| 2119 | // now create the displayItem children for classifier:
|
---|
| 2120 | // <displayItem name="name">#letter</displayItem>
|
---|
[22300] | 2121 | // <displayItem name="description">Browse titles starting with #letter</displayItem>
|
---|
[15222] | 2122 | Element displayItem = createNameValuePairElement(doc,
|
---|
| 2123 | GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
|
---|
| 2124 | classifier.appendChild(displayItem);
|
---|
| 2125 | displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
|
---|
| 2126 | GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
|
---|
| 2127 | classifier.appendChild(displayItem);
|
---|
| 2128 |
|
---|
| 2129 | return classifier;
|
---|
| 2130 | }
|
---|
| 2131 |
|
---|
| 2132 |
|
---|
| 2133 | /** @return a newly created element of the following format:
|
---|
| 2134 | * <elementName name="somename">"some display value"</elementName>
|
---|
| 2135 | * @param doc - the document used to create the element
|
---|
| 2136 | * @param elementName - the tag name
|
---|
| 2137 | * @param name - value of attribute name
|
---|
| 2138 | * @param value - the body text of the element */
|
---|
| 2139 | protected Element createNameValuePairElement(Document doc, String elementName,
|
---|
| 2140 | String name, String value) {
|
---|
| 2141 | // <elementName name="somename">"some display value"</elementName>
|
---|
| 2142 | Element element = doc.createElement(elementName);
|
---|
| 2143 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 2144 | attribute.setValue(name);
|
---|
| 2145 | element.setAttributeNode(attribute);
|
---|
| 2146 |
|
---|
| 2147 | element.appendChild(doc.createTextNode(value));
|
---|
| 2148 | return element;
|
---|
| 2149 | }
|
---|
| 2150 |
|
---|
| 2151 | /**
|
---|
| 2152 | * @param collection is the collection to search in
|
---|
| 2153 | * @param query is the query term to search for. It won't specify the
|
---|
| 2154 | * indexed field to search in, which will mean that GSearch will
|
---|
| 2155 | * search all default indexed fields.
|
---|
| 2156 | * @param maxDocs is the maximum number of results to return (which
|
---|
| 2157 | * at present we consider equivalent to FedoraGSearch's hitpageSize).
|
---|
| 2158 | */
|
---|
| 2159 | public String[] textQuery(String collection, String query,
|
---|
| 2160 | int maxDocs)
|
---|
| 2161 | throws Exception
|
---|
| 2162 | {
|
---|
| 2163 | // no need to search there is no query or query is empty spaces
|
---|
| 2164 | if(query.trim().equals(""))
|
---|
| 2165 | return new String[]{};
|
---|
| 2166 |
|
---|
| 2167 | // QUERY value won't specify indexed field to search, Fedora
|
---|
| 2168 | // Gsearch will take that as meaning all default indexed fields.
|
---|
| 2169 | // Params to search() method below: string of fielded query terms;
|
---|
| 2170 | // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
|
---|
| 2171 | query = query + " " + "PID" + COLON + GREENSTONE;
|
---|
| 2172 |
|
---|
| 2173 | String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
|
---|
| 2174 | // now we have the XML returned by FedoraGSearch, get the pids
|
---|
| 2175 | // of the documents returned (if any)
|
---|
| 2176 | String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
|
---|
| 2177 | collection, searchResult);
|
---|
| 2178 | return pids;
|
---|
| 2179 | }
|
---|
| 2180 |
|
---|
| 2181 | /**
|
---|
| 2182 | * This method performs a fieldquery, searching for x number of phrases
|
---|
| 2183 | * in each of the 4 indexed fields.
|
---|
| 2184 | * @param collection is the collection to search in
|
---|
| 2185 | * @param nameValParamsMap is a Map of several(key, value) entries,
|
---|
| 2186 | * 4 of which we're concerned with here:
|
---|
| 2187 | * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
|
---|
| 2188 | * - the values are a comma separated list of terms (phrases or single
|
---|
| 2189 | * words) to search that field in. There may be more than 1 or
|
---|
| 2190 | * there may be none (in which case there may be N empty values or
|
---|
| 2191 | * spaces separated by commas).
|
---|
| 2192 | * @param maxDocs is the maximum number of results to return (which
|
---|
| 2193 | * at present we consider equivalent to FedoraGSearch's hitpageSize).
|
---|
| 2194 | * */
|
---|
| 2195 | public String[] fieldQuery(String collection, Map nameValParamsMap,
|
---|
| 2196 | int maxDocs)
|
---|
| 2197 | throws Exception
|
---|
| 2198 | {
|
---|
| 2199 | // we're going to maintain a list of UNIQUE pids that were returned
|
---|
| 2200 | // in search results. Hence we use Set:
|
---|
| 2201 | java.util.Set set = new java.util.HashSet();
|
---|
| 2202 |
|
---|
| 2203 | // (1) Use Fedora's search to search document titles, if they were
|
---|
| 2204 | // specified:
|
---|
| 2205 | String[] docTitlepids = {};
|
---|
| 2206 |
|
---|
| 2207 | String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
|
---|
| 2208 | if(docTitleTerms != null) { // no doc titles may have been specified
|
---|
| 2209 | String[] phrases = docTitleTerms.split(COMMA);
|
---|
| 2210 |
|
---|
| 2211 | // search the individual phrases first:
|
---|
| 2212 | for(int i = 0; i < phrases.length; i++) {
|
---|
| 2213 | if(phrases.equals("") || phrases.equals(" "))
|
---|
| 2214 | continue; //skip when there are no terms
|
---|
| 2215 | docTitlepids = this.searchDocumentTitles(
|
---|
| 2216 | collection, phrases[i], false);
|
---|
| 2217 | for(int j = 0; j < docTitlepids.length; j++)
|
---|
| 2218 | set.add(docTitlepids[j]);
|
---|
| 2219 | }
|
---|
| 2220 | }
|
---|
| 2221 | // (2) use FedoraGSearch to search doc AND section titles, and
|
---|
| 2222 | // fulltext (in case these were specified in nameValParamsMap):
|
---|
| 2223 | String searchResult = this.fedoraGSearch.search(
|
---|
| 2224 | nameValParamsMap, 1, maxDocs);
|
---|
| 2225 |
|
---|
| 2226 | String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
|
---|
| 2227 | collection, searchResult);
|
---|
| 2228 |
|
---|
| 2229 | for(int i = 0; i < pids.length; i++)
|
---|
| 2230 | set.add(pids[i]);
|
---|
| 2231 |
|
---|
| 2232 | pids = null;
|
---|
| 2233 | pids = new String[set.size()];
|
---|
| 2234 | set.toArray(pids); // unique pids
|
---|
| 2235 | return pids;
|
---|
| 2236 | }
|
---|
| 2237 |
|
---|
[22300] | 2238 | /** @return a String representing Greenstone3 XML for a query process
|
---|
[15222] | 2239 | * response returning the results for the query denoted by parameter
|
---|
| 2240 | * nameValParamsMap.
|
---|
| 2241 | * @param nameValParamsMap is a Hashmap of name and value pairs for all the
|
---|
| 2242 | * query field data values. The names match the field names that
|
---|
| 2243 | * describeCollectionService() would have returned for the query service.
|
---|
| 2244 | * @param collection is the name of the collection
|
---|
| 2245 | * @param service is the name of the query service
|
---|
| 2246 | * This method is only ever called when any of the services in the digital
|
---|
| 2247 | * library described themselves as type=query. Therefore any digital
|
---|
| 2248 | * libraries that have no query services, can just return emtpy message
|
---|
| 2249 | * strings (or even "") since this method will never be called on them
|
---|
| 2250 | * anyway. */
|
---|
| 2251 | public String query(String collection, String service,
|
---|
| 2252 | Map nameValParamsMap)
|
---|
| 2253 | {
|
---|
| 2254 | FedoraGS3RunException ex = null;
|
---|
| 2255 | // (1) obtain the requested number of maximum result documents
|
---|
| 2256 | int maxDocs = 100;
|
---|
| 2257 | try{
|
---|
| 2258 | maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
|
---|
| 2259 | } catch(NumberFormatException e) {
|
---|
| 2260 | maxDocs = 100;
|
---|
| 2261 | }
|
---|
| 2262 |
|
---|
| 2263 | String pids[] = {};
|
---|
| 2264 | // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
|
---|
[22300] | 2265 | if(service.endsWith("TextQuery")) {
|
---|
[15222] | 2266 | try {
|
---|
| 2267 | // get the Query field:
|
---|
| 2268 | String query = (String)nameValParamsMap.get(QUERY);
|
---|
| 2269 | pids = textQuery(collection, query, maxDocs);
|
---|
| 2270 | }
|
---|
| 2271 | catch(Exception e) {
|
---|
| 2272 | LOG.error("Error in TextQuery processing: " + e);
|
---|
| 2273 | ex = new FedoraGS3RunException(
|
---|
| 2274 | "When trying to use FedoraGenericSearch for a TextQuery", e);
|
---|
| 2275 |
|
---|
| 2276 | }
|
---|
| 2277 | } else { // (3) FieldQuery
|
---|
| 2278 | // first get the comma-separated lists
|
---|
| 2279 | String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
|
---|
| 2280 | String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
|
---|
| 2281 | // both are comma separated lists, so split both on 'comma'
|
---|
| 2282 | String[] fieldNames = listOfFieldNames.split(COMMA);
|
---|
| 2283 | String[] searchTerms = listOfSearchTerms.split(COMMA);
|
---|
| 2284 |
|
---|
| 2285 | // In the fieldNames and searchTerms lists of nameValParamsMap,
|
---|
| 2286 | // each searchTerm element was matched with its correspondingly
|
---|
| 2287 | // indexed fieldName.
|
---|
| 2288 | // A new map is going to reorganise this, by putting all terms
|
---|
| 2289 | // for a particular fieldName together in a comma separated list
|
---|
| 2290 | // and associating that with the fieldName. I.e. (key, value) ->
|
---|
| 2291 | // (fieldName, comma-separated list of all terms in that field)
|
---|
| 2292 | Map map = new HashMap();
|
---|
| 2293 | for(int i = 0; i < searchTerms.length; i++) {
|
---|
| 2294 | // there may be fewer searchTerms than fieldNames (since some
|
---|
| 2295 | // fieldNames may have been left empty), so loop on searchTerms
|
---|
[21775] | 2296 | if(map.containsKey(fieldNames[i])) { // fieldName is already
|
---|
[15222] | 2297 | // in the list, so append comma with new value
|
---|
| 2298 | String termsList = (String)map.get(fieldNames[i]);
|
---|
| 2299 | termsList = termsList + COMMA + searchTerms[i];
|
---|
| 2300 | map.put(fieldNames[i], termsList);
|
---|
| 2301 | } else { // this is the first time this fieldName occurred
|
---|
| 2302 | // just put the fieldName with searchTerm as-is
|
---|
| 2303 | map.put(fieldNames[i], searchTerms[i]);
|
---|
| 2304 | }
|
---|
| 2305 | }
|
---|
| 2306 |
|
---|
| 2307 | try {
|
---|
| 2308 | // For fieldquery, we search on all the fieldNames specified
|
---|
| 2309 | // - if DOC_TITLES is specified then we use Fedora's search
|
---|
| 2310 | // - for all other fieldNames specified, we use FedoraGSearch
|
---|
| 2311 | pids = fieldQuery(collection, map, maxDocs);
|
---|
| 2312 | }
|
---|
| 2313 | catch(Exception e) {
|
---|
| 2314 | LOG.error("Error in FieldQuery processing: " + e);
|
---|
| 2315 | ex = new FedoraGS3RunException(
|
---|
| 2316 | "When trying to use FedoraGenericSearch for a FieldQuery", e);
|
---|
| 2317 | }
|
---|
| 2318 | }
|
---|
| 2319 |
|
---|
| 2320 | // Build Greenstone XML Query response message for from
|
---|
| 2321 | // the pids (which should be document identifiers)
|
---|
| 2322 | Document doc = builder.newDocument();
|
---|
| 2323 | // <metadataList><metadata name="numDocsMatched" value="n" />
|
---|
| 2324 | // </metadataList>
|
---|
| 2325 | Element metadataList = doc.createElement(
|
---|
| 2326 | GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 2327 | Element metadata = doc.createElement(GSXML.METADATA_ELEM);
|
---|
| 2328 |
|
---|
| 2329 | Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
|
---|
| 2330 | attribute.setValue(NUM_DOCS_MATCHED);
|
---|
| 2331 | metadata.setAttributeNode(attribute);
|
---|
| 2332 |
|
---|
| 2333 | attribute = doc.createAttribute(GSXML.VALUE_ATT);
|
---|
| 2334 | attribute.setValue(Integer.toString(pids.length));
|
---|
| 2335 | metadata.setAttributeNode(attribute);
|
---|
| 2336 |
|
---|
| 2337 | metadataList.appendChild(metadata);
|
---|
| 2338 |
|
---|
| 2339 | // <documentNodeList>
|
---|
| 2340 | // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
|
---|
| 2341 | // docType='hierarchy' nodeType="leaf" />
|
---|
| 2342 | // ...
|
---|
| 2343 | // ...
|
---|
| 2344 | // </documentNodeList>
|
---|
| 2345 | Element docNodeList = doc.createElement(
|
---|
| 2346 | GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
| 2347 | // for each
|
---|
| 2348 | for(int i = 0; i < pids.length; i++) {
|
---|
| 2349 | Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
| 2350 | attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
|
---|
| 2351 | attribute.setValue(pids[i]);
|
---|
| 2352 | docNode.setAttributeNode(attribute);
|
---|
| 2353 |
|
---|
| 2354 | attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
|
---|
| 2355 | attribute.setValue("hierarchy");
|
---|
| 2356 | docNode.setAttributeNode(attribute);
|
---|
| 2357 |
|
---|
| 2358 | attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
|
---|
| 2359 | attribute.setValue("root");
|
---|
| 2360 | docNode.setAttributeNode(attribute);
|
---|
| 2361 | docNodeList.appendChild(docNode);
|
---|
| 2362 | }
|
---|
| 2363 |
|
---|
| 2364 | Element responseMsg = createResponseMessage(doc, docNodeList, ex,
|
---|
| 2365 | GSXML.REQUEST_TYPE_PROCESS, service);
|
---|
| 2366 | try{
|
---|
[22300] | 2367 | return FedoraCommons.elementToString(responseMsg);
|
---|
[15222] | 2368 | }catch(TransformerException e) {
|
---|
| 2369 | return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
|
---|
| 2370 | + " " + e;
|
---|
| 2371 | }
|
---|
| 2372 | }
|
---|
[26171] | 2373 |
|
---|
| 2374 |
|
---|
| 2375 | // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
|
---|
| 2376 | /** Given a URL that represents a fedoraPID, will look up the object.
|
---|
| 2377 | * If it exists, it will return the contents of the DC:Title of its datastream.
|
---|
| 2378 | * If it doesn't exist, it will return the URL as-is.
|
---|
| 2379 | * @param URL: the URL that (after modification) represents a fedoraPID to look up.
|
---|
| 2380 | * @param collection: the name of collection in which to search for the URL
|
---|
| 2381 | * representing a fedoraPID.
|
---|
| 2382 | * @return the string (representing a fedoraPID) stored in the DC:Title of the
|
---|
| 2383 | * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
|
---|
| 2384 | * then the parameter URL is returned.
|
---|
| 2385 | */
|
---|
| 2386 | public String getPIDforURL(String url, String collection) {
|
---|
| 2387 | FedoraGS3RunException ex = null; // any RemoteException
|
---|
| 2388 |
|
---|
| 2389 | // (1) convert url to the fedorapid
|
---|
| 2390 | // / -> _ and : -> -
|
---|
| 2391 | String fedoraPID = url.replaceAll("/", "_");
|
---|
| 2392 | fedoraPID = fedoraPID.replaceAll(":", "-");
|
---|
| 2393 | // prefix "greenstone-http:<colname>-" to the fedoraPID
|
---|
| 2394 | fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
|
---|
| 2395 | //LOG.error("### fedoraPID: " + fedoraPID);
|
---|
| 2396 |
|
---|
| 2397 | // (2) Look up the datastream for the fedorapid
|
---|
| 2398 | String dcTitle = "";
|
---|
| 2399 | try {
|
---|
| 2400 | dcTitle = getDCTitle(fedoraPID);
|
---|
| 2401 | } catch(Exception e) {
|
---|
| 2402 | LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
|
---|
| 2403 | ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
|
---|
| 2404 | }
|
---|
| 2405 | //String dc = this.getDC(fedoraPID);
|
---|
| 2406 | //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
|
---|
| 2407 |
|
---|
| 2408 | // (3) if fedorapid exists, extract the dc:title content.
|
---|
| 2409 | // if it doesn't exist, return url
|
---|
| 2410 | if(dcTitle.equals("")) {
|
---|
| 2411 | return url;
|
---|
| 2412 | } else {
|
---|
| 2413 | // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
|
---|
| 2414 | //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
|
---|
| 2415 | return dcTitle+"-1";
|
---|
| 2416 | }
|
---|
| 2417 | }
|
---|
[15222] | 2418 |
|
---|
| 2419 | public static void main(String args[]) {
|
---|
| 2420 | try{
|
---|
| 2421 | // testing default constructor
|
---|
| 2422 | //FedoraGS3Connection con = new FedoraGS3Connection();
|
---|
| 2423 |
|
---|
| 2424 | // testing constructor that takes properties file to show initial
|
---|
| 2425 | // fedora server values
|
---|
| 2426 | java.io.File propertyFilename
|
---|
| 2427 | = new java.io.File("fedoraGS3.properties");
|
---|
| 2428 | FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
|
---|
| 2429 |
|
---|
| 2430 | // DESCRIBE: serviceList, collectionList
|
---|
| 2431 | System.out.println("serviceList:\n" + con.getServiceList());
|
---|
| 2432 |
|
---|
| 2433 | System.out.println("collectionList:\n" + con.getCollectionList());
|
---|
| 2434 |
|
---|
| 2435 | String[] colPIDs = con.getCollections();
|
---|
| 2436 | String[] collectionNames = con.getCollectionNames(con.getCollections());
|
---|
| 2437 |
|
---|
| 2438 |
|
---|
| 2439 | for(int i = 0; i < collectionNames.length; i++) {
|
---|
| 2440 | System.out.println("Describing collections:\n");
|
---|
| 2441 | System.out.println(con.describeCollection(collectionNames[i]));
|
---|
| 2442 | System.out.println("Describing collection services:\n"
|
---|
| 2443 | + con.describeCollectionServices(collectionNames[i]));
|
---|
| 2444 | }
|
---|
| 2445 |
|
---|
| 2446 | String[] serviceNames = con.getServiceNames();
|
---|
| 2447 | for(int i = 0; i < serviceNames.length; i++) {
|
---|
| 2448 | System.out.println("Describing " + serviceNames[i] + ":\n"
|
---|
| 2449 | + con.describeCollectionService("demo", serviceNames[i]));
|
---|
| 2450 | }
|
---|
| 2451 |
|
---|
| 2452 |
|
---|
| 2453 | // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
|
---|
| 2454 | // along with EX of the top-level document:
|
---|
| 2455 | System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
|
---|
[22300] | 2456 | System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
|
---|
[15222] | 2457 |
|
---|
| 2458 |
|
---|
| 2459 | String[] docIDs = con.getCollectionDocs(colPIDs[0]);
|
---|
| 2460 | System.out.println("\nGET CONTENT:");
|
---|
| 2461 | for(int i = 0; i < docIDs.length; i++) {
|
---|
| 2462 | System.out.println(con.getContent(docIDs[i]));
|
---|
| 2463 | }
|
---|
| 2464 |
|
---|
| 2465 | System.out.println("\nGET META:");
|
---|
| 2466 | for(int i = 0; i < docIDs.length; i++) {
|
---|
[22300] | 2467 | System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
|
---|
[15222] | 2468 | }
|
---|
| 2469 |
|
---|
| 2470 | String[] getTitlesFor = {
|
---|
| 2471 | "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
|
---|
| 2472 | "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
|
---|
| 2473 | "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
|
---|
| 2474 | "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
|
---|
| 2475 | "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
|
---|
| 2476 | };
|
---|
| 2477 |
|
---|
| 2478 | // first let's display the regular meta for top-level docs and
|
---|
| 2479 | // their sections
|
---|
| 2480 | for(int i = 0; i < getTitlesFor.length; i++) {
|
---|
[22300] | 2481 | System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
|
---|
[15222] | 2482 | }
|
---|
| 2483 |
|
---|
| 2484 | System.out.println("\nTitles are:");
|
---|
| 2485 | System.out.println(con.getTitleMetadata(getTitlesFor));
|
---|
| 2486 |
|
---|
| 2487 | System.out.println("\nGET STRUCTURE:");
|
---|
| 2488 | for(int i = 0; i < docIDs.length; i++) {
|
---|
[22300] | 2489 | System.out.println("Descendents and numChildren:\n"
|
---|
[26270] | 2490 | + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
|
---|
[22300] | 2491 | System.out.println("Parent and numSiblings:\n"
|
---|
[26270] | 2492 | + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
|
---|
[15222] | 2493 | }
|
---|
| 2494 |
|
---|
| 2495 | // TEST ERROR CASES:
|
---|
| 2496 | System.out.println("\nTESTING ERROR CASES");
|
---|
| 2497 | System.out.println(con.getContent("greenstone:demo-pinky"));
|
---|
| 2498 | String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
|
---|
| 2499 | "greenstone:demo-pinky" };
|
---|
| 2500 | System.out.println(con.getContent(errorCases));
|
---|
[22300] | 2501 | System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
|
---|
[26270] | 2502 | System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
|
---|
[15222] | 2503 |
|
---|
| 2504 | System.out.println("\nCLASSIFIER BROWSE");
|
---|
| 2505 | System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
|
---|
[22300] | 2506 | new String[]{"CL1"}, new String[] {""}, new String[] {""}));
|
---|
[15222] | 2507 |
|
---|
| 2508 | System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
|
---|
| 2509 | String[] classNodeIDs = new String[26];
|
---|
| 2510 | for(int i = 0; i < classNodeIDs.length; i++) {
|
---|
| 2511 | int subClassifierNum = i + 1;
|
---|
| 2512 | classNodeIDs[i] = "CL1." + subClassifierNum;
|
---|
| 2513 | }
|
---|
| 2514 | System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
|
---|
[22300] | 2515 | classNodeIDs, new String[]{"all"}));
|
---|
[15222] | 2516 |
|
---|
| 2517 | System.out.println("Testing query services");
|
---|
| 2518 | System.out.println("TEXT QUERY:");
|
---|
| 2519 | Map formControlValsMap = new HashMap();
|
---|
| 2520 | formControlValsMap.put(MAXDOCS, "100");
|
---|
| 2521 | formControlValsMap.put(QUERY, "snails");
|
---|
| 2522 | String searchResponse
|
---|
| 2523 | = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
|
---|
| 2524 | System.out.println(searchResponse);
|
---|
| 2525 |
|
---|
| 2526 | System.out.println("FIELD QUERY:");
|
---|
| 2527 | formControlValsMap.clear();
|
---|
| 2528 | formControlValsMap.put(MAXDOCS, "100");
|
---|
| 2529 | formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
|
---|
| 2530 | formControlValsMap.put(FIELDNAME_ATT,
|
---|
| 2531 | "allFields,docTitles,allFields,allFields");
|
---|
| 2532 | searchResponse
|
---|
| 2533 | = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
|
---|
| 2534 | System.out.println(searchResponse);
|
---|
| 2535 |
|
---|
| 2536 | System.exit(0);
|
---|
| 2537 | }catch(Exception e) {
|
---|
| 2538 | JOptionPane.showMessageDialog(
|
---|
| 2539 | null, e, "Error", JOptionPane.ERROR_MESSAGE);
|
---|
| 2540 | //System.err.println("ERROR: " + e);
|
---|
| 2541 | e.printStackTrace();
|
---|
| 2542 | }
|
---|
| 2543 | }
|
---|
[15733] | 2544 | }
|
---|