root/other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java @ 26270

Revision 26270, 105.9 KB (checked in by ak19, 6 years ago)

1. Now checks request for nodeStructureInfo documentType, as is needed to get it working with GS3 again. 2. Added in reusable constants of gsdl3/util/AbstractBasicDocument.java since these have now been made public constants.

Line 
1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
32import org.greenstone.gsdl3.util.GSXML;
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.Attr;
36import org.w3c.dom.Text;
37import org.w3c.dom.NodeList;
38import org.w3c.dom.Node;
39import org.xml.sax.InputSource;
40
41import java.io.File;
42import java.util.HashMap;
43import java.util.Properties;
44import java.util.Map;
45
46import javax.swing.JOptionPane;
47
48import org.xml.sax.SAXException;
49import java.io.UnsupportedEncodingException;
50import java.io.IOException;
51import javax.net.ssl.SSLHandshakeException;
52import java.net.ConnectException;
53import java.net.MalformedURLException;
54import java.rmi.RemoteException;
55import javax.xml.parsers.ParserConfigurationException;
56import javax.xml.transform.TransformerException;
57
58/**
59 * Class that extends FedoraConnection in order to be able to use
60 * Fedora's web services to retrieve the specific datastreams of
61 * Greenstone documents stored in Fedora's repository. This class
62 * provides methods that convert those datastreams into Greenstone3
63 * XML response messages which are returned.
64 * @author ak19
65*/
66public class FedoraGS3Connection
67    extends FedoraConnection implements FedoraToGS3Interface,
68        FedoraToGS3Interface.Constants 
69{
70    /** The logging instance for this class */
71    private static final Logger LOG = Logger.getLogger(
72            FedoraGS3Connection.class.getName());
73   
74    /** Default name of the FedoraGSearch index, used when none is configured */
75    private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
76   
77    /** Complete list of services that our FedoraGS3 would support
78     * if everything goes well. If a connection to FedoraGSearch
79     * cannot be established, the query services will no longer be
80     * available. The actual services supported are given by member
81     * variable serviceNames. */
82    protected static final String[] SERVICES = {
83        "DocumentContentRetrieve", "DocumentMetadataRetrieve",
84        "DocumentStructureRetrieve",
85        "TextQuery", "FieldQuery",
86        "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
87    };
88   
89    /** List of services actually supported by our FedoraGS3 repository
90     * after construction. If FedoraGenericSearch can't be connected to,
91     * then query services will not be offered */
92    protected String[] serviceNames;
93   
94    /** The object used to connect to FedoraGenericSearch, which is used
95     * for doing full-text searching */
96    protected GSearchConnection fedoraGSearch;
97
98    /** The url for the wsdl file of FedoraGSearch's web services;
99     * by default this will be the Fedora server's base URL
100     * concatenated to "gsearch/services/FgsOperations?wsdl" */
101    protected String gSearchWSDLURL;
102       
103    /** The last part of the gSearchWSDL URL. The first part is
104     * the same as the fedora server's base url. */
105        protected String gSearchWSDLSuffix;
106           
107    /** The name of the index that FedoraGSearch will index the GS3
108     * documents into. If no name is specified in the properties file,
109     * this will default to DEFAULT_FEDORA_INDEX ("BasicIndex"). */
110    protected String gSearchIndexName;
111   
112    /** 5 argument constructor is the same as that of superclass FedoraConnection:
113     * @param protocol can be either http or https
114     * @param host is the host where the fedora server is listening
115     * @param port is the port where the fedora server is listening
116     * @param fedoraServerUsername is the username for administrative
117     * authentication required to access the fedora server.
118     * @param fedoraServerPassword is the password for administrative
119     * authentication required to access the fedora server. If no password was set
120     * when installing Fedora, leave the field "".
121     * Instantiates a FedoraGS3Connection object which connects to Fedora's
122     * web services through stub classes and tries to connect to FedoraGSearch's
123     * web services through the default WSDL location for it
124     * ("gsearch/services/FgsOperations?wsdl"). If another url is to be used,
125     * call setGSearchWSDLURL(url) after the constructor instead.
126    */
127    public FedoraGS3Connection(String protocol, String host, int port,
128            String fedoraServerUsername, String fedoraServerPassword)
129        throws ParserConfigurationException, MalformedURLException,
130            SSLHandshakeException, RemoteException, AuthenticationFailedException,   
131                NotAFedoraServerException, ConnectException, Exception 
132    {
133        super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
134        // super() will call setInitialisationProperties(properties)
135        // And that will try to instantiate the GSearchConnection.
136    }
137   
138    /** No-argument constructor which is the same as that of superclass
139     * FedoraConnection: it displays a small dialog requesting input for the
140     * host, port, administrative password and username of the fedora server.
141     * If no password was set on the fedora repository when installing it,
142     * the user can leave the password field blank. */
143    public FedoraGS3Connection()
144        throws ParserConfigurationException, MalformedURLException,
145            CancelledException, ConnectException, RemoteException,
146            SSLHandshakeException, Exception
147    {
148        super();
149        // super() will call setInitialisationProperties(properties)
150        // And that will try to instantiate the GSearchConnection.
151    }
152   
153    /** Single-argument constructor which is the same as that of superclass
154     * FedoraConnection: it takes the name of the properties file where
155     * connection initialisation values may already be provided and then
156     * displays a small dialog requesting input for the host, port,
157     * administrative password and username of the fedora server showing
158     * the values in the properties file as default. If the necessary
159     * initialisation are not present in the file, the corresponding fields
160     * in the dialog will be blank.
161     * If no password was set on the fedora repository when installing it,
162     * the user can leave the password field blank. */
163    public FedoraGS3Connection(File propertiesFilename)
164        throws ParserConfigurationException, MalformedURLException,
165            CancelledException, ConnectException, RemoteException,
166            SSLHandshakeException, Exception
167    {
168        super(propertiesFilename);
169        // super() will call setInitialisationProperties(properties)
170        // And that will try to instantiate the GSearchConnection.
171    }
172   
173    /** The superclass constructor calls this method passing any preset 
174     * properties loaded from a propertiesFile. This method is overridden 
175     * here in order to instantiate the gSearchConnection based on the 
176     * - gSearchWSDLSuffix that will be appended to the fedora base url.
177     * (If one was not provided in the properties file, gSearchWSDLURL defaults
178     * to something of the form
179     * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
180     * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
181     * "gsearch/services/FgsOperations?wsdl".
182     * - name of the index into which the GS3 documents have been indexed
183     * and which FedoraGenericSearch should use to perform searches. If none is
184     * given in the properties file, then the index name defaults to "FedoraIndex".
185     * @param properties is the Properties Map loaded from a properties file
186     * (if there was any) which specifies such things as host and port of the
187     * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
188     * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
189     * to whatever the final value of this.gSearchWSDLURL' suffix is, and
190     * "gsearch.indexName" will be set to to whatever the final value of
191     * this.gSearchIndexName is.
192    */
193    protected void setInitialisationProperties(Properties properties)
194        throws ParserConfigurationException, MalformedURLException,
195        CancelledException, ConnectException, RemoteException,
196        SSLHandshakeException, Exception
197    {
198        super.setInitialisationProperties(properties);
199        // gsearchWSDL URL suffix, if not specified, defaults to
200        // "fedoragsearch/services/FgsOperations?wsdl" which is
201        // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
202        this.gSearchWSDLSuffix = properties.getProperty(
203            "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
204        this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
205        // Set the property to whatever this.gSearchWSDLURL is now,
206        // so that it will be written out to the properties file again
207        properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
208       
209        // Similarly for the name of the index FedoraGenericSearch should use
210        // when performing searches for GS3 docs stored in Fedora's repository.
211        this.gSearchIndexName = properties.getProperty(
212                "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
213        properties.setProperty("gsearch.indexName", this.gSearchIndexName);
214        // Create a connection to FedoraGSearch's web services:
215        initSearchFunctionality();
216    }
217   
218    /** Overridden init method to work with the 5 argument constructor, so that we can
219     * bypass using setInitialisationProperties() which works with a Properties map.
220    */
221    protected void init(String protocol, String host, String port,
222            String fedoraServerUsername, String fedoraServerPassword)
223        throws ParserConfigurationException, MalformedURLException,
224            AuthenticationFailedException, RemoteException, Exception
225    {
226        super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
227        this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
228        this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
229        this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
230        initSearchFunctionality();
231    }   
232
233
234    /** Init method that instantiates a GSearchConnection object used
235     * to work with the separate FedoraGSearch web services.
236     * The url of the WSDL for FedoraGSearch's web services is worked out
237     * from the baseURL of the Fedora server.
238    */
239    protected void initSearchFunctionality()
240    {
241        try {
242            this.fedoraGSearch = null;
243            this.fedoraGSearch = new GSearchConnection(
244                    gSearchWSDLURL, gSearchIndexName);
245            this.serviceNames = SERVICES;
246        } catch(Exception e){
247            LOG.error("Cannot connect to FedoraGSearch's web services at "
248                + gSearchWSDLURL + "\nQuery services will not be available.");
249            // If an exception occurs, something has gone wrong when
250            // trying to connect to FedoraGSearch's web services. This
251            // means, we can't offer query services, as that's provided
252            // by FedoraGSearch
253            serviceNames = null;
254            int countOfNonQueryServices = 0;
255            for(int i = 0; i < SERVICES.length; i++) {
256                // do not count query services
257                if(!SERVICES[i].toLowerCase().contains("query")) {
258                    countOfNonQueryServices++;
259                }
260            }
261            // Services now supported are everything except Query services
262            serviceNames = new String[countOfNonQueryServices];
263            int j = 0;
264            for(int i = 0; i < SERVICES.length; i++) {
265                if(!SERVICES[i].toLowerCase().contains("query")) {
266                    serviceNames[j] = SERVICES[i];
267                    j++; // valid serviceName, so increment serviceName counter
268                }
269                 
270            }
271        }
272    }
273   
274    /** @return the gSearchWSDLURL, the url of the WSDL for the
275     * FedoraGSearch web services */
276    public String getGSearchWSDLURL() { return gSearchWSDLURL; }
277   
278    /** Sets the member variable gSearchWSDLURL that specify the location of
279     * the WSDL file of FedoraGSearch's web services. Then it attempts
280     * to instantiate a connection to those web services.
281     * @param url is the new url of the GSearch web services WSDL file */
282    public void setGSearchWSDLURL(String url) {
283        this.gSearchWSDLURL = url;
284        initSearchFunctionality();
285    }
286   
287    /** @return the gSearchIndexName, the name of the index Fedora Generic
288     * Search will search in (where GS3 docs have been indexed into). */
289    public String getGSearchIndexName() { return gSearchIndexName; }
290   
291    /** Sets the member variable gSearchIndexName that specifies the name
292     * of the index containing indexed GS3 documents. Then it attempts
293     * to instantiate a connection to the Fedora GSearch web services using
294     * this changed value for indexName.
295     * @param indexName is the new name of the index containing indexed GS3
296     * docs that GSearch should search in. */
297    public void setGSearchIndexName(String indexName) {
298        this.gSearchIndexName = indexName;
299        initSearchFunctionality();
300    }
301   
302    /** @return the array of the services actually supported by FedoraGS3 */
303    protected String[] getServiceNames() { return this.serviceNames;}
304   
305    /**
306     * For finding out if the sectionNumber is given as part of the docID.
307     * @param docID is the String that contains the docPID and may also
308     * contain the section number.
309     * @return true if the document identifier docID contains a section-
310     * number, and false if it consists solely of the docPID.
311     * That is, true is returned if
312     * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
313     * and false is returned if
314     * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
315     * */
316    protected boolean containsSectionNumber(String docID) {
317        // if there are two hyphens in the docID, then there are sections
318        // (and the section number is appended at end of docID)
319        // docID = "greenstone:colName-<docPID>-<sectionNum>"
320        return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
321    }
322   
323    /** This method will extract the docPID from docID and return it.
324     * (If a sectionNumber is suffixed to the docID, the docPID which is
325     * the prefix is returned; otherwise the docID is the docPID and is
326     * returned)
327     * @param docID is the String that contains the docPID and may also
328     * contain the section number.
329     * @return only the docPID portion of the docID.
330    */
331    protected String getDocPIDFromDocID(String docID) {
332        if(containsSectionNumber(docID))
333            return docID.substring(0, docID.lastIndexOf(HYPHEN));
334        // else (if there's no sectionNumber), docID is the docPID
335        return docID;
336    }
337   
338    /** This method will return the section Number, if there's any
339     * suffixed to the docID. Otherwise it will return the empty string
340     * @param docID is the String that contains the docPID and may also
341     * contain the section number.
342     * @return only the sectionID portion of the docID - if any, else "".
343    */
344    protected String getSectionIDFromDocID(String docID) {
345        if(containsSectionNumber(docID))
346            return docID.substring(
347                    docID.lastIndexOf(HYPHEN)+1, docID.length());
348        return "";
349    }
350   
351    /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
352     * response message that gives the metadata for each collection identified
353     * @param collIDs is an array of fedora pids identifying collections in the
354     * fedora repository
355     * @return a GS3 DocumentMetadataRetrieve response message containing the
356     * EX metadata for all the requested collections */
357    public String getCollectionMetadata(String[] collIDs) {
358    return getMetadata(collIDs, new String[] {"all"});
359    }
360   
361    /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
362     * response message is returned containing the metadata for each document.
363     * @param docIDs is an array of document identifiers (docID can either be
364     * &lt;pid&gt;s items (documents) in the fedora repository, or
365     * "&lt;pid&gt;-sectionNumber".
366     * @return a GS3 DocumentMetadataRetrieve response message containing the
367     * EX, DC, DLS metadata for all the requested documents
368     * @param metadata is the list of metadata elements to be retrieved for each doc */
369    public String getDocumentMetadata(String[] docIDs, String[] metadata) {
370        return getMetadata(docIDs, metadata);
371    }
372   
373    /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
374     * response message that gives the metadata for the collection identified
375     * @param collID is a fedora pid identifying a collection in its repository
376     * @return a GS3 DocumentMetadataRetrieve response message containing the
377     * EX metadata for the requested collection
378     * @param metadata is the list of metadata elements to be retrieved for each doc */
379    public String getCollectionMetadata(String collID) {
380        return getMetadata(new String[] {collID}, new String[] {"all"});
381    }
382   
383    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
384     * response message containing the metadata for the document.
385     * @param docID is a document identifier (docID can either be a &lt;pid&gt;
386     * of an item (document) in the fedora repository, or it can be
387     * "&lt;pid&gt;-sectionNumber".
388     * @return a GS3 DocumentMetadataRetrieve response message containing the
389     * EX, DC, DLS metadata for the requested document */
390    public String getDocumentMetadata(String docID, String[] metadata)  {
391    return getMetadata(new String[] {docID}, metadata);
392    }
393
394    /** @return a greenstone DocumentMetadataRetrieve response for the
395     * documents or collections indicated by the docIDsOrCollIDs.
396     * @param docIDsOrCollIDs is an array of identifiers which may be either the
397     * fedora pids for collections, or otherwise may be a document identifier.
398     * In the last case, the document ID may consist of either
399     * "documentPID-sectionNumber" or may just be just fedora documentPID
400     * @param metadata is the list of metadata elements to be retrieved for each doc */
401    public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
402    {
403        Document doc = builder.newDocument();
404        FedoraGS3RunException ex = null;
405       
406        Element docNodeList = doc.createElement(
407                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
408       
409        try{
410            for(int i = 0; i < docIDsOrCollIDs.length; i++) {
411                // create the <documentNode> containing the metadata
412                // for each document docID
413                Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
414                docNodeList.appendChild(docNode);
415            }
416        } catch(Exception e) {
417            ex = new FedoraGS3RunException(e);
418            ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
419        }
420       
421        Element responseMsg = createResponseMessage(doc, docNodeList, ex, 
422                GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
423        try{
424            return FedoraCommons.elementToString(responseMsg);
425        } catch(TransformerException e) {
426            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
427                + " " + e;
428        }
429    }
430   
431    /** Method that takes a new DOM document, as well as an identifier of either
432     * a collection or document (which may be a fedora pid for the collection
433     * or document, or may be the documentPid-sectionNumber for a document) and
434     * returns a documentNode element for it:
435     * &lt;documentNode&gt;&lt;metadataList&gt;
436     * &lt;metadata name=""&gt;value&lt;/metadata&gt;
437     * ...
438     * &lt;/metadataList&gt;&lt;/documentNode&gt;
439     * @return documentNode containing the metadata for the collection or
440     * document given by parameter ID
441     * @param id denotes a collection pid, a document pid or a docID of the 
442     * form "documentpid-sectionNumber"
443     * @param metadata is the list of metadata elements to be retrieved for each doc */
444    protected Element getMetadata(Document doc, String id, String[] metadata)
445        throws RemoteException, UnsupportedEncodingException,
446            SAXException, IOException
447    {
448        // We're going to create the documentNode nested inside the following
449        // documentNodeList:
450            // <documentNodeList>
451            // <documentNode nodeID=""><metadataList>
452            // <metadata name="">value</metadata>
453            // </metadataList></documentNode>
454            // <documentNode>...</documentNode>
455            // </documentNodeList>
456            // <documentNodeList>
457       
458        // <documentNode nodeID="docID"> - the docNode on which a metadata
459        // retrieve is being performed
460        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
461        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
462        attribute.setValue(id);
463        docNode.setAttributeNode(attribute);
464       
465        // <metadataList>
466        Element metadataList = doc.createElement(
467                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
468       
469        String ex = "";
470        String dc = "";
471        String dls = "";
472        if(id.endsWith(_COLLECTION)) { // docID refers to a collection
473            // Obtain the "EX" datastream (extracted metadata) for the collection
474            ex = this.getEX(id);
475        }
476        else { // docID refers to a document
477            // work out the document's fedora PID and section ID, and then
478            // obtain the EX (extracted metadata) and DC datastreams for the doc
479           
480            // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
481            // is the same as for pid="greenstone:<colname>-docPID"
482            // That is, <Section id="1"> refers to the toplevel document docPID
483            // If requested for top-level document, there may also be DLS meta
484            String sectionID = getSectionIDFromDocID(id);
485            String docPID = getDocPIDFromDocID(id);
486            if(sectionID.equals("") || sectionID.equals("1")) {
487                // metadata of toplevel document is requested
488                ex = this.getEX(docPID); // slightly faster than doing
489                        //getSectionEXMetadata(docID, "1")
490                dc = this.getDC(docPID);
491                dls = this.getDLS(docPID);
492            }
493            else {
494                ex = getSectionEXMetadata(docPID, sectionID);
495                dc = getSectionDCMetadata(docPID, sectionID);
496            }
497        }
498       
499        String metafields = "";
500        for(int i = 0; i < metadata.length; i++) {
501            metafields = metafields + metadata[i] + "|";           
502        }
503
504        // Adding in metadata sets in alphabetical order
505        // DC metadata for a top-level document is different from EX, DLS:
506        // only the element's namespace prefix is "dc", the rest of a tagname
507        // is unknown.
508        if(!dc.equals("")) {
509            addMetadataWithNamespacedTagNames(doc, metadataList,
510                              dc, DC, metafields);
511        }
512       
513        // Check if we were supposed to process dls and dc metadata
514        // as well. We only ever do this for top-level documents,
515        // in which case, dls and dc will be non-empty strings
516        if(!dls.equals("")) {
517            addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
518        }
519       
520        // we definitely have an EX metadatastream for each
521        // collection object, top-level document object,
522        // and document section item
523        addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
524       
525        // now the metadataList has been built up
526        docNode.appendChild(metadataList);
527       
528        return docNode; // return <documentNode> containing the metadata
529    }
530   
531    /** This method retrieves all the metadata elements in the metaDataStream
532     * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
533     * metadataSetNS is the namespace of each tag, and creates a new element of
534     * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
535     * each. Each of these are then appended to the metadataList parameter.
536     * @param doc is the Document object using which the new metadata Elements
537     * are to be constructed
538     * @param metadataList is the &lt;metadataList&gt; Element to which the new
539     * metadata Elements are to be appended as children.
540     * @param metaDatastream the metadata datastream in string form (e.g. the
541     * Dublin Core metadata stored in the Fedora repository).
542     * @param metadataSet is the constant datastream identifier, e.g. "DC".
543     * At present this method applies to the DC metadata and any others like it
544     * where each tagname is different except for the constant dc: namespace.
545     * @param metafields is a | separated string containing the metadatafields to
546     * extract or "all" if all fields are requested
547    */
548    protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
549                             String metaDatastream, String metadataSet, String metafields)
550        throws SAXException, IOException
551    {
552        Document src = builder.parse(
553                new InputSource(new StringReader(metaDatastream)));
554       
555        // The following doesn't work for some reason: to retrieve all elements
556        // whose namespace prefix starts with "dc", we pass "*" for localName
557        //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
558       
559        // Longer way: get the children of the root document
560        NodeList children = src.getDocumentElement().getChildNodes();
561       
562        for(int i = 0; i < children.getLength(); i++) {
563            String nodeName = children.item(i).getNodeName();
564            // check that the nodename starts with the metadataSet ("dc") namespace,
565            // which simultaneously ensures that the node's an element:
566            if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
567                // need to have a period for Greenstone instead of Fedora's colon
568                nodeName = nodeName.replace(COLON, PERIOD);
569                if(metadataSet.equals(DC)) { // dc:title -> dc.Title
570                nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
571                    + nodeName.substring(4);
572                }
573
574                // get the requested metadata fields
575                if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
576                Element metatag = (Element)children.item(i);
577                String value = FedoraCommons.getValue(metatag);
578                // <dc:tagname>value</dc:tagname>
579                // we're going to put this in our metadata element as
580                // <metadata name="dc.Tagname">value</metadata>
581               
582                // create metadata of (name, value) pairs in target DOM (doc)
583                Element metadata = doc.createElement(GSXML.METADATA_ELEM);
584                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
585               
586                attribute.setValue(nodeName);
587                metadata.setAttributeNode(attribute);
588                Text content = doc.createTextNode(value);
589                metadata.appendChild(content);
590                metadataList.appendChild(metadata);
591                }
592            }
593        }       
594    }
595   
596    /** This method retrieves all the metadata elements in the metaDataStream
597     * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt; 
598     * where "namespace" is the namespace prefix of each tag, and metadataName 
599     * is the name of the metadata (like author, title). For each element
600     * it creates a corresponding new element of the form
601     * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
602     * Each of these are then appended to the metadataList parameter.
603     * @param doc is the Document object using which the new metadata Elements
604     * are to be constructed
605     * @param metadataList is the &lt;metadataList&gt; Element to which the new
606     * metadata Elements are to be appended as children.
607     * @param metaDatastream the metadata datastream in string form (e.g. the
608     * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
609     * repository).
610     * @param metadataSet is the constant datastream identifier,
611     * e.g. "DLS" or "EX".
612     * At present this method applies to the DLS and EX metadata as they have
613     * constant tagnames throughout.
614     * @param metafields is a | separated string containing the metadatafields to
615     * extract or "all" if all fields are requested.
616    */
617    protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
618                           String metaDatastream, String metadataSet, String metafields)
619        throws SAXException, IOException
620    {
621        // Namespace prefix can be "ex:" or "dls:"
622        String namespacePrefix = "";
623        if(!metadataSet.equals(EX)) {
624            // need to have a period for Greenstone instead of Fedora's colon
625            namespacePrefix = metadataSet.toLowerCase() + PERIOD;
626        }
627               
628        Document src = builder.parse(
629                new InputSource(new StringReader(metaDatastream)));
630        NodeList metaTags = src.getElementsByTagName(
631                metadataSet.toLowerCase()+COLON+METADATA);
632                // Looking for tagnames: <ex:metadata> or <dls:metadata>
633           
634        for(int i = 0; i < metaTags.getLength(); i++) {
635            Element metatag = (Element)metaTags.item(i);
636           
637            // extract the metadata of (name, value) pairs from src DOM
638            // look for <metadata name="name">value</metadata>
639            String name = metatag.hasAttribute(NAME) ?
640                    metatag.getAttribute(NAME) : "";
641            // sometimes, there are several metadata for the same name, in this
642            // case, look for a qualifier and append its value to the name to
643            // distinguish it uniquely:
644            if(metatag.hasAttribute(QUALIFIER)) {
645                name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
646            }
647            name = namespacePrefix + name; // prefix with namespace, if any
648            if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
649                String value = FedoraCommons.getValue(metatag);
650               
651                // create metadata of (name, value) pairs in target DOM (doc)
652                Element metadata = doc.createElement(GSXML.METADATA_ELEM);
653                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
654                attribute.setValue(name);
655                metadata.setAttributeNode(attribute);
656                Text content = doc.createTextNode(value);
657                metadata.appendChild(content);
658               
659                metadataList.appendChild(metadata);
660            }
661        }
662    }
663   
664    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
665     * response message containing ONLY the Title metadata for the document.
666     * @param docID is a document identifier (docID can either be a &lt;pid&gt;
667     * of an item (document) in the fedora repository, or it can be
668     * "&lt;pid&gt;-sectionNumber".
669     * @return a GS3 DocumentMetadataRetrieve response message containing the
670     * Title metadata for the requested document */
671    public String getTitleMetadata(String docID) {
672        return getTitleMetadata(new String[] { docID });
673    }
674   
675    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
676     * response message containing ONLY the Title metadata for the documents.
677     * @param docIDs is a list of document identifiers (where docID can either be
678     * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
679     * "&lt;pid&gt;-sectionNumber".
680     * @return a GS3 DocumentMetadataRetrieve response message containing the
681     * Title metadata for all the requested documents */
682    public String getTitleMetadata(String[] docIDs) {
683        // Must create message of the following form:
684        // <documentNodeList><documentNode nodeID="docID">
685        // <metadataList><metadata name="Title">sometitle</metadata>
686        // </metadataList></documentNode>
687       
688        Document doc = builder.newDocument();
689        FedoraGS3RunException ex = null;
690       
691        Element docNodeList = doc.createElement(
692                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
693        try{
694            for(int i = 0; i < docIDs.length; i++) {
695                Element docNode = getTitleMetadata(doc, docIDs[i]);
696                docNodeList.appendChild(docNode);
697            }
698        }catch(Exception e) {
699            ex = new FedoraGS3RunException(e);
700            //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
701            ex.setSpecifics("EX metadata datastream");
702        }
703       
704        Element responseMsg = createResponseMessage(doc, docNodeList, ex, 
705                GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
706        try{
707            return FedoraCommons.elementToString(responseMsg);
708        } catch(TransformerException e) {
709            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
710                + " " + e;
711        }
712    }
713   
714    /** Method that takes a new DOM document, as well as an identifier of either
715     * a document or document section and returns a documentNode element containing
716     * the title metadata for it:
717     * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
718     * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
719     * &lt;/metadataList&gt;&lt;/documentNode&gt;
720     * @return documentNode containing the metadata for the collection or
721     * document given by parameter ID
722     * @param docID denotes the id of a document or a document section, so id
723     * is either a document-pid or it's of the form documentpid-sectionNumber */
724    protected Element getTitleMetadata(Document doc, String docID)
725        throws RemoteException, UnsupportedEncodingException,
726            SAXException, IOException
727    {
728        // Returns a docNode element of the following form:
729        // <documentNode nodeID="docID">
730        // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
731        // </documentNode>
732       
733        // <documentNode nodeID="docID">
734        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
735        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
736        attribute.setValue(docID);
737        docNode.setAttributeNode(attribute);
738       
739        // <metadataList>
740        Element metaList = doc.createElement(
741                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
742        // <metadata name="Title">
743        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
744        // if we connect it all up (append children), we can immediately add
745        // the name attribute into the metadata element:
746        metaList.appendChild(metadata);
747        docNode.appendChild(metaList);
748        metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
749       
750        String title = "";
751        String sectionID = getSectionIDFromDocID(docID);
752        String docPID = getDocPIDFromDocID(docID);
753   
754        // check if title of toplevel document is requested
755        if(sectionID.equals(""))
756            title = this.getDocTitle(docPID);
757        else { // title of document section
758            title = this.getSectionTitle(docPID, sectionID);
759        }
760       
761        metadata.appendChild(doc.createTextNode(title));
762       
763        return docNode;
764    }
765   
766    /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
767     * containing the requested portion of the document structure of the documents
768     * indicated by docIDs:
769     * @param docID is the document identifier of the document whose hierarchical
770     * structure is requested. The name of the collection is already included in the
771     * docID for a Fedora DL.
772     * @param structure - strings specifying the required structure of the document.
773     * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
774     * @param info - strings specifying the required structural info of the document.
775     * It can be any combination of: siblingPosition, numSiblings, numChildren.
776    */
777    public String getDocumentStructure(String docID, String[] structure, String[] info) {
778    return getStructure(new String[]{docID}, structure, info);
779    }
780   
781
782     /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
783     * containing the requested portion of the document structure of the documents
784     * indicated by docIDs:
785     * @param docIDs is an array of document identifiers of documents whose
786     * hierarchical structures are requested. The name of the collection is already
787     * included in the docID for a Fedora DL.
788     * @param structure - strings specifying the required structure of each document.
789     * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
790     * @param info - strings specifying the required structural info of each document.
791     * It can be any combination of: siblingPosition, numSiblings, numChildren.
792    */
793    public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
794    return getStructure(docIDs, structure, info);
795    }
796
797        /**
798     * Returns a greenstone3 DocumentStructureRetrieve XML response message
799     * containing the document structures for the given docIDs.
800     * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML, 
801     * greenstone formatted XML is returned. The requested section of the table
802     * of contents (TOC) for a document is converted into the greenstone3 xml 
803     * format that is returned upon DocumentStructureRetrieve requests.
804     * @param docIDs the documentIDs for which the section's structure is returned;
805     * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
806     * @param structure - the structure of the sections to return. Can be any combination of:
807     * ancestors, parent, siblings, children, descendants, entire.
808     * @param infos - strings containing any combination of the values: numChildren, numSiblings,
809     * siblingPosition. The requested info gets added as attributes to the returned root element.
810     * @return a greenstone3 DocumentStructureRetrieve XML response message in
811     * String format with the structure of the docIDs requested.
812    */
813    protected String getStructure(String[] docIDs, String[] structure, String[] infos)
814    {
815        Document doc = builder.newDocument();
816        FedoraGS3RunException ex = null;
817        // <documentNodeList>
818        Element docNodeList = doc.createElement(
819                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
820       
821        try{
822            // append the <documentNodes> for the docIDs
823            // to the docNodeList
824            //getStructureElement(docNodeList, docIDs, levels);
825            getStructureElement(docNodeList, docIDs, structure, infos);
826        } catch(Exception e) {
827            ex = new FedoraGS3RunException(e);
828            ex.setSpecifics("(requested portion of) TOC datastream");
829        }
830        // insert our <documentNodeList> into a GS3 response message
831        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
832                GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
833        try{
834            return FedoraCommons.elementToString(responseMsg);
835        } catch(TransformerException e) {
836            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
837                + " " + e;
838        }
839    }
840
841
842    /** Given a &lt;documentNodeList&gt; portion of a greenstone3
843     * DocumentStructureRetrieve XML response message, this method will populate
844     * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
845     * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
846     * the doc structures are appended.
847     * @param docIDs the documentIDs for which the section's structure is returned;
848     * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
849     * @param structures - the structure of the sections to return. Can be any combination of:
850     * ancestors, parent, siblings, children, descendants, entire.
851     * @param infos - a string containing any combination of the values: numChildren, numSiblings,
852     * siblingPosition. The requested info gets added as attributes to the returned root element.
853    */
854    protected void getStructureElement(Element docNodeList, String[] docIDs,
855                       String[] structures, String[] infos)
856        throws RemoteException, UnsupportedEncodingException, SAXException,
857            IOException
858    {
859        // Make one string out of requested structure components, and one string from info components
860        String structure = "";
861        String info = "";
862        for(int i = 0; i < structures.length; i++) {
863        structure = structure + structures[i] + "|";
864        }
865        for(int i = 0; i < infos.length; i++) {
866        info = info + infos[i] + "|";
867        }
868       
869        // process each docID
870        for(int i = 0; i < docIDs.length; i++) {
871        // work out the document's fedora PID and section ID
872        String sectionID = getSectionIDFromDocID(docIDs[i]);
873        String docPID = getDocPIDFromDocID(docIDs[i]);
874        if(sectionID.equals("")) {
875            sectionID = "1";
876        }
877
878        // get the required section, along with children or descendants
879        Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
880        Document doc = docNodeList.getOwnerDocument();
881       
882        // copy-and-convert that structure into a structure format for GS3
883        Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
884       
885        if(!info.equals("")) {
886            // <nodeStructureInfo>
887            //    <info name="" value="" />
888            //    <info name="" value="" />
889            //    ...
890            // </nodeStructureInfo>
891            Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
892            Element root = srcDocElement.getOwnerDocument().getDocumentElement();
893           
894            if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
895            String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
896            Element infoEl = doc.createElement(GSXML.INFO_ATT);
897            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
898            infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
899            nodeStructureInfo.appendChild(infoEl);
900            }
901           
902            if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
903            String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
904            Element infoEl = doc.createElement(GSXML.INFO_ATT);
905            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
906            infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
907            nodeStructureInfo.appendChild(infoEl);
908            }
909           
910            if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
911            String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
912            Element infoEl = doc.createElement(GSXML.INFO_ATT);     
913            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
914            infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
915            nodeStructureInfo.appendChild(infoEl);
916            }
917
918            if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
919            String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
920            Element infoEl = doc.createElement(GSXML.INFO_ATT);     
921            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
922            infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
923            nodeStructureInfo.appendChild(infoEl);
924            }       
925
926            docNode.appendChild(nodeStructureInfo);
927        }
928       
929        // add it to our list of documentNodes
930        docNodeList.appendChild(docNode);
931        }
932    }
933
934       
935    /**
936     * Takes the portion of the XML document outlining the structure of the
937     * document (section)--in the format this is stored in Fedora--and returns
938     * Greenstone 3 DOM XML format for outlining document structure. 
939     * @return a &lt;documentNode&gt; element that contains a greenstone3
940     * DocumentStructureRetrieve XML corresponding to the parameter Element section
941     * (which is in fedora XML), for the document indicated by docID.
942     * @param requestingDocID is the identifier of the document for which the
943     * structure was requested. It's this document's children or descendants that
944     * will be returned. Note that this is not always the same as (clear from) 
945     * parameter docID.
946     * @param docID is the documentID for which the section's structure is
947     * returned where docID = "docPID-sectionNumber".
948     * @param section - the fedora section XML that is being mirrored in
949     * greenstone3 format.
950    */
951    protected Element getStructure(Document doc, String requestingDocID,
952            String docID, Element section)
953    {
954        // we want to mirror the section's DOM (given in fedora XML) in
955        // greenstone3's XML for a DocumentStructureRetrieve response.
956       
957        // <documentNode nodeID="docID"> - the docNode on which a structure retrieve 
958        // is being performed
959        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
960        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
961        attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
962        docNode.setAttributeNode(attribute);       
963       
964        // <nodeStructure>
965        Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
966       
967        // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
968        Element rootNode = createDocNodeFromSubsection(doc, section, docID);
969       
970        // fills in the subtree of the rootNode in our nodeStructure element
971        createDocStructure(doc, section, rootNode, docID);
972            //where section represents the root section
973       
974        nodeStructure.appendChild(rootNode);
975        docNode.appendChild(nodeStructure);
976        return docNode;     
977    }
978
979
980    /** Recursive method that creates a documentStructure mirroring parameter
981     * section, starting from parameter parent down to all descendants
982     * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
983     * for the docPID whose substructure is to be mirrored
984     * @param parent is the XML documentNode in the greenstone repository whose
985     * descendants created by this method will correspond to the descendants of
986     * parameter section. 
987     * @param doc is the document containing the parent;
988     * @param docPID is the prefix of all nodeIDs in the parent's structure
989    */
990    protected void createDocStructure(
991            Document doc, Element section, Element parent, String docPID)
992    {
993        // get the section's children (if any)
994        NodeList children = section.getChildNodes();
995        for(int i = 0; i < children.getLength(); i++) {
996            Node n = children.item(i);
997           
998            if(n.getNodeName().equals(SECTION_ELEMENT)) {
999                //then we know it's an element AND that its tagname is "Section"
1000                Element subsection = (Element)n;
1001                Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1002                parent.appendChild(child);
1003               
1004                // recursion call on newly found child-element and subsection
1005                createDocStructure(doc, subsection, child, docPID);
1006            }
1007        }
1008    }
1009   
1010    /** Given a particular subsection element, this method creates a
1011     * Greenstone3 DocumentNode element that mirrors it.
1012     * @param doc is the document that will contain the created DocumentNode
1013     * @param docID is the prefix of all nodeIDs in the parent's structure
1014     * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1015     * TOC for the docPID which will be mirrored in the greenstone XML
1016     * documentNode that will be returned.
1017     * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1018     * &lt;Section&gt; element passed as parameter subSection. */
1019    protected Element createDocNodeFromSubsection(
1020            Document doc, Element subSection, String docID)
1021    {
1022        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1023        Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1024        docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1025        docNode.setAttributeNode(docType);
1026       
1027        Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1028        String sectionID = subSection.hasAttribute(ID) ?
1029                subSection.getAttribute(ID) : "";
1030        if(sectionID.equals("1")
1031           && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1032            // reset the attribute without the section number (just "docID" may be important for democlient?)
1033            nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1034        } else {
1035            nodeID.setValue(docID + HYPHEN + sectionID);
1036        }
1037        //nodeID.setValue(docID + HYPHEN + sectionID);
1038        docNode.setAttributeNode(nodeID);
1039       
1040        Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1041        if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1042            nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1043        }
1044        docNode.setAttributeNode(nodeType);
1045        return docNode;
1046    }
1047   
1048   
1049    /** Given an identifier that is either a docPID or a concatenation of
1050     * docPID+sectionID, this method works out the fedora assigned docPID and
1051     * sectionID and then calls getContentBody(docPID, sectionID) with those.
1052     * @param docID is expected to be of the form
1053     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1054     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1055     * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1056     * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1057     * is returned! */
1058    public String getContent(String docID) {
1059        return this.getContent(new String[]{docID});
1060    }
1061   
1062    /** Given an identifier that is a concatenation of docID+sectionID, this
1063     * method works out the fedora assigned docPID and sectionID and then calls
1064     * getContentBody(docPID, sectionID) with those.
1065     * @param docIDs is an array of document identifiers of the form
1066     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1067     * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1068     * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1069    public String getContent(String[] docIDs) {
1070        Document doc = builder.newDocument();
1071        FedoraGS3RunException ex = null;
1072       
1073        //<documentNodeList>
1074        Element docNodeList = doc.createElement(
1075                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1076       
1077        try{
1078            for(int i = 0; i < docIDs.length; i++) {
1079                // get the sectionID and docPID from the docID
1080                String sectionID = this.removePrefix(
1081                        getSectionIDFromDocID(docIDs[i]), SECTION);
1082                String docPID = getDocPIDFromDocID(docIDs[i]);
1083                if(sectionID.equals("")) // if no section is specified, get
1084                    sectionID = "1"; // get the content for Section id="1"
1085                           
1086                // Get the contents for the requested section of document docPID
1087                String sectionContent = this.getContentBody(docPID, sectionID);
1088               
1089                // set the nodeID attribute
1090                Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1091                Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1092               
1093                nodeId.setValue(docIDs[i]); // just set the docID which will contain
1094                            // the docPID (and sectionID if already present)
1095               
1096                docNode.setAttributeNode(nodeId);
1097                // set the text content to what was retrieved
1098                Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1099                Text textNode = doc.createTextNode(sectionContent.trim());
1100               
1101                nodeContent.appendChild(textNode);
1102                docNode.appendChild(nodeContent);
1103                //add the documentNode to the docNodeList
1104                docNodeList.appendChild(docNode);
1105            }
1106        } catch(Exception e) {
1107            ex = new FedoraGS3RunException(e);
1108            ex.setSpecifics("requested doc Section datastream");
1109        }
1110        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1111                GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1112        try{
1113            return FedoraCommons.elementToString(responseMsg);
1114        } catch(TransformerException e) {
1115            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1116                + " " + e;
1117        }
1118    }
1119       
1120    /** Gets the contents of a textNode from a section.
1121     * @return the text content of a section.
1122     * @param docPID the pid of the document from which a section's text is to
1123     * be retrieved.
1124     * @param sectionID is the section identifier of the document denoted by
1125     * docPID whose text is to be returned.
1126    */
1127    protected String getContentBody(String docPID, String sectionID)
1128        throws RemoteException, UnsupportedEncodingException,
1129            SAXException, IOException
1130    {   
1131        String section = this.getSection(docPID, sectionID);
1132       
1133        // the content is nested inside a <Section> element,
1134        // we extract it from there:
1135        InputSource source = new InputSource(new StringReader(section));
1136        Document doc = builder.parse(source);
1137           
1138        // The document Element is the <Section> we want.
1139        // Get its text contents:
1140        section = FedoraCommons.getValue(doc.getDocumentElement());
1141       
1142        // we are going to remove all occurrences of "_httpdocimg_/"
1143        // that precede associated filenames, because that's a GS3
1144        // defined macro for resolving relative urls. It won't help
1145        // with documents stored in fedora.
1146        section = section.replaceAll(GS3FilePathMacro+"/", "");
1147        return section;
1148    }
1149       
1150    /** Here we create the greenstone's response message element:
1151     * &lt;message&lg;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1152     * @return a greenstone response-message element.
1153     * @param doc - the Document object which should me used to create the
1154     * &lt;message&gt; and &lt;response&gt; elements
1155     * @param content - the element that is to be nested inside &lt;response&gt;
1156     * @param ex - any exception that occurred when trying to create
1157     * the content parameter
1158     * @param responseType - the value for the type attribute of &lt;response&gt;,
1159     * such as "describe", "retrieve", "browse", "query"...
1160     * @param originator - indiates the collectionName or service (like
1161     * DocumentContentRetrieve) from where this response message originates
1162    */
1163    protected Element createResponseMessage(Document doc, Element content,
1164            Exception ex, String responseType, String originator)
1165    {
1166        Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1167        // from = "FedoraGS3"
1168        Attr attribute = doc.createAttribute(GSXML.FROM_ATT);       
1169        attribute.setValue(originator);
1170        response.setAttributeNode(attribute);
1171       
1172        // type = "describe" or "process" - whatever's given in requestType:
1173        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1174        attribute.setValue(responseType);
1175        response.setAttributeNode(attribute);
1176       
1177        if(content != null)
1178            response.appendChild(content);
1179       
1180        // we'll create an error element for RemoteExceptions (web service problems) 
1181        // and UnsupportedEncodingExceptions and
1182        if(ex != null) {
1183            Element error = doc.createElement(GSXML.ERROR_ELEM);
1184            error.appendChild(doc.createTextNode(ex.getMessage()));
1185            // now append the error to the <response> element (after
1186            // the content element whatever that was)
1187            response.appendChild(error);
1188        }
1189       
1190        Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1191        message.appendChild(response);
1192        doc.appendChild(message);
1193        return message;
1194    }
1195   
1196    /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1197     * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1198     * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1199     * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1200     * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1201     * @param doc - the Document object which should me used to create the
1202     * &lt;serviceList&gt; element */
1203    protected Element createServiceList(Document doc)
1204    {
1205        Element serviceList = doc.createElement(
1206                GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1207       
1208        for(int i = 0; i < serviceNames.length; i++) {
1209            // create the <service name="serviceName[i]" type="servicetype" />
1210            Element service = doc.createElement(GSXML.SERVICE_ELEM);
1211           
1212            Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1213            attribute.setValue(serviceNames[i]);
1214            service.setAttributeNode(attribute);
1215           
1216            attribute = doc.createAttribute(GSXML.TYPE_ATT);
1217            if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1218                attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1219            else if(serviceNames[i].contains("Query")) // search services
1220                attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1221            else
1222                attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1223            service.setAttributeNode(attribute);
1224           
1225            // add the service element to the serviceList element
1226            // <serviceList><service /></serviceList>
1227            serviceList.appendChild(service);
1228        }
1229        return serviceList;
1230    }
1231   
1232    /** @return a GS3 response message for a describe services request:
1233     * indicating the list of services supported by the Fedora-Greenstone
1234     * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1235     * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1236     * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1237     * serviceNames. */
1238    public String getServiceList()
1239    {
1240        Document doc = builder.newDocument();
1241        Element serviceList = createServiceList(doc);
1242        // make <serviceList> the body of the responseMessage:
1243        // <message><response><serviceList></response></message>
1244        Element responseMsg = createResponseMessage(doc, serviceList, null,
1245                GSXML.REQUEST_TYPE_DESCRIBE, "");
1246        try {
1247            return FedoraCommons.elementToString(responseMsg);
1248        }catch(TransformerException e) {
1249            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1250                + " " + e;
1251        }
1252    }
1253   
1254    /** @return a GS3 describe response message listing the collections and 
1255     * collection-specific metadata stored in the Fedora-Greenstone repository. */
1256    public String getCollectionList()
1257    {
1258        Document doc = builder.newDocument();
1259        FedoraGS3RunException ex = null; // any RemoteException
1260       
1261        // create the <collectionList /> element
1262        Element collectionList = doc.createElement(
1263                GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1264        try{
1265            String[] collectionNames = this.getCollectionNames(
1266                    this.getCollections()); // this line could throw RemoteException
1267            for(int i = 0; i < collectionNames.length; i++) {
1268                // create the <collection name="somename" /> element
1269                Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1270                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1271                attribute.setValue(collectionNames[i]);
1272                collection.setAttributeNode(attribute);
1273               
1274                // append the <collection> element as child of <collectionList>
1275                collectionList.appendChild(collection);
1276               
1277                //if(collection.hasAttribute(GSXML.NAME_ATT))
1278                    //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1279            }
1280        } catch(RemoteException e) { // if this happens, perhaps it's because it
1281            // can't find Greenstone collections in fedora repository?
1282            ex = new FedoraGS3RunException(e);
1283            ex.setSpecifics(
1284                "greenstone collections in fedora repository");
1285        }
1286       
1287        // make <collectionList> the body of the responseMessage:
1288        // <message><response><collectionList></response></message>
1289        Element responseMsg = createResponseMessage(doc, collectionList, ex,
1290                GSXML.REQUEST_TYPE_DESCRIBE, "");
1291        try{
1292            return FedoraCommons.elementToString(responseMsg);
1293        }catch(TransformerException e) {
1294            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1295                + " " + e;
1296        }
1297    }
1298   
1299    /** @return a GS3 describe response message for a collection in the
1300     * Fedora-Greenstone repository.
1301     * @param collectionName - the name of the collection that is to be described. 
1302     * It will be converted to a fedora collection pid, which is of the form
1303     * "greenstone:&lt;collectionName&gt;-collection". */
1304    public String describeCollection(String collectionName)
1305    {
1306        Document doc = builder.newDocument();
1307        FedoraGS3RunException ex = null;
1308       
1309        Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1310        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1311        attribute.setValue(collectionName);
1312        collection.setAttributeNode(attribute);
1313       
1314        //<displayItem assigned="true" lang="en" name="name">
1315        //"some display name"</displayItem>
1316        Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1317       
1318        attribute = doc.createAttribute(GSXML.LANG_ATT);
1319        attribute.setValue(this.lang);
1320        displayItem.setAttributeNode(attribute);
1321       
1322        attribute = doc.createAttribute(GSXML.NAME_ATT);
1323        attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1324        displayItem.setAttributeNode(attribute);
1325       
1326        try{
1327            Text textNode = doc.createTextNode(
1328                this.getCollectionTitle(getCollectionPID(collectionName)));
1329            displayItem.appendChild(textNode);
1330        } catch(Exception e) {
1331            // can't find Greenstone collections in fedora repository or problem
1332            // getting their titles from their metadata datastream?
1333            ex = new FedoraGS3RunException(e);
1334            ex.setSpecifics("greenstone collections or their metadata"
1335                    + "in the fedora repository");
1336        }
1337        // now append the displayItem element as child of the collection element
1338        collection.appendChild(displayItem);
1339        // get the <serviceList> and add it into the collection description.
1340        // Services for all collections in the FedoraGS3 repository are the
1341        // same, offering a ClassifierBrowse to browse titles by starting letter
1342        // and DocRetrieve services: Content, Metadata and Structure.
1343       
1344        Element serviceList = createServiceList(doc);
1345        collection.appendChild(serviceList);
1346       
1347        Element responseMsg = createResponseMessage(doc, collection, ex,
1348                GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1349        try{
1350            return FedoraCommons.elementToString(responseMsg);
1351        }catch(TransformerException e) {
1352            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1353                + " " + e;
1354        }
1355    }
1356   
1357    /** @return a GS3 describe response message for the services of a collection
1358     * in the Fedora-Greenstone repository. So far, these services are the same for
1359     * all fedora collections: they are the services given in member variable
1360     * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1361     * ClassifierBrowseMetadataRetrieve.   
1362     * All collections in this Digital Library (Fedora Repository) share the
1363     * same services, so this method returns the same services as getServiceList();
1364     * @param collectionName - the name of the collection whose services are to
1365     * be described. It will be converted to a fedora collection pid, which is of
1366     * the form "greenstone:&lt;collectionName&gt;-collection". */
1367    public String describeCollectionServices(String collectionName)
1368    {
1369        Document doc = builder.newDocument();
1370       
1371        Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1372        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1373        attribute.setValue(collectionName);
1374        collection.setAttributeNode(attribute);
1375       
1376        Element serviceList = createServiceList(doc);
1377        collection.appendChild(serviceList);
1378       
1379        Element responseMsg = createResponseMessage(doc, collection, null,
1380                GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1381        try{
1382            return FedoraCommons.elementToString(responseMsg);
1383        }catch(TransformerException e) {
1384            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1385                + " " + e;
1386        }
1387    }
1388   
1389    /** All collections in this Digital Library (Fedora Repository) share
1390     * the same services, so this method returns the same as
1391     * describeCollectionService(collName, serviceName).
1392     * @return a GS3 describe response message for the requested service
1393     * of the given collection. DocumentContent/Metadata/StructureRetrieve
1394     * return nothing special except their names; browse (and any query)
1395     * return more complex XML responses.
1396     * @param serviceName - the name of the service in the collection which is to
1397     * be described.*/
1398    public String describeService(String serviceName)
1399    {
1400        // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1401        // we return:
1402        // <message><response from="<name>Retrieve" type="describe">
1403        // <service name="<name>Retrieve" type="retrieve" /></response></message>
1404        // But for browse (and any query) service, we return the data necessary
1405        // for displaying it
1406       
1407        Document doc = this.builder.newDocument();
1408        Element service = doc.createElement(GSXML.SERVICE_ELEM);
1409        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1410        attribute.setValue(serviceName);
1411        service.setAttributeNode(attribute);
1412       
1413        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1414       
1415        if(serviceName.toLowerCase().endsWith("retrieve")) {
1416            attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1417        }
1418        else if(serviceName.toLowerCase().contains("browse")) {
1419            attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1420           
1421            // we need name and description <displayItem> elements
1422            Element displayItem
1423                = createNameValuePairElement(doc,
1424                        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1425            service.appendChild(displayItem);
1426           
1427            displayItem = createNameValuePairElement(doc,
1428                    GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1429                    "Browse pre-defined classification hierarchies");
1430            service.appendChild(displayItem);
1431           
1432            // now need a classifierList
1433            Element classifierList = doc.createElement(
1434                    GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1435           
1436            int classifierNum = 1;
1437            // append a <classifier content="some letter" name="CL#">
1438            // for each letter of the alphabet:
1439            Element classifier = createClassifierElement(doc, "TitleByLetter",
1440                classifierNum++, "titles by letter", "Browse titles by letter");
1441            // now add this <classifier> to the <classifierList>
1442            classifierList.appendChild(classifier);
1443           
1444            // ANY MORE CLASSIFIERS? ADD THEM HERE
1445           
1446            service.appendChild(classifierList);
1447        } // ELSE check for whether it is a query service
1448        else if(serviceName.toLowerCase().contains("query")) {
1449            attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1450            if(serviceName.equals("TextQuery")) {
1451                describeTextQueryService(service);
1452            } else if(serviceName.equals("FieldQuery")) {
1453                describeFieldQueryService(service);
1454            }
1455        }
1456       
1457        // don't forget to add the type attribute to the service!
1458        service.setAttributeNode(attribute);
1459       
1460        String from = serviceName;
1461       
1462        Element responseMsg = createResponseMessage(doc, service, null,
1463                GSXML.REQUEST_TYPE_DESCRIBE, from);
1464        try{
1465            return FedoraCommons.elementToString(responseMsg);
1466        }catch(TransformerException e) {
1467            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1468                + " " + e;
1469        }
1470    }
1471   
1472    /** Appends children to the parameter service Element that make the
1473     * final service Element into a describe response XML for FedoraGS3's
1474     * TextQuery service.
1475     * @param service is the service Element that is being filled out. */
1476    protected void describeTextQueryService(Element service) {
1477        Document doc = service.getOwnerDocument();
1478        // we need name, submit (button) and description <displayItem> elements
1479        Element displayItem = createNameValuePairElement(doc,
1480            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1481            "Text Search");
1482        service.appendChild(displayItem);
1483       
1484        displayItem = createNameValuePairElement(doc,
1485                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1486        service.appendChild(displayItem);
1487       
1488        displayItem = createNameValuePairElement(doc,
1489                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1490                "Title and full-text search service");
1491        service.appendChild(displayItem);
1492       
1493        //create the <paramList>
1494        Element paramList = doc.createElement(
1495            GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1496       
1497        // we ignore granularity to search at: it will always be
1498        // document and section level
1499        // we ignore casefolding: always on (that is, case is irrelevant)
1500        // we ignore document display order: always ranked
1501       
1502        // Constructing the following:
1503        // <param default="100" name="maxDocs" type="integer">
1504        // <displayItem name="name">Maximum hits to return</displayItem>
1505        // </param>
1506        Element param = doc.createElement(GSXML.PARAM_ELEM);
1507       
1508        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1509        attribute.setValue(MAXDOCS);
1510        param.setAttributeNode(attribute);
1511       
1512        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1513        attribute.setValue("100");
1514        param.setAttributeNode(attribute);
1515       
1516        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1517        attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1518        param.setAttributeNode(attribute);
1519       
1520        displayItem = createNameValuePairElement(doc,
1521                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1522                "Maximum hits to return");
1523        param.appendChild(displayItem);
1524       
1525        paramList.appendChild(param);
1526       
1527        // Constructing the following:
1528        // <param name="query" type="string">
1529        // <displayItem name="name">Query string</displayItem>
1530        // </param>
1531        param = doc.createElement(GSXML.PARAM_ELEM);
1532       
1533        attribute = doc.createAttribute(GSXML.NAME_ATT);
1534        attribute.setValue(QUERY);
1535        param.setAttributeNode(attribute);
1536       
1537        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1538        attribute.setValue(GSXML.PARAM_TYPE_STRING);
1539        param.setAttributeNode(attribute);
1540       
1541        displayItem = createNameValuePairElement(doc,
1542                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1543                "Query string");
1544        param.appendChild(displayItem);
1545       
1546        paramList.appendChild(param);
1547       
1548        service.appendChild(paramList);
1549    }
1550   
1551    /** Appends children to the parameter service Element that make the
1552     * final service Element into a describe response XML for FedoraGS3's
1553     * FieldQuery service.
1554     * @param service is the service Element that is being filled out. */
1555    protected void describeFieldQueryService(Element service) {
1556        Document doc = service.getOwnerDocument();
1557        // we need name, submit (button) and description <displayItem> elements
1558        Element displayItem = createNameValuePairElement(doc,
1559            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1560            "Form Search");
1561        service.appendChild(displayItem);
1562       
1563        displayItem = createNameValuePairElement(doc,
1564                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1565        service.appendChild(displayItem);
1566       
1567        displayItem = createNameValuePairElement(doc,
1568                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1569                "Simple fielded search");
1570        service.appendChild(displayItem);
1571       
1572        //create the <paramList>
1573        Element paramList = doc.createElement(
1574            GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1575       
1576        // we ignore granularity to search at: it will always be
1577        // document and section level
1578        // we ignore casefolding: always on (that is, case is irrelevant)
1579        // we ignore document display order: always ranked
1580       
1581        // Constructing the following:
1582        // <param default="100" name="maxDocs" type="integer">
1583        // <displayItem name="name">Maximum hits to return</displayItem>
1584        // </param>
1585        Element param = doc.createElement(GSXML.PARAM_ELEM);
1586       
1587        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1588        attribute.setValue(MAXDOCS);
1589        param.setAttributeNode(attribute);
1590       
1591        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1592        attribute.setValue("100");
1593        param.setAttributeNode(attribute);
1594       
1595        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1596        attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1597        param.setAttributeNode(attribute);
1598       
1599        displayItem = createNameValuePairElement(doc,
1600                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1601                "Maximum hits to return");
1602        param.appendChild(displayItem);
1603       
1604        paramList.appendChild(param);
1605       
1606        // Constructing the following:
1607        // <param name="simpleField" occurs="4" type="multi">
1608        // <displayItem name="name"></displayItem>
1609        //
1610        // <param name="query" type="string">
1611        //   <displayItem name="name">Word or phrase </displayItem>
1612        // </param>
1613        //
1614        // <param default="allFields" name="fieldname" type="enum_single">
1615        //   <displayItem name="name">in field</displayItem>
1616        //   
1617        //   <option name="docTitles">
1618        //     <displayItem name="name">document titles</displayItem>
1619        //   </option>
1620        //   <option name="allTitles">
1621        //     <displayItem name="name">document and section titles</displayItem>
1622        //   </option>
1623        //   <option name="fullText">
1624        //     <displayItem name="name">full text</displayItem>
1625        //   </option>
1626        //   <option name="all">
1627        //     <displayItem name="name">titles and full text</displayItem>
1628        //   </option>
1629        //   <option name="">
1630        //     <displayItem name="name"></displayItem>
1631        //       </option>
1632        //  </param>
1633        // </param>
1634        Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM); 
1635        attribute = doc.createAttribute(GSXML.NAME_ATT);
1636        attribute.setValue(SIMPLEFIELD_ATT);
1637        rowOfParams.setAttributeNode(attribute);
1638       
1639        // we want the row of controls to occur multiple times
1640        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1641        attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1642        rowOfParams.setAttributeNode(attribute);
1643       
1644        attribute = doc.createAttribute(OCCURS_ATT);
1645        attribute.setValue("4"); // we want this row to occur 4 times
1646        rowOfParams.setAttributeNode(attribute);
1647       
1648        // <param name="query" type="string">
1649        //   <displayItem name="name">Word or phrase </displayItem>
1650        // </param>
1651        param = doc.createElement(GSXML.PARAM_ELEM);
1652       
1653        attribute = doc.createAttribute(GSXML.NAME_ATT);
1654        attribute.setValue(QUERY);
1655        param.setAttributeNode(attribute);
1656       
1657        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1658        attribute.setValue(GSXML.PARAM_TYPE_STRING);
1659        param.setAttributeNode(attribute);
1660       
1661        displayItem = createNameValuePairElement(doc,
1662                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1663                "Word or phrase");
1664        param.appendChild(displayItem);
1665        rowOfParams.appendChild(param);
1666       
1667        // <param default="allFields" name="fieldName" type="enum_single">
1668        //   <displayItem name="name">in field</displayItem>
1669        param = doc.createElement(GSXML.PARAM_ELEM);
1670        attribute = doc.createAttribute(GSXML.NAME_ATT);
1671        attribute.setValue(FIELDNAME_ATT);
1672        param.setAttributeNode(attribute);
1673       
1674        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1675        attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1676        param.setAttributeNode(attribute);
1677       
1678        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1679        attribute.setValue(ALL_FIELDS);
1680        param.setAttributeNode(attribute);
1681       
1682        displayItem = createNameValuePairElement(doc,
1683                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1684                "in field");
1685        param.appendChild(displayItem);
1686       
1687        String[] searchFieldNames
1688            = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1689        String[] searchFieldDisplay  = {"all titles and full-text",
1690            "document titles only", "document and section titles",
1691            "full-text only"};
1692       
1693        // for each fieldName create an option element and insert
1694        // the option into the enum_multi drop-down param:
1695        // <option name="fieldName">
1696        //   <displayItem name="name">fieldName</displayItem>
1697        // </option>
1698        for(int i = 0; i < searchFieldNames.length; i++) {
1699            Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1700            attribute = doc.createAttribute(GSXML.NAME_ATT);
1701            attribute.setValue(searchFieldNames[i]);
1702            option.setAttributeNode(attribute);
1703           
1704            displayItem = createNameValuePairElement(doc,
1705                    GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1706                    searchFieldDisplay[i]);
1707            option.appendChild(displayItem);
1708            param.appendChild(option); // add option to the drop-down box
1709        }
1710       
1711        rowOfParams.appendChild(param);
1712        paramList.appendChild(rowOfParams);
1713        service.appendChild(paramList);
1714    }
1715   
1716    /**
1717     * @return a GS3 describe response message for the requested service
1718     * of the given collection. DocumentContent/Metadata/StructureRetrieve
1719     * return nothing special except their names; browse (and any query)
1720     * return more complex XML responses.
1721     * All collections in this Digital Library (Fedora Repository) share
1722     * the same services, so this method returns the same as
1723     * describeService(serviceName).   
1724     * @param collectionName - the name of the collection whose service is to
1725     * be described. It will be converted to a fedora collection pid, which is of
1726     * the form "greenstone:&lt;collectionName&gt;-collection".
1727     * @param serviceName - the name of the service in the collection which is to
1728     * be described. */
1729    public String describeCollectionService(String collectionName,
1730            String serviceName) {
1731        // collectionName can be ignored, because all services are FedoraGS3
1732        // services and are not unique to any particular (greenstone) collection.
1733        return describeService(serviceName);
1734    }
1735   
1736    /** This method performs the implemented browse operation: allowing the
1737     * user to browse the titles of documents in the given collection by letter
1738     * and returning the results.
1739     * @param collectionName is the name of the collection whose documents
1740     * starting with the given letter will be returned.
1741     * @param classifierIDs are the ids of the classifiers on which to browse. In
1742     * this case, the classifier indicates whether we browse titles by letter, or
1743     * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
1744     * @param structures - the requested browse substructure. Can be any combination
1745     * of ancestors, parent, siblings, children, descendants.
1746     * @param infos - the requested structural info. Can be numSiblings,
1747     * siblingPosition, numChildren.
1748     * @return a GS3 ClassifierBrowse response message which lists all
1749     * the documents that start with the letter indicated by parameter classifier.
1750    */
1751    public String browse(String collectionName, String[] classifierIDs,
1752                 String[] structures, String[] infos)
1753    {
1754        // Construct one string from the structures and structural info arrays
1755        String structure = "";
1756        String info = "";
1757        for(int i = 0; i < structures.length; i++) {
1758        structure = structure + structures[i] + "|";
1759        }
1760        for(int i = 0; i < infos.length; i++) {
1761        info = info + infos[i] + "|";
1762        }
1763       
1764        Document doc = builder.newDocument();
1765        FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1766       
1767        // <classifierNodeList>
1768        Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1769       
1770        for(int i = 0; i < classifierIDs.length; i++) {
1771        if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1772            browseTitlesByLetterClassifier(doc, classifierNodeList,
1773                           collectionName, classifierIDs[i],
1774                           structure, info);           
1775        }
1776        }
1777
1778        Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1779                    GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
1780        try {
1781        return FedoraCommons.elementToString(responseMsg);
1782        } catch(TransformerException e) {
1783        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1784            + " " + e;
1785        }
1786    }
1787
1788    /** CL1 browsing classifier: browsing titles by starting letter.
1789     * The browsing structure is retrieved.
1790     * @param doc - the document object that will contain the CL1 browsing structure.
1791     * @param classifierNodeList - the classifiers will be added to this nodeList.
1792     * @param collectionName - name of the collection through which we are browsing CL1.
1793     * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1794     * a letter.
1795     * @param structure - the requested browse substructure. Can be any combination of
1796     * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1797     * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1798     * numChildren.
1799     * @return the classifierNodeList with the CL1 classifier browse structure.
1800     */
1801    public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1802                         String collectionName, String classifierID,
1803                         String structure, String info)
1804    {
1805    FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1806
1807    if(structure.indexOf("entire") != -1) {
1808        structure = structure + "ancestors|descendants";
1809    }
1810   
1811    // Structure of ancestors and children only at this stage
1812    int firstLevel = classifierID.indexOf('.');
1813    int secondLevel = classifierID.lastIndexOf('.');
1814   
1815    // <nodeStructure>
1816    Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1817
1818    // requested classifier node
1819    Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1820    Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1821    attribute.setValue(classifierID);
1822    classNode.setAttributeNode(attribute);
1823    Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1824    typeAttribute.setValue(GSXML.VLIST);
1825    classNode.setAttributeNode(typeAttribute);
1826
1827    if(firstLevel == -1) { // CL1 - toplevel node     
1828        Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1829
1830        classifierNodeList.appendChild(classNode);
1831        classNode.appendChild(nodeStructure);
1832       
1833        nodeStructure.appendChild(root);
1834        if(structure.indexOf("descendants") != -1) {
1835        getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1836        } else if(structure.indexOf("children") != -1) {
1837        getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1838        }
1839        // nothing to be done for siblings
1840    }
1841    else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1842       
1843        if(structure.indexOf("parent") != -1
1844           || structure.indexOf("ancestors") != -1
1845           || structure.indexOf("siblings") != -1) {
1846        String toplevelID = classifierID.substring(0, firstLevel);
1847        Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1848        attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1849        attribute.setValue(toplevelID);
1850        toplevelNode.setAttributeNode(attribute);
1851        typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1852        typeAttribute.setValue(GSXML.VLIST);
1853        toplevelNode.setAttributeNode(typeAttribute);
1854        Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1855
1856        classifierNodeList.appendChild(toplevelNode);
1857        toplevelNode.appendChild(nodeStructure);       
1858        nodeStructure.appendChild(node);
1859
1860        if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1861            getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1862            // pass the requested node (classNode) so that it is attached in the correct
1863            // location among its siblings, and to ensure that it is not recreated.
1864            // getTitlesByLetterStructure() will append classNode to node
1865        } else {
1866            node.appendChild(classNode);
1867        }
1868        } else {
1869        Element node = (Element)classNode.cloneNode(true);
1870        classifierNodeList.appendChild(node);
1871        node.appendChild(nodeStructure);
1872        nodeStructure.appendChild(classNode);
1873        }
1874       
1875        int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1876        char ch = (char)(num - 1 + 'A');
1877        if(structure.indexOf("descendants") != -1) {
1878        getTitlesForLetter(ch, collectionName, classNode, "descendants");
1879        } else if(structure.indexOf("children") != -1) {
1880        getTitlesForLetter(ch, collectionName, classNode, "children");
1881        }
1882    }
1883    else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1884        LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1885    }
1886
1887    return classifierNodeList;
1888    }
1889
1890    /** Creates a (CL1) subclassifier element for the docs whose titles start with
1891     * the given letter.
1892     * @param ch - the starting letter of the document titles to retrieve.
1893     * @param collectionName - name of the collection through which we are browsing CL1.
1894     * @param classifierNode - the docNodes found will be appended to this node.
1895     * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1896     * gets descendants of any documents found, otherwise gets just the children.
1897     * @return the given classifierNode which will have the child (or descendant) documents
1898     * appended to it.
1899     */
1900    public Element getTitlesForLetter(char ch, String collectionName,
1901                      Element classifierNode, String depthStructure)
1902    {
1903    Document doc = classifierNode.getOwnerDocument();
1904    FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1905
1906   
1907    // Retrieve the document structure for each subClassifierID:
1908    // all the documents that begin with its letter.
1909    String letter = String.valueOf(ch);
1910    try {
1911        String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1912        if(docPIDs.length == 0) {
1913        return classifierNode; // skip letters that don't have any kids
1914        }       
1915       
1916        for(int i = 0; i < docPIDs.length; i++) {
1917        // work out the document's fedora PID and section ID
1918        String sectionID = getSectionIDFromDocID(docPIDs[i]);
1919        String docPID = getDocPIDFromDocID(docPIDs[i]);
1920       
1921        // get the required section, along with children or descendants
1922        Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1923       
1924        // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1925        Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);       
1926       
1927        // fills in the subtree of the rootNode in our nodeStructure element
1928        createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1929        classifierNode.appendChild(docRootNode);
1930        }
1931    } catch(Exception e) {
1932        ex = new FedoraGS3RunException(e);
1933        ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1934    }
1935
1936    return classifierNode;
1937    }
1938
1939
1940    /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1941     * starting letter of the alphabet. X is each letter of the alphabet for which there
1942     * are matching document titles.
1943     * @param collectionName - name of the collection through which we are browsing CL1.
1944     * @param classifierNode - the docNodes found will be appended to this node.
1945     * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1946     * the IDs for the subclassifiers (CL.x).
1947     * @param getDescendants - if true, get descendants of any documents found, otherwise
1948     * get just the children.
1949     * @param wantedSibling - the node (already created) whose siblings are requested. We
1950     * need to make sure not to recreate this node when creating its sibling nodes.
1951     * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1952     * the alphabet that are represented in the document titles.
1953     */
1954    public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1955                          String classifierID, boolean getDescendants,
1956                          Element wantedSibling)
1957    {   
1958    String ID = "";
1959    if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1960        ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1961    }
1962   
1963    Document doc = classifierNode.getOwnerDocument();
1964    FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1965
1966    // We're going to loop to the end of the alphabet
1967    int count = 1;
1968    for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1969        // Retrieve the document structure for each subClassifierID:
1970        // all the documents that begin with its letter.
1971        String letter = String.valueOf(ch);
1972        try {
1973        String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1974        if(docPIDs.length == 0) {
1975            continue; // skip letters that don't have any kids
1976        }
1977        Element subClassifier = null;
1978        if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1979                // already have the requested node, don't recreate it
1980            subClassifier = wantedSibling;
1981        } else {
1982            // <classifierNode childType="VList" nodeID="CL1.x">
1983            subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
1984            Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1985            typeAttribute.setValue(GSXML.VLIST);
1986            subClassifier.setAttributeNode(typeAttribute);
1987            Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1988            attribute.setValue(classifierID+"."+count);
1989            subClassifier.setAttributeNode(attribute);
1990        }
1991        classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
1992       
1993        if(getDescendants) { // get the documents
1994
1995            // append the <docNodes> for the docPIDs found as children
1996            // of subclassifier
1997
1998            for(int i = 0; i < docPIDs.length; i++) {
1999            // work out the document's fedora PID and section ID
2000            String sectionID = getSectionIDFromDocID(docPIDs[i]);
2001            String docPID = getDocPIDFromDocID(docPIDs[i]);
2002       
2003            // get the required section, along with children or descendants
2004            Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2005
2006            // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2007            Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2008           
2009            // fills in the subtree of the rootNode in our nodeStructure element
2010            createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2011            subClassifier.appendChild(rootNode);
2012            }
2013        }
2014        } catch(Exception e) {
2015        ex = new FedoraGS3RunException(e);
2016        ex.setSpecifics("requested portion of TOC file or "
2017                + "trouble with fielded search ");
2018        }
2019    }
2020    return classifierNode;
2021    }
2022
2023   
2024    /** This method performs something equivalent to a greenstone3
2025     * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2026     * @param classNodeIDs array of classifierNode IDs for which the metadata
2027     * needs to be returned.
2028     * @param metafields are the classifier metadata fields that are to be returned.
2029     * At present this method ignores them/pretends the requested metafields are
2030     * "all" and always returns the Title meta for the requested classifier nodes
2031     * (because that is all the metadata this Fedora classifier has at present).
2032     * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2033     * lists the metadata for all the classifierNodes passed as parameter.*/
2034    public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2035    {
2036        Document doc = this.builder.newDocument();
2037        // <classifierNodeList>
2038        Element classifierNodeList = doc.createElement(
2039                GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2040       
2041        // create <classifierNode><metadataList><metadata>s
2042        // </metadataList></classifierNode> for all letters of the alphabet
2043        for(int i = 0; i < classNodeIDs.length; i++) {
2044            // strip ID of everything before the first '.' (i.e. remove "CL#.")
2045            int index = classNodeIDs[i].indexOf('.');
2046            String subClassifierNumber = classNodeIDs[i].substring(index+1);
2047            index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2048            if(index != -1) {
2049                subClassifierNumber = subClassifierNumber.substring(0, index);
2050            }
2051            int subClassifierNum = Integer.parseInt(subClassifierNumber);
2052            String classifierName = "";
2053            if(subClassifierNum == 0) { // no document titles started with a letter
2054                classifierName = "A-Z";
2055            } else {
2056                char letter = (char)('A' + subClassifierNum - 1); // A = 1
2057                classifierName = String.valueOf(letter);
2058            }
2059           
2060            // <classifierNode nodeID="CL#.subNum">
2061            Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2062            Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2063            attribute.setValue(classNodeIDs[i]);
2064            classifierNode.setAttributeNode(attribute);
2065           
2066            // <metadataList>
2067            Element metadataList = doc.createElement(
2068                    GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2069           
2070            // at least one metadata element: that of the title of this
2071            // classifierNode:
2072            // <metadata name="Title">letter</metadata>
2073            Element metadata = this.createNameValuePairElement(doc,
2074                    GSXML.METADATA_ELEM, "Title", classifierName);
2075           
2076            // now connect up everything
2077            metadataList.appendChild(metadata);
2078            classifierNode.appendChild(metadataList);
2079            classifierNodeList.appendChild(classifierNode);
2080        }
2081       
2082        Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2083                GSXML.REQUEST_TYPE_PROCESS, //collName +
2084                "ClassifierBrowseMetadataRetrieve");
2085        try{
2086            return FedoraCommons.elementToString(responseMsg);
2087        }catch(TransformerException e) {
2088            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2089                + " " + e;
2090        }
2091    }
2092   
2093    /** @return a newly created element of the following format:
2094     * &lt;classifier content="somecontent" name="CL+num"&gt;
2095     *      &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2096     *      &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2097     * &lt;/classifier&gt;
2098     * @param doc - the document used to create the element
2099     * @param content - value of the content attribute
2100     * @param classifierNum - the number suffixed to the CL, together forming
2101     * the classifier Node's ID
2102     * @param displayNameVal is the bodytext of a named displayItem element
2103     * @param displayDescrVal is the bodytext of a displayItem element with
2104     * description */
2105    protected Element createClassifierElement(Document doc, String content,
2106            int classifierNum, String displayNameVal, String displayDescrVal)
2107    {
2108        final String CL = "CL";
2109        Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2110        // content attribute
2111        Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2112        att.setValue(content);
2113        classifier.setAttributeNode(att);
2114        // name attribute
2115        att = doc.createAttribute(GSXML.NAME_ATT);
2116        att.setValue(CL + classifierNum);
2117        classifier.setAttributeNode(att);
2118       
2119        // now create the displayItem children for classifier:
2120        // <displayItem name="name">#letter</displayItem>
2121        // <displayItem name="description">Browse titles starting with #letter</displayItem>
2122        Element displayItem = createNameValuePairElement(doc,
2123                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2124        classifier.appendChild(displayItem);
2125        displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2126                GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2127        classifier.appendChild(displayItem);
2128       
2129        return classifier;
2130    }
2131   
2132       
2133    /** @return a newly created element of the following format:
2134     * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2135     * @param doc - the document used to create the element
2136     * @param elementName - the tag name
2137     * @param name - value of attribute name
2138     * @param value - the body text of the element */
2139    protected Element createNameValuePairElement(Document doc, String elementName,
2140            String name, String value) {
2141        // <elementName name="somename">"some display value"</elementName>
2142        Element element = doc.createElement(elementName);
2143        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2144        attribute.setValue(name);
2145        element.setAttributeNode(attribute);
2146       
2147        element.appendChild(doc.createTextNode(value));
2148        return element;
2149    }
2150   
2151    /**
2152     * @param collection is the collection to search in
2153     * @param query is the query term to search for. It won't specify the
2154     * indexed field to search in, which will mean that GSearch will
2155     * search all default indexed fields.
2156     * @param maxDocs is the maximum number of results to return (which
2157     * at present we consider equivalent to FedoraGSearch's hitpageSize).   
2158    */
2159    public String[] textQuery(String collection, String query,
2160            int maxDocs)
2161        throws Exception
2162    {
2163        // no need to search there is no query or query is empty spaces
2164        if(query.trim().equals(""))
2165            return new String[]{};
2166       
2167        // QUERY value won't specify indexed field to search, Fedora
2168        // Gsearch will take that as meaning all default indexed fields.
2169        // Params to search() method below: string of fielded query terms; 
2170        // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2171        query = query + " " + "PID" + COLON + GREENSTONE;
2172       
2173        String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2174        // now we have the XML returned by FedoraGSearch, get the pids
2175        // of the documents returned (if any)
2176        String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2177                collection, searchResult);
2178        return pids;
2179    }
2180   
2181    /**
2182     * This method performs a fieldquery, searching for x number of phrases
2183     * in each of the 4 indexed fields. 
2184     * @param collection is the collection to search in
2185     * @param nameValParamsMap is a Map of several(key, value) entries,
2186     * 4 of which we're concerned with here:
2187     * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2188     * - the values are a comma separated list of terms (phrases or single
2189     * words) to search that field in. There may be more than 1 or
2190     * there may be none (in which case there may be N empty values or
2191     * spaces separated by commas).
2192     * @param maxDocs is the maximum number of results to return (which
2193     * at present we consider equivalent to FedoraGSearch's hitpageSize).   
2194     * */
2195    public String[] fieldQuery(String collection, Map nameValParamsMap,
2196            int maxDocs)
2197        throws Exception
2198    {
2199        // we're going to maintain a list of UNIQUE pids that were returned
2200        // in search results. Hence we use Set:
2201        java.util.Set set = new java.util.HashSet();
2202       
2203        // (1) Use Fedora's search to search document titles, if they were
2204        // specified:
2205        String[] docTitlepids = {};
2206       
2207        String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2208        if(docTitleTerms != null) { // no doc titles may have been specified
2209            String[] phrases = docTitleTerms.split(COMMA);
2210           
2211            // search the individual phrases first:
2212            for(int i = 0; i < phrases.length; i++) {
2213                if(phrases.equals("") || phrases.equals(" "))
2214                    continue; //skip when there are no terms
2215                docTitlepids = this.searchDocumentTitles(
2216                        collection, phrases[i], false);
2217                for(int j = 0; j < docTitlepids.length; j++)
2218                    set.add(docTitlepids[j]);
2219            }
2220        }
2221        // (2) use FedoraGSearch to search doc AND section titles, and
2222        // fulltext (in case these were specified in nameValParamsMap):
2223        String searchResult = this.fedoraGSearch.search(
2224                nameValParamsMap, 1, maxDocs);
2225       
2226        String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2227                collection, searchResult);
2228       
2229        for(int i = 0; i < pids.length; i++)
2230            set.add(pids[i]);
2231       
2232        pids = null;
2233        pids = new String[set.size()];
2234        set.toArray(pids); // unique pids
2235        return pids;
2236    }
2237   
2238    /** @return a String representing Greenstone3 XML for a query process
2239     * response returning the results for the query denoted by parameter
2240     * nameValParamsMap.
2241     * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2242     * query field data values. The names match the field names that
2243     * describeCollectionService() would have returned for the query service.
2244     * @param collection is the name of the collection
2245     * @param service is the name of the query service
2246     * This method is only ever called when any of the services in the digital
2247     * library described themselves as type=query. Therefore any digital
2248     * libraries that have no query services, can just return emtpy message
2249     * strings (or even "") since this method will never be called on them
2250     * anyway. */
2251    public String query(String collection, String service,
2252            Map nameValParamsMap)
2253    {
2254        FedoraGS3RunException ex = null;
2255        // (1) obtain the requested number of maximum result documents
2256        int maxDocs = 100;
2257        try{
2258            maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2259        } catch(NumberFormatException e) {
2260            maxDocs = 100;
2261        }
2262       
2263        String pids[] = {};
2264        // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2265        if(service.endsWith("TextQuery")) {
2266            try {
2267                // get the Query field:
2268                String query = (String)nameValParamsMap.get(QUERY);
2269                pids = textQuery(collection, query, maxDocs);
2270            }
2271            catch(Exception e) {
2272                LOG.error("Error in TextQuery processing: " + e);
2273                ex = new FedoraGS3RunException(
2274                    "When trying to use FedoraGenericSearch for a TextQuery", e);
2275               
2276            }
2277        } else { // (3) FieldQuery
2278            // first get the comma-separated lists
2279            String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2280            String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2281            // both are comma separated lists, so split both on 'comma'
2282            String[] fieldNames = listOfFieldNames.split(COMMA);
2283            String[] searchTerms = listOfSearchTerms.split(COMMA);
2284           
2285            // In the fieldNames and searchTerms lists of nameValParamsMap,
2286            // each searchTerm element was matched with its correspondingly
2287            // indexed fieldName.
2288            // A new map is going to reorganise this, by putting all terms
2289            // for a particular fieldName together in a comma separated list
2290            // and associating that with the fieldName. I.e. (key, value) ->
2291            // (fieldName, comma-separated list of all terms in that field)
2292            Map map = new HashMap();
2293            for(int i = 0; i < searchTerms.length; i++) {
2294                // there may be fewer searchTerms than fieldNames (since some
2295                // fieldNames may have been left empty), so loop on searchTerms
2296                if(map.containsKey(fieldNames[i])) { // fieldName is already
2297                    // in the list, so append comma with new value
2298                    String termsList = (String)map.get(fieldNames[i]);
2299                    termsList = termsList + COMMA + searchTerms[i];
2300                    map.put(fieldNames[i], termsList);
2301                } else { // this is the first time this fieldName occurred
2302                    // just put the fieldName with searchTerm as-is
2303                    map.put(fieldNames[i], searchTerms[i]);
2304                }
2305            }
2306           
2307            try {
2308                // For fieldquery, we search on all the fieldNames specified
2309                // - if DOC_TITLES is specified then we use Fedora's search
2310                // - for all other fieldNames specified, we use FedoraGSearch
2311                pids = fieldQuery(collection, map, maxDocs);
2312            }
2313            catch(Exception e) {
2314                LOG.error("Error in FieldQuery processing: " + e);
2315                ex = new FedoraGS3RunException(
2316                    "When trying to use FedoraGenericSearch for a FieldQuery", e);
2317            }
2318        }
2319       
2320        // Build Greenstone XML Query response message for from
2321        // the pids (which should be document identifiers)
2322        Document doc = builder.newDocument();
2323        // <metadataList><metadata name="numDocsMatched" value="n" />
2324        // </metadataList>
2325        Element metadataList = doc.createElement(
2326                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2327        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2328       
2329        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2330        attribute.setValue(NUM_DOCS_MATCHED);
2331        metadata.setAttributeNode(attribute);
2332       
2333        attribute = doc.createAttribute(GSXML.VALUE_ATT);
2334        attribute.setValue(Integer.toString(pids.length));
2335        metadata.setAttributeNode(attribute);
2336       
2337        metadataList.appendChild(metadata);
2338       
2339        // <documentNodeList>
2340        // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2341        // docType='hierarchy' nodeType="leaf" />
2342        // ...
2343        // ...
2344        // </documentNodeList>
2345        Element docNodeList = doc.createElement(
2346                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2347        // for each
2348        for(int i = 0; i < pids.length; i++) {
2349            Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2350            attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2351            attribute.setValue(pids[i]);
2352            docNode.setAttributeNode(attribute);
2353           
2354            attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2355            attribute.setValue("hierarchy");
2356            docNode.setAttributeNode(attribute);
2357           
2358            attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2359            attribute.setValue("root");
2360            docNode.setAttributeNode(attribute);
2361            docNodeList.appendChild(docNode);
2362        }
2363       
2364        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2365                GSXML.REQUEST_TYPE_PROCESS, service);
2366        try{
2367            return FedoraCommons.elementToString(responseMsg);
2368        }catch(TransformerException e) {
2369            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2370                + " " + e;
2371        }
2372    }
2373
2374   
2375    // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2376    /** Given a URL that represents a fedoraPID, will look up the object.
2377     * If it exists, it will return the contents of the DC:Title of its datastream.
2378     * If it doesn't exist, it will return the URL as-is.
2379     * @param URL: the URL that (after modification) represents a fedoraPID to look up.
2380     * @param collection: the name of collection in which to search for the URL
2381     * representing a fedoraPID.
2382     * @return the string (representing a fedoraPID) stored in the DC:Title of the
2383     * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2384     * then the parameter URL is returned.
2385    */
2386    public String getPIDforURL(String url, String collection) {
2387    FedoraGS3RunException ex = null; // any RemoteException
2388
2389    // (1) convert url to the fedorapid
2390    // / -> _ and : -> -
2391    String fedoraPID = url.replaceAll("/", "_");
2392    fedoraPID = fedoraPID.replaceAll(":", "-");
2393    // prefix "greenstone-http:<colname>-" to the fedoraPID
2394    fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2395    //LOG.error("### fedoraPID: " + fedoraPID);
2396
2397    // (2) Look up the datastream for the fedorapid
2398    String dcTitle = "";
2399    try {
2400        dcTitle = getDCTitle(fedoraPID);
2401    } catch(Exception e) {
2402        LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2403        ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2404    }
2405    //String dc = this.getDC(fedoraPID);
2406    //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2407
2408    // (3) if fedorapid exists, extract the dc:title content.
2409    // if it doesn't exist, return url
2410    if(dcTitle.equals("")) {       
2411        return url;
2412    } else {
2413        // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2414        //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2415        return dcTitle+"-1";
2416    }
2417    }
2418   
2419    public static void main(String args[]) {
2420        try{
2421            // testing default constructor
2422            //FedoraGS3Connection con = new FedoraGS3Connection();
2423           
2424            // testing constructor that takes properties file to show initial
2425            // fedora server values
2426            java.io.File propertyFilename
2427                = new java.io.File("fedoraGS3.properties");
2428            FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2429           
2430            // DESCRIBE: serviceList, collectionList
2431            System.out.println("serviceList:\n" + con.getServiceList());
2432           
2433            System.out.println("collectionList:\n" + con.getCollectionList());
2434           
2435            String[] colPIDs = con.getCollections();
2436            String[] collectionNames = con.getCollectionNames(con.getCollections());
2437           
2438           
2439            for(int i = 0; i < collectionNames.length; i++) {
2440                System.out.println("Describing collections:\n");
2441                System.out.println(con.describeCollection(collectionNames[i]));
2442                System.out.println("Describing collection services:\n"
2443                    + con.describeCollectionServices(collectionNames[i]));
2444            }
2445           
2446            String[] serviceNames = con.getServiceNames();
2447            for(int i = 0; i < serviceNames.length; i++) {
2448                System.out.println("Describing " + serviceNames[i] + ":\n"
2449                    + con.describeCollectionService("demo", serviceNames[i]));
2450            }
2451           
2452                       
2453            // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2454            // along with EX of the top-level document:
2455            System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2456            System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2457           
2458                       
2459            String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2460            System.out.println("\nGET CONTENT:");
2461            for(int i = 0; i < docIDs.length; i++) {
2462                System.out.println(con.getContent(docIDs[i]));
2463            }
2464           
2465            System.out.println("\nGET META:");
2466            for(int i = 0; i < docIDs.length; i++) {
2467                System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2468            }
2469           
2470            String[] getTitlesFor = {
2471                    "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2472                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2473                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2474                    "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2475                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2476            };
2477           
2478            // first let's display the regular meta for top-level docs and
2479            // their sections
2480            for(int i = 0; i < getTitlesFor.length; i++) {
2481                System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2482            }
2483           
2484            System.out.println("\nTitles are:");
2485            System.out.println(con.getTitleMetadata(getTitlesFor));
2486           
2487            System.out.println("\nGET STRUCTURE:");
2488            for(int i = 0; i < docIDs.length; i++) {
2489                System.out.println("Descendents and numChildren:\n"
2490                           + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2491                System.out.println("Parent and numSiblings:\n"
2492                           + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
2493            }
2494           
2495            // TEST ERROR CASES:
2496            System.out.println("\nTESTING ERROR CASES");
2497            System.out.println(con.getContent("greenstone:demo-pinky"));
2498            String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2499            "greenstone:demo-pinky" };
2500            System.out.println(con.getContent(errorCases));
2501            System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2502                           System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2503           
2504            System.out.println("\nCLASSIFIER BROWSE");
2505            System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2506                              new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2507           
2508            System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2509            String[] classNodeIDs = new String[26];
2510            for(int i = 0; i < classNodeIDs.length; i++) {
2511                int subClassifierNum = i + 1;
2512                classNodeIDs[i] = "CL1." + subClassifierNum;
2513            }
2514            System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2515                                      classNodeIDs, new String[]{"all"}));
2516           
2517            System.out.println("Testing query services");
2518            System.out.println("TEXT QUERY:");
2519            Map formControlValsMap = new HashMap();
2520            formControlValsMap.put(MAXDOCS, "100");
2521            formControlValsMap.put(QUERY, "snails");
2522            String searchResponse
2523                = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2524            System.out.println(searchResponse);
2525           
2526            System.out.println("FIELD QUERY:");
2527            formControlValsMap.clear();
2528            formControlValsMap.put(MAXDOCS, "100");
2529            formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2530            formControlValsMap.put(FIELDNAME_ATT,
2531                    "allFields,docTitles,allFields,allFields");
2532            searchResponse
2533                = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2534            System.out.println(searchResponse);
2535           
2536            System.exit(0);
2537        }catch(Exception e) {
2538            JOptionPane.showMessageDialog(
2539                    null, e, "Error", JOptionPane.ERROR_MESSAGE);
2540            //System.err.println("ERROR: " + e);
2541            e.printStackTrace();
2542        }
2543    }
2544}
Note: See TracBrowser for help on using the browser.