root/other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java @ 26286

Revision 26286, 106.7 KB (checked in by ak19, 6 years ago)

GSearch works again with the latest version of Fedora and FedoraGSearch (3.6.1 and 2.5 respectively).

Line 
1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
32import org.greenstone.gsdl3.util.GSXML;
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.Attr;
36import org.w3c.dom.Text;
37import org.w3c.dom.NodeList;
38import org.w3c.dom.Node;
39import org.xml.sax.InputSource;
40
41import java.io.File;
42import java.util.HashMap;
43import java.util.Properties;
44import java.util.Map;
45
46import javax.swing.JOptionPane;
47
48import org.xml.sax.SAXException;
49import java.io.UnsupportedEncodingException;
50import java.io.IOException;
51import javax.net.ssl.SSLHandshakeException;
52import java.net.Authenticator;
53import java.net.ConnectException;
54import java.net.MalformedURLException;
55import java.net.PasswordAuthentication;
56import java.rmi.RemoteException;
57import javax.xml.parsers.ParserConfigurationException;
58import javax.xml.transform.TransformerException;
59
60/**
61 * Class that extends FedoraConnection in order to be able to use
62 * Fedora's web services to retrieve the specific datastreams of
63 * Greenstone documents stored in Fedora's repository. This class
64 * provides methods that convert those datastreams into Greenstone3
65 * XML response messages which are returned.
66 * @author ak19
67*/
68public class FedoraGS3Connection
69    extends FedoraConnection implements FedoraToGS3Interface,
70        FedoraToGS3Interface.Constants 
71{
72    /** The logging instance for this class */
73    private static final Logger LOG = Logger.getLogger(
74            FedoraGS3Connection.class.getName());
75   
76    /** Default name of Fedora index */
77    private static final String DEFAULT_FEDORA_INDEX = "FgsIndex"; //"BasicIndex" for older versions of GSearch
78   
79    /** Complete list of services that our FedoraGS3 would support
80     * if everything goes well. If a connection to FedoraGSearch
81     * cannot be established, the query services will no longer be
82     * available. The actual services supported are given by member
83     * variable serviceNames. */
84    protected static final String[] SERVICES = {
85        "DocumentContentRetrieve", "DocumentMetadataRetrieve",
86        "DocumentStructureRetrieve",
87        "TextQuery", "FieldQuery",
88        "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
89    };
90   
91    /** List of services actually supported by our FedoraGS3 repository
92     * after construction. If FedoraGenericSearch can't be connected to,
93     * then query services will not be offered */
94    protected String[] serviceNames;
95   
96    /** The object used to connect to FedoraGenericSearch, which is used
97     * for doing full-text searching */
98    protected GSearchConnection fedoraGSearch;
99
100    /** The url for the wsdl file of FedoraGSearch's web services
101     * by default this will be the Fedora server's base URL
102     * concatenated to "gsearch/services/FgsOperations?wsdl" */
103    protected String gSearchWSDLURL;
104       
105    /** The last part of the gSearchWSDL URL. The first part is
106     * the same as the fedora server's base url. */
107        protected String gSearchWSDLSuffix;
108           
109    /** The name of the index that FedoraGSearch will index the GS3
110     * documents into. If no name is specified in the properties file,
111     * this will default to FedoraIndex. */
112    protected String gSearchIndexName;
113   
114    /** 5 argument constructor is the same as that of superclass FedoraConnection:
115     * @param protocol can be either http or https
116     * @param host is the host where the fedora server is listening
117     * @param port is the port where the fedora server is listening
118     * @param fedoraServerUsername is the username for administrative
119     * authentication required to access the fedora server.
120     * @param fedoraServerPassword is the password for administrative
121     * authentication required to access the fedora server. If no password was set
122     * when installing Fedora, leave the field "".
123     * Instantiates a FedoraGS3Connection object which connects to Fedora's
124     * web services through stub classes and tries to connect to FedoraGSearch's
125     * web services through the default WSDL location for it
126     * ("gsearch/services/FgsOperations?wsdl"). If another url is to be used,
127     * call setGSearchWSDLURL(url) after the constructor instead.
128    */
129    public FedoraGS3Connection(String protocol, String host, int port,
130            String fedoraServerUsername, String fedoraServerPassword)
131        throws ParserConfigurationException, MalformedURLException,
132            SSLHandshakeException, RemoteException, AuthenticationFailedException,   
133                NotAFedoraServerException, ConnectException, Exception 
134    {
135        super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
136        // super() will call setInitialisationProperties(properties)
137        // And that will try to instantiate the GSearchConnection.
138    }
139   
140    /** No-argument constructor which is the same as that of superclass
141     * FedoraConnection: it displays a small dialog requesting input for the
142     * host, port, administrative password and username of the fedora server.
143     * If no password was set on the fedora repository when installing it,
144     * the user can leave the password field blank. */
145    public FedoraGS3Connection()
146        throws ParserConfigurationException, MalformedURLException,
147            CancelledException, ConnectException, RemoteException,
148            SSLHandshakeException, Exception
149    {
150        super();
151        // super() will call setInitialisationProperties(properties)
152        // And that will try to instantiate the GSearchConnection.
153    }
154   
155    /** Single-argument constructor which is the same as that of superclass
156     * FedoraConnection: it takes the name of the properties file where
157     * connection initialisation values may already be provided and then
158     * displays a small dialog requesting input for the host, port,
159     * administrative password and username of the fedora server showing
160     * the values in the properties file as default. If the necessary
161     * initialisation are not present in the file, the corresponding fields
162     * in the dialog will be blank.
163     * If no password was set on the fedora repository when installing it,
164     * the user can leave the password field blank. */
165    public FedoraGS3Connection(File propertiesFilename)
166        throws ParserConfigurationException, MalformedURLException,
167            CancelledException, ConnectException, RemoteException,
168            SSLHandshakeException, Exception
169    {
170        super(propertiesFilename);
171        // super() will call setInitialisationProperties(properties)
172        // And that will try to instantiate the GSearchConnection.
173    }
174   
175    /** The superclass constructor calls this method passing any preset 
176     * properties loaded from a propertiesFile. This method is overridden 
177     * here in order to instantiate the gSearchConnection based on the 
178     * - gSearchWSDLSuffix that will be appended to the fedora base url.
179     * (If one was not provided in the properties file, gSearchWSDLURL defaults
180     * to something of the form
181     * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
182     * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
183     * "gsearch/services/FgsOperations?wsdl".
184     * - name of the index into which the GS3 documents have been indexed
185     * and which FedoraGenericSearch should use to perform searches. If none is
186     * given in the properties file, then the index name defaults to "FgsIndex"
187     * (no longer BasicIndex  or FedoraIndex).
188     * @param properties is the Properties Map loaded from a properties file
189     * (if there was any) which specifies such things as host and port of the
190     * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
191     * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
192     * to whatever the final value of this.gSearchWSDLURL' suffix is, and
193     * "gsearch.indexName" will be set to to whatever the final value of
194     * this.gSearchIndexName is.
195    */
196    protected void setInitialisationProperties(Properties properties)
197        throws ParserConfigurationException, MalformedURLException,
198        CancelledException, ConnectException, RemoteException,
199        SSLHandshakeException, Exception
200    {
201        super.setInitialisationProperties(properties);
202        // gsearchWSDL URL suffix, if not specified, defaults to
203        // "fedoragsearch/services/FgsOperations?wsdl" which is
204        // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
205        this.gSearchWSDLSuffix = properties.getProperty(
206            "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
207        this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
208        // Set the property to whatever this.gSearchWSDLURL is now,
209        // so that it will be written out to the properties file again
210        properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
211       
212        // Similarly for the name of the index FedoraGenericSearch should use
213        // when performing searches for GS3 docs stored in Fedora's repository.
214        this.gSearchIndexName = properties.getProperty(
215                "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
216        properties.setProperty("gsearch.indexName", this.gSearchIndexName);
217        // Create a connection to FedoraGSearch's web services:
218        initSearchFunctionality();
219    }
220   
221    /** Overridden init method to work with the 5 argument constructor, so that we can
222     * bypass using setInitialisationProperties() which works with a Properties map.
223    */
224    protected void init(String protocol, String host, String port,
225            final String fedoraServerUsername, final String fedoraServerPassword)
226        throws ParserConfigurationException, MalformedURLException,
227            AuthenticationFailedException, RemoteException, Exception
228    {
229        super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
230        this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
231        this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
232        this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
233       
234        // Now need to set username and password for accessing WSDL (after GSearch 2.2)
235        // http://stackoverflow.com/questions/3037221/401-error-when-consuming-a-web-service-with-http-basic-authentication-using-cxf
236       
237        // The java.net.Authenticator can be used to send user credentials when needed.
238        Authenticator.setDefault(new Authenticator() {
239            @Override
240            protected PasswordAuthentication getPasswordAuthentication() {
241            return new PasswordAuthentication(
242                              fedoraServerUsername,
243                              fedoraServerPassword.toCharArray());
244            }
245        });
246       
247        initSearchFunctionality();
248    }   
249
250
251    /** Init method that instantiates a GSearchConnection object used
252     * to work with the separate FedoraGSearch web services.
253     * The url of the WSDL for FedoraGSearch's web services is worked out
254     * from the baseURL of the Fedora server.
255    */
256    protected void initSearchFunctionality()
257    {
258        try {
259            this.fedoraGSearch = null;
260            this.fedoraGSearch = new GSearchConnection(
261                    gSearchWSDLURL, gSearchIndexName);
262            this.serviceNames = SERVICES;
263        } catch(Exception e){
264            LOG.error("Cannot connect to FedoraGSearch's web services at "
265                  + gSearchWSDLURL + "\nQuery services will not be available.", e);
266            // Exception, e, as parameter prints the stacktrace of the exception to the log
267
268            // If an exception occurs, something has gone wrong when
269            // trying to connect to FedoraGSearch's web services. This
270            // means, we can't offer query services, as that's provided
271            // by FedoraGSearch
272            serviceNames = null;
273            int countOfNonQueryServices = 0;
274            for(int i = 0; i < SERVICES.length; i++) {
275                // do not count query services
276                if(!SERVICES[i].toLowerCase().contains("query")) {
277                    countOfNonQueryServices++;
278                }
279            }
280            // Services now supported are everything except Query services
281            serviceNames = new String[countOfNonQueryServices];
282            int j = 0;
283            for(int i = 0; i < SERVICES.length; i++) {
284                if(!SERVICES[i].toLowerCase().contains("query")) {
285                    serviceNames[j] = SERVICES[i];
286                    j++; // valid serviceName, so increment serviceName counter
287                }
288                 
289            }
290        }
291    }
292   
293    /** @return the gSearchWSDLURL, the url of the WSDL for the
294     * FedoraGSearch web services */
295    public String getGSearchWSDLURL() { return gSearchWSDLURL; }
296   
297    /** Sets the member variable gSearchWSDLURL that specify the location of
298     * the WSDL file of FedoraGSearch's web services. Then it attempts
299     * to instantiate a connection to those web services.
300     * @param url is the new url of the GSearch web services WSDL file */
301    public void setGSearchWSDLURL(String url) {
302        this.gSearchWSDLURL = url;
303        initSearchFunctionality();
304    }
305   
306    /** @return the gSearchIndexName, the name of the index Fedora Generic
307     * Search will search in (where GS3 docs have been indexed into). */
308    public String getGSearchIndexName() { return gSearchIndexName; }
309   
310    /** Sets the member variable gSearchIndexName that specifies the name
311     * of the index containing indexed GS3 documents. Then it attempts
312     * to instantiate a connection to the Fedora GSearch web services using
313     * this changed value for indexName.
314     * @param indexName is the new name of the index containing indexed GS3
315     * docs that GSearch should search in. */
316    public void setGSearchIndexName(String indexName) {
317        this.gSearchIndexName = indexName;
318        initSearchFunctionality();
319    }
320   
321    /** @return the array of the services actually supported by FedoraGS3 */
322    protected String[] getServiceNames() { return this.serviceNames;}
323   
324    /**
325     * For finding out if the sectionNumber is given as part of the docID.
326     * @param docID is the String that contains the docPID and may also
327     * contain the section number.
328     * @return true if the document identifier docID contains a section-
329     * number, and false if it consists solely of the docPID.
330     * That is, true is returned if
331     * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
332     * and false is returned if
333     * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
334     * */
335    protected boolean containsSectionNumber(String docID) {
336        // if there are two hyphens in the docID, then there are sections
337        // (and the section number is appended at end of docID)
338        // docID = "greenstone:colName-<docPID>-<sectionNum>"
339        return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
340    }
341   
342    /** This method will extract the docPID from docID and return it.
343     * (If a sectionNumber is suffixed to the docID, the docPID which is
344     * the prefix is returned; otherwise the docID is the docPID and is
345     * returned)
346     * @param docID is the String that contains the docPID and may also
347     * contain the section number.
348     * @return only the docPID portion of the docID.
349    */
350    protected String getDocPIDFromDocID(String docID) {
351        if(containsSectionNumber(docID))
352            return docID.substring(0, docID.lastIndexOf(HYPHEN));
353        // else (if there's no sectionNumber), docID is the docPID
354        return docID;
355    }
356   
357    /** This method will return the section Number, if there's any
358     * suffixed to the docID. Otherwise it will return the empty string
359     * @param docID is the String that contains the docPID and may also
360     * contain the section number.
361     * @return only the sectionID portion of the docID - if any, else "".
362    */
363    protected String getSectionIDFromDocID(String docID) {
364        if(containsSectionNumber(docID))
365            return docID.substring(
366                    docID.lastIndexOf(HYPHEN)+1, docID.length());
367        return "";
368    }
369   
370    /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
371     * response message that gives the metadata for each collection identified
372     * @param collIDs is an array of fedora pids identifying collections in the
373     * fedora repository
374     * @return a GS3 DocumentMetadataRetrieve response message containing the
375     * EX metadata for all the requested collections */
376    public String getCollectionMetadata(String[] collIDs) {
377    return getMetadata(collIDs, new String[] {"all"});
378    }
379   
380    /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
381     * response message is returned containing the metadata for each document.
382     * @param docIDs is an array of document identifiers (docID can either be
383     * &lt;pid&gt;s items (documents) in the fedora repository, or
384     * "&lt;pid&gt;-sectionNumber".
385     * @return a GS3 DocumentMetadataRetrieve response message containing the
386     * EX, DC, DLS metadata for all the requested documents
387     * @param metadata is the list of metadata elements to be retrieved for each doc */
388    public String getDocumentMetadata(String[] docIDs, String[] metadata) {
389        return getMetadata(docIDs, metadata);
390    }
391   
392    /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
393     * response message that gives the metadata for the collection identified
394     * @param collID is a fedora pid identifying a collection in its repository
395     * @return a GS3 DocumentMetadataRetrieve response message containing the
396     * EX metadata for the requested collection
397     * @param metadata is the list of metadata elements to be retrieved for each doc */
398    public String getCollectionMetadata(String collID) {
399        return getMetadata(new String[] {collID}, new String[] {"all"});
400    }
401   
402    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
403     * response message containing the metadata for the document.
404     * @param docID is a document identifier (docID can either be a &lt;pid&gt;
405     * of an item (document) in the fedora repository, or it can be
406     * "&lt;pid&gt;-sectionNumber".
407     * @return a GS3 DocumentMetadataRetrieve response message containing the
408     * EX, DC, DLS metadata for the requested document */
409    public String getDocumentMetadata(String docID, String[] metadata)  {
410    return getMetadata(new String[] {docID}, metadata);
411    }
412
413    /** @return a greenstone DocumentMetadataRetrieve response for the
414     * documents or collections indicated by the docIDsOrCollIDs.
415     * @param docIDsOrCollIDs is an array of identifiers which may be either the
416     * fedora pids for collections, or otherwise may be a document identifier.
417     * In the last case, the document ID may consist of either
418     * "documentPID-sectionNumber" or may just be just fedora documentPID
419     * @param metadata is the list of metadata elements to be retrieved for each doc */
420    public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
421    {
422        Document doc = builder.newDocument();
423        FedoraGS3RunException ex = null;
424       
425        Element docNodeList = doc.createElement(
426                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
427       
428        try{
429            for(int i = 0; i < docIDsOrCollIDs.length; i++) {
430                // create the <documentNode> containing the metadata
431                // for each document docID
432                Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
433                docNodeList.appendChild(docNode);
434            }
435        } catch(Exception e) {
436            ex = new FedoraGS3RunException(e);
437            ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
438        }
439       
440        Element responseMsg = createResponseMessage(doc, docNodeList, ex, 
441                GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
442        try{
443            return FedoraCommons.elementToString(responseMsg);
444        } catch(TransformerException e) {
445            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
446                + " " + e;
447        }
448    }
449   
450    /** Method that takes a new DOM document, as well as an identifier of either
451     * a collection or document (which may be a fedora pid for the collection
452     * or document, or may be the documentPid-sectionNumber for a document) and
453     * returns a documentNode element for it:
454     * &lt;documentNode&gt;&lt;metadataList&gt;
455     * &lt;metadata name=""&gt;value&lt;/metadata&gt;
456     * ...
457     * &lt;/metadataList&gt;&lt;/documentNode&gt;
458     * @return documentNode containing the metadata for the collection or
459     * document given by parameter ID
460     * @param id denotes a collection pid, a document pid or a docID of the 
461     * form "documentpid-sectionNumber"
462     * @param metadata is the list of metadata elements to be retrieved for each doc */
463    protected Element getMetadata(Document doc, String id, String[] metadata)
464        throws RemoteException, UnsupportedEncodingException,
465            SAXException, IOException
466    {
467        // We're going to create the documentNode nested inside the following
468        // documentNodeList:
469            // <documentNodeList>
470            // <documentNode nodeID=""><metadataList>
471            // <metadata name="">value</metadata>
472            // </metadataList></documentNode>
473            // <documentNode>...</documentNode>
474            // </documentNodeList>
475            // <documentNodeList>
476       
477        // <documentNode nodeID="docID"> - the docNode on which a metadata
478        // retrieve is being performed
479        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
480        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
481        attribute.setValue(id);
482        docNode.setAttributeNode(attribute);
483       
484        // <metadataList>
485        Element metadataList = doc.createElement(
486                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
487       
488        String ex = "";
489        String dc = "";
490        String dls = "";
491        if(id.endsWith(_COLLECTION)) { // docID refers to a collection
492            // Obtain the "EX" datastream (extracted metadata) for the collection
493            ex = this.getEX(id);
494        }
495        else { // docID refers to a document
496            // work out the document's fedora PID and section ID, and then
497            // obtain the EX (extracted metadata) and DC datastreams for the doc
498           
499            // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
500            // is the same as for pid="greenstone:<colname>-docPID"
501            // That is, <Section id="1"> refers to the toplevel document docPID
502            // If requested for top-level document, there may also be DLS meta
503            String sectionID = getSectionIDFromDocID(id);
504            String docPID = getDocPIDFromDocID(id);
505            if(sectionID.equals("") || sectionID.equals("1")) {
506                // metadata of toplevel document is requested
507                ex = this.getEX(docPID); // slightly faster than doing
508                        //getSectionEXMetadata(docID, "1")
509                dc = this.getDC(docPID);
510                dls = this.getDLS(docPID);
511            }
512            else {
513                ex = getSectionEXMetadata(docPID, sectionID);
514                dc = getSectionDCMetadata(docPID, sectionID);
515            }
516        }
517       
518        String metafields = "";
519        for(int i = 0; i < metadata.length; i++) {
520            metafields = metafields + metadata[i] + "|";           
521        }
522
523        // Adding in metadata sets in alphabetical order
524        // DC metadata for a top-level document is different from EX, DLS:
525        // only the element's namespace prefix is "dc", the rest of a tagname
526        // is unknown.
527        if(!dc.equals("")) {
528            addMetadataWithNamespacedTagNames(doc, metadataList,
529                              dc, DC, metafields);
530        }
531       
532        // Check if we were supposed to process dls and dc metadata
533        // as well. We only ever do this for top-level documents,
534        // in which case, dls and dc will be non-empty strings
535        if(!dls.equals("")) {
536            addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
537        }
538       
539        // we definitely have an EX metadatastream for each
540        // collection object, top-level document object,
541        // and document section item
542        addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
543       
544        // now the metadataList has been built up
545        docNode.appendChild(metadataList);
546       
547        return docNode; // return <documentNode> containing the metadata
548    }
549   
550    /** This method retrieves all the metadata elements in the metaDataStream
551     * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
552     * metadataSetNS is the namespace of each tag, and creates a new element of
553     * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
554     * each. Each of these are then appended to the metadataList parameter.
555     * @param doc is the Document object using which the new metadata Elements
556     * are to be constructed
557     * @param metadataList is the &lt;metadataList&gt; Element to which the new
558     * metadata Elements are to be appended as children.
559     * @param metaDatastream the metadata datastream in string form (e.g. the
560     * Dublin Core metadata stored in the Fedora repository).
561     * @param metadataSet is the constant datastream identifier, e.g. "DC".
562     * At present this method applies to the DC metadata and any others like it
563     * where each tagname is different except for the constant dc: namespace.
564     * @param metafields is a | separated string containing the metadatafields to
565     * extract or "all" if all fields are requested
566    */
567    protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
568                             String metaDatastream, String metadataSet, String metafields)
569        throws SAXException, IOException
570    {
571        Document src = builder.parse(
572                new InputSource(new StringReader(metaDatastream)));
573       
574        // The following doesn't work for some reason: to retrieve all elements
575        // whose namespace prefix starts with "dc", we pass "*" for localName
576        //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
577       
578        // Longer way: get the children of the root document
579        NodeList children = src.getDocumentElement().getChildNodes();
580       
581        for(int i = 0; i < children.getLength(); i++) {
582            String nodeName = children.item(i).getNodeName();
583            // check that the nodename starts with the metadataSet ("dc") namespace,
584            // which simultaneously ensures that the node's an element:
585            if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
586                // need to have a period for Greenstone instead of Fedora's colon
587                nodeName = nodeName.replace(COLON, PERIOD);
588                if(metadataSet.equals(DC)) { // dc:title -> dc.Title
589                nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
590                    + nodeName.substring(4);
591                }
592
593                // get the requested metadata fields
594                if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
595                Element metatag = (Element)children.item(i);
596                String value = FedoraCommons.getValue(metatag);
597                // <dc:tagname>value</dc:tagname>
598                // we're going to put this in our metadata element as
599                // <metadata name="dc.Tagname">value</metadata>
600               
601                // create metadata of (name, value) pairs in target DOM (doc)
602                Element metadata = doc.createElement(GSXML.METADATA_ELEM);
603                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
604               
605                attribute.setValue(nodeName);
606                metadata.setAttributeNode(attribute);
607                Text content = doc.createTextNode(value);
608                metadata.appendChild(content);
609                metadataList.appendChild(metadata);
610                }
611            }
612        }       
613    }
614   
615    /** This method retrieves all the metadata elements in the metaDataStream
616     * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt; 
617     * where "namespace" is the namespace prefix of each tag, and metadataName 
618     * is the name of the metadata (like author, title). For each element
619     * it creates a corresponding new element of the form
620     * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
621     * Each of these are then appended to the metadataList parameter.
622     * @param doc is the Document object using which the new metadata Elements
623     * are to be constructed
624     * @param metadataList is the &lt;metadataList&gt; Element to which the new
625     * metadata Elements are to be appended as children.
626     * @param metaDatastream the metadata datastream in string form (e.g. the
627     * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
628     * repository).
629     * @param metadataSet is the constant datastream identifier,
630     * e.g. "DLS" or "EX".
631     * At present this method applies to the DLS and EX metadata as they have
632     * constant tagnames throughout.
633     * @param metafields is a | separated string containing the metadatafields to
634     * extract or "all" if all fields are requested.
635    */
636    protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
637                           String metaDatastream, String metadataSet, String metafields)
638        throws SAXException, IOException
639    {
640        // Namespace prefix can be "ex:" or "dls:"
641        String namespacePrefix = "";
642        if(!metadataSet.equals(EX)) {
643            // need to have a period for Greenstone instead of Fedora's colon
644            namespacePrefix = metadataSet.toLowerCase() + PERIOD;
645        }
646               
647        Document src = builder.parse(
648                new InputSource(new StringReader(metaDatastream)));
649        NodeList metaTags = src.getElementsByTagName(
650                metadataSet.toLowerCase()+COLON+METADATA);
651                // Looking for tagnames: <ex:metadata> or <dls:metadata>
652           
653        for(int i = 0; i < metaTags.getLength(); i++) {
654            Element metatag = (Element)metaTags.item(i);
655           
656            // extract the metadata of (name, value) pairs from src DOM
657            // look for <metadata name="name">value</metadata>
658            String name = metatag.hasAttribute(NAME) ?
659                    metatag.getAttribute(NAME) : "";
660            // sometimes, there are several metadata for the same name, in this
661            // case, look for a qualifier and append its value to the name to
662            // distinguish it uniquely:
663            if(metatag.hasAttribute(QUALIFIER)) {
664                name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
665            }
666            name = namespacePrefix + name; // prefix with namespace, if any
667            if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
668                String value = FedoraCommons.getValue(metatag);
669               
670                // create metadata of (name, value) pairs in target DOM (doc)
671                Element metadata = doc.createElement(GSXML.METADATA_ELEM);
672                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
673                attribute.setValue(name);
674                metadata.setAttributeNode(attribute);
675                Text content = doc.createTextNode(value);
676                metadata.appendChild(content);
677               
678                metadataList.appendChild(metadata);
679            }
680        }
681    }
682   
683    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
684     * response message containing ONLY the Title metadata for the document.
685     * @param docID is a document identifier (docID can either be a &lt;pid&gt;
686     * of an item (document) in the fedora repository, or it can be
687     * "&lt;pid&gt;-sectionNumber".
688     * @return a GS3 DocumentMetadataRetrieve response message containing the
689     * Title metadata for the requested document */
690    public String getTitleMetadata(String docID) {
691        return getTitleMetadata(new String[] { docID });
692    }
693   
694    /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
695     * response message containing ONLY the Title metadata for the documents.
696     * @param docIDs is a list of document identifiers (where docID can either be
697     * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
698     * "&lt;pid&gt;-sectionNumber".
699     * @return a GS3 DocumentMetadataRetrieve response message containing the
700     * Title metadata for all the requested documents */
701    public String getTitleMetadata(String[] docIDs) {
702        // Must create message of the following form:
703        // <documentNodeList><documentNode nodeID="docID">
704        // <metadataList><metadata name="Title">sometitle</metadata>
705        // </metadataList></documentNode>
706       
707        Document doc = builder.newDocument();
708        FedoraGS3RunException ex = null;
709       
710        Element docNodeList = doc.createElement(
711                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
712        try{
713            for(int i = 0; i < docIDs.length; i++) {
714                Element docNode = getTitleMetadata(doc, docIDs[i]);
715                docNodeList.appendChild(docNode);
716            }
717        }catch(Exception e) {
718            ex = new FedoraGS3RunException(e);
719            //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
720            ex.setSpecifics("EX metadata datastream");
721        }
722       
723        Element responseMsg = createResponseMessage(doc, docNodeList, ex, 
724                GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
725        try{
726            return FedoraCommons.elementToString(responseMsg);
727        } catch(TransformerException e) {
728            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
729                + " " + e;
730        }
731    }
732   
733    /** Method that takes a new DOM document, as well as an identifier of either
734     * a document or document section and returns a documentNode element containing
735     * the title metadata for it:
736     * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
737     * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
738     * &lt;/metadataList&gt;&lt;/documentNode&gt;
739     * @return documentNode containing the metadata for the collection or
740     * document given by parameter ID
741     * @param docID denotes the id of a document or a document section, so id
742     * is either a document-pid or it's of the form documentpid-sectionNumber */
743    protected Element getTitleMetadata(Document doc, String docID)
744        throws RemoteException, UnsupportedEncodingException,
745            SAXException, IOException
746    {
747        // Returns a docNode element of the following form:
748        // <documentNode nodeID="docID">
749        // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
750        // </documentNode>
751       
752        // <documentNode nodeID="docID">
753        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
754        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
755        attribute.setValue(docID);
756        docNode.setAttributeNode(attribute);
757       
758        // <metadataList>
759        Element metaList = doc.createElement(
760                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
761        // <metadata name="Title">
762        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
763        // if we connect it all up (append children), we can immediately add
764        // the name attribute into the metadata element:
765        metaList.appendChild(metadata);
766        docNode.appendChild(metaList);
767        metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
768       
769        String title = "";
770        String sectionID = getSectionIDFromDocID(docID);
771        String docPID = getDocPIDFromDocID(docID);
772   
773        // check if title of toplevel document is requested
774        if(sectionID.equals(""))
775            title = this.getDocTitle(docPID);
776        else { // title of document section
777            title = this.getSectionTitle(docPID, sectionID);
778        }
779       
780        metadata.appendChild(doc.createTextNode(title));
781       
782        return docNode;
783    }
784   
785    /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
786     * containing the requested portion of the document structure of the documents
787     * indicated by docIDs:
788     * @param docID is the document identifier of the document whose hierarchical
789     * structure is requested. The name of the collection is already included in the
790     * docID for a Fedora DL.
791     * @param structure - strings specifying the required structure of the document.
792     * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
793     * @param info - strings specifying the required structural info of the document.
794     * It can be any combination of: siblingPosition, numSiblings, numChildren.
795    */
796    public String getDocumentStructure(String docID, String[] structure, String[] info) {
797    return getStructure(new String[]{docID}, structure, info);
798    }
799   
800
801     /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
802     * containing the requested portion of the document structure of the documents
803     * indicated by docIDs:
804     * @param docIDs is an array of document identifiers of documents whose
805     * hierarchical structures are requested. The name of the collection is already
806     * included in the docID for a Fedora DL.
807     * @param structure - strings specifying the required structure of each document.
808     * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
809     * @param info - strings specifying the required structural info of each document.
810     * It can be any combination of: siblingPosition, numSiblings, numChildren.
811    */
812    public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
813    return getStructure(docIDs, structure, info);
814    }
815
816        /**
817     * Returns a greenstone3 DocumentStructureRetrieve XML response message
818     * containing the document structures for the given docIDs.
819     * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML, 
820     * greenstone formatted XML is returned. The requested section of the table
821     * of contents (TOC) for a document is converted into the greenstone3 xml 
822     * format that is returned upon DocumentStructureRetrieve requests.
823     * @param docIDs the documentIDs for which the section's structure is returned;
824     * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
825     * @param structure - the structure of the sections to return. Can be any combination of:
826     * ancestors, parent, siblings, children, descendants, entire.
827     * @param infos - strings containing any combination of the values: numChildren, numSiblings,
828     * siblingPosition. The requested info gets added as attributes to the returned root element.
829     * @return a greenstone3 DocumentStructureRetrieve XML response message in
830     * String format with the structure of the docIDs requested.
831    */
832    protected String getStructure(String[] docIDs, String[] structure, String[] infos)
833    {
834        Document doc = builder.newDocument();
835        FedoraGS3RunException ex = null;
836        // <documentNodeList>
837        Element docNodeList = doc.createElement(
838                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
839       
840        try{
841            // append the <documentNodes> for the docIDs
842            // to the docNodeList
843            //getStructureElement(docNodeList, docIDs, levels);
844            getStructureElement(docNodeList, docIDs, structure, infos);
845        } catch(Exception e) {
846            ex = new FedoraGS3RunException(e);
847            ex.setSpecifics("(requested portion of) TOC datastream");
848        }
849        // insert our <documentNodeList> into a GS3 response message
850        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
851                GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
852        try{
853            return FedoraCommons.elementToString(responseMsg);
854        } catch(TransformerException e) {
855            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
856                + " " + e;
857        }
858    }
859
860
861    /** Given a &lt;documentNodeList&gt; portion of a greenstone3
862     * DocumentStructureRetrieve XML response message, this method will populate
863     * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
864     * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
865     * the doc structures are appended.
866     * @param docIDs the documentIDs for which the section's structure is returned;
867     * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
868     * @param structures - the structure of the sections to return. Can be any combination of:
869     * ancestors, parent, siblings, children, descendants, entire.
870     * @param infos - a string containing any combination of the values: numChildren, numSiblings,
871     * siblingPosition. The requested info gets added as attributes to the returned root element.
872    */
873    protected void getStructureElement(Element docNodeList, String[] docIDs,
874                       String[] structures, String[] infos)
875        throws RemoteException, UnsupportedEncodingException, SAXException,
876            IOException
877    {
878        // Make one string out of requested structure components, and one string from info components
879        String structure = "";
880        String info = "";
881        for(int i = 0; i < structures.length; i++) {
882        structure = structure + structures[i] + "|";
883        }
884        for(int i = 0; i < infos.length; i++) {
885        info = info + infos[i] + "|";
886        }
887       
888        // process each docID
889        for(int i = 0; i < docIDs.length; i++) {
890        // work out the document's fedora PID and section ID
891        String sectionID = getSectionIDFromDocID(docIDs[i]);
892        String docPID = getDocPIDFromDocID(docIDs[i]);
893        if(sectionID.equals("")) {
894            sectionID = "1";
895        }
896
897        // get the required section, along with children or descendants
898        Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
899        Document doc = docNodeList.getOwnerDocument();
900       
901        // copy-and-convert that structure into a structure format for GS3
902        Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
903       
904        if(!info.equals("")) {
905            // <nodeStructureInfo>
906            //    <info name="" value="" />
907            //    <info name="" value="" />
908            //    ...
909            // </nodeStructureInfo>
910            Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
911            Element root = srcDocElement.getOwnerDocument().getDocumentElement();
912           
913            if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
914            String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
915            Element infoEl = doc.createElement(GSXML.INFO_ATT);
916            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
917            infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
918            nodeStructureInfo.appendChild(infoEl);
919            }
920           
921            if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
922            String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
923            Element infoEl = doc.createElement(GSXML.INFO_ATT);
924            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
925            infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
926            nodeStructureInfo.appendChild(infoEl);
927            }
928           
929            if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
930            String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
931            Element infoEl = doc.createElement(GSXML.INFO_ATT);     
932            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
933            infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
934            nodeStructureInfo.appendChild(infoEl);
935            }
936
937            if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
938            String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
939            Element infoEl = doc.createElement(GSXML.INFO_ATT);     
940            infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
941            infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
942            nodeStructureInfo.appendChild(infoEl);
943            }       
944
945            docNode.appendChild(nodeStructureInfo);
946        }
947       
948        // add it to our list of documentNodes
949        docNodeList.appendChild(docNode);
950        }
951    }
952
953       
954    /**
955     * Takes the portion of the XML document outlining the structure of the
956     * document (section)--in the format this is stored in Fedora--and returns
957     * Greenstone 3 DOM XML format for outlining document structure. 
958     * @return a &lt;documentNode&gt; element that contains a greenstone3
959     * DocumentStructureRetrieve XML corresponding to the parameter Element section
960     * (which is in fedora XML), for the document indicated by docID.
961     * @param requestingDocID is the identifier of the document for which the
962     * structure was requested. It's this document's children or descendants that
963     * will be returned. Note that this is not always the same as (clear from) 
964     * parameter docID.
965     * @param docID is the documentID for which the section's structure is
966     * returned where docID = "docPID-sectionNumber".
967     * @param section - the fedora section XML that is being mirrored in
968     * greenstone3 format.
969    */
970    protected Element getStructure(Document doc, String requestingDocID,
971            String docID, Element section)
972    {
973        // we want to mirror the section's DOM (given in fedora XML) in
974        // greenstone3's XML for a DocumentStructureRetrieve response.
975       
976        // <documentNode nodeID="docID"> - the docNode on which a structure retrieve 
977        // is being performed
978        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
979        Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
980        attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
981        docNode.setAttributeNode(attribute);       
982       
983        // <nodeStructure>
984        Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
985       
986        // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
987        Element rootNode = createDocNodeFromSubsection(doc, section, docID);
988       
989        // fills in the subtree of the rootNode in our nodeStructure element
990        createDocStructure(doc, section, rootNode, docID);
991            //where section represents the root section
992       
993        nodeStructure.appendChild(rootNode);
994        docNode.appendChild(nodeStructure);
995        return docNode;     
996    }
997
998
999    /** Recursive method that creates a documentStructure mirroring parameter
1000     * section, starting from parameter parent down to all descendants
1001     * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
1002     * for the docPID whose substructure is to be mirrored
1003     * @param parent is the XML documentNode in the greenstone repository whose
1004     * descendants created by this method will correspond to the descendants of
1005     * parameter section. 
1006     * @param doc is the document containing the parent;
1007     * @param docPID is the prefix of all nodeIDs in the parent's structure
1008    */
1009    protected void createDocStructure(
1010            Document doc, Element section, Element parent, String docPID)
1011    {
1012        // get the section's children (if any)
1013        NodeList children = section.getChildNodes();
1014        for(int i = 0; i < children.getLength(); i++) {
1015            Node n = children.item(i);
1016           
1017            if(n.getNodeName().equals(SECTION_ELEMENT)) {
1018                //then we know it's an element AND that its tagname is "Section"
1019                Element subsection = (Element)n;
1020                Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1021                parent.appendChild(child);
1022               
1023                // recursion call on newly found child-element and subsection
1024                createDocStructure(doc, subsection, child, docPID);
1025            }
1026        }
1027    }
1028   
1029    /** Given a particular subsection element, this method creates a
1030     * Greenstone3 DocumentNode element that mirrors it.
1031     * @param doc is the document that will contain the created DocumentNode
1032     * @param docID is the prefix of all nodeIDs in the parent's structure
1033     * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1034     * TOC for the docPID which will be mirrored in the greenstone XML
1035     * documentNode that will be returned.
1036     * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1037     * &lt;Section&gt; element passed as parameter subSection. */
1038    protected Element createDocNodeFromSubsection(
1039            Document doc, Element subSection, String docID)
1040    {
1041        Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1042        Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1043        docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1044        docNode.setAttributeNode(docType);
1045       
1046        Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1047        String sectionID = subSection.hasAttribute(ID) ?
1048                subSection.getAttribute(ID) : "";
1049        if(sectionID.equals("1")
1050           && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1051            // reset the attribute without the section number (just "docID" may be important for democlient?)
1052            nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1053        } else {
1054            nodeID.setValue(docID + HYPHEN + sectionID);
1055        }
1056        //nodeID.setValue(docID + HYPHEN + sectionID);
1057        docNode.setAttributeNode(nodeID);
1058       
1059        Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1060        if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1061            nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1062        }
1063        docNode.setAttributeNode(nodeType);
1064        return docNode;
1065    }
1066   
1067   
1068    /** Given an identifier that is either a docPID or a concatenation of
1069     * docPID+sectionID, this method works out the fedora assigned docPID and
1070     * sectionID and then calls getContentBody(docPID, sectionID) with those.
1071     * @param docID is expected to be of the form
1072     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1073     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1074     * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1075     * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1076     * is returned! */
1077    public String getContent(String docID) {
1078        return this.getContent(new String[]{docID});
1079    }
1080   
1081    /** Given an identifier that is a concatenation of docID+sectionID, this
1082     * method works out the fedora assigned docPID and sectionID and then calls
1083     * getContentBody(docPID, sectionID) with those.
1084     * @param docIDs is an array of document identifiers of the form
1085     * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1086     * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1087     * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1088    public String getContent(String[] docIDs) {
1089        Document doc = builder.newDocument();
1090        FedoraGS3RunException ex = null;
1091       
1092        //<documentNodeList>
1093        Element docNodeList = doc.createElement(
1094                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1095       
1096        try{
1097            for(int i = 0; i < docIDs.length; i++) {
1098                // get the sectionID and docPID from the docID
1099                String sectionID = this.removePrefix(
1100                        getSectionIDFromDocID(docIDs[i]), SECTION);
1101                String docPID = getDocPIDFromDocID(docIDs[i]);
1102                if(sectionID.equals("")) // if no section is specified, get
1103                    sectionID = "1"; // get the content for Section id="1"
1104                           
1105                // Get the contents for the requested section of document docPID
1106                String sectionContent = this.getContentBody(docPID, sectionID);
1107               
1108                // set the nodeID attribute
1109                Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1110                Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1111               
1112                nodeId.setValue(docIDs[i]); // just set the docID which will contain
1113                            // the docPID (and sectionID if already present)
1114               
1115                docNode.setAttributeNode(nodeId);
1116                // set the text content to what was retrieved
1117                Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1118                Text textNode = doc.createTextNode(sectionContent.trim());
1119               
1120                nodeContent.appendChild(textNode);
1121                docNode.appendChild(nodeContent);
1122                //add the documentNode to the docNodeList
1123                docNodeList.appendChild(docNode);
1124            }
1125        } catch(Exception e) {
1126            ex = new FedoraGS3RunException(e);
1127            ex.setSpecifics("requested doc Section datastream");
1128        }
1129        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1130                GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1131        try{
1132            return FedoraCommons.elementToString(responseMsg);
1133        } catch(TransformerException e) {
1134            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1135                + " " + e;
1136        }
1137    }
1138       
1139    /** Gets the contents of a textNode from a section.
1140     * @return the text content of a section.
1141     * @param docPID the pid of the document from which a section's text is to
1142     * be retrieved.
1143     * @param sectionID is the section identifier of the document denoted by
1144     * docPID whose text is to be returned.
1145    */
1146    protected String getContentBody(String docPID, String sectionID)
1147        throws RemoteException, UnsupportedEncodingException,
1148            SAXException, IOException
1149    {   
1150        String section = this.getSection(docPID, sectionID);
1151       
1152        // the content is nested inside a <Section> element,
1153        // we extract it from there:
1154        InputSource source = new InputSource(new StringReader(section));
1155        Document doc = builder.parse(source);
1156           
1157        // The document Element is the <Section> we want.
1158        // Get its text contents:
1159        section = FedoraCommons.getValue(doc.getDocumentElement());
1160       
1161        // we are going to remove all occurrences of "_httpdocimg_/"
1162        // that precede associated filenames, because that's a GS3
1163        // defined macro for resolving relative urls. It won't help
1164        // with documents stored in fedora.
1165        section = section.replaceAll(GS3FilePathMacro+"/", "");
1166        return section;
1167    }
1168       
1169    /** Here we create the greenstone's response message element:
1170     * &lt;message&lg;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1171     * @return a greenstone response-message element.
1172     * @param doc - the Document object which should me used to create the
1173     * &lt;message&gt; and &lt;response&gt; elements
1174     * @param content - the element that is to be nested inside &lt;response&gt;
1175     * @param ex - any exception that occurred when trying to create
1176     * the content parameter
1177     * @param responseType - the value for the type attribute of &lt;response&gt;,
1178     * such as "describe", "retrieve", "browse", "query"...
1179     * @param originator - indiates the collectionName or service (like
1180     * DocumentContentRetrieve) from where this response message originates
1181    */
1182    protected Element createResponseMessage(Document doc, Element content,
1183            Exception ex, String responseType, String originator)
1184    {
1185        Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1186        // from = "FedoraGS3"
1187        Attr attribute = doc.createAttribute(GSXML.FROM_ATT);       
1188        attribute.setValue(originator);
1189        response.setAttributeNode(attribute);
1190       
1191        // type = "describe" or "process" - whatever's given in requestType:
1192        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1193        attribute.setValue(responseType);
1194        response.setAttributeNode(attribute);
1195       
1196        if(content != null)
1197            response.appendChild(content);
1198       
1199        // we'll create an error element for RemoteExceptions (web service problems) 
1200        // and UnsupportedEncodingExceptions and
1201        if(ex != null) {
1202            Element error = doc.createElement(GSXML.ERROR_ELEM);
1203            error.appendChild(doc.createTextNode(ex.getMessage()));
1204            // now append the error to the <response> element (after
1205            // the content element whatever that was)
1206            response.appendChild(error);
1207        }
1208       
1209        Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1210        message.appendChild(response);
1211        doc.appendChild(message);
1212        return message;
1213    }
1214   
1215    /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1216     * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1217     * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1218     * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1219     * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1220     * @param doc - the Document object which should me used to create the
1221     * &lt;serviceList&gt; element */
1222    protected Element createServiceList(Document doc)
1223    {
1224        Element serviceList = doc.createElement(
1225                GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1226       
1227        for(int i = 0; i < serviceNames.length; i++) {
1228            // create the <service name="serviceName[i]" type="servicetype" />
1229            Element service = doc.createElement(GSXML.SERVICE_ELEM);
1230           
1231            Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1232            attribute.setValue(serviceNames[i]);
1233            service.setAttributeNode(attribute);
1234           
1235            attribute = doc.createAttribute(GSXML.TYPE_ATT);
1236            if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1237                attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1238            else if(serviceNames[i].contains("Query")) // search services
1239                attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1240            else
1241                attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1242            service.setAttributeNode(attribute);
1243           
1244            // add the service element to the serviceList element
1245            // <serviceList><service /></serviceList>
1246            serviceList.appendChild(service);
1247        }
1248        return serviceList;
1249    }
1250   
1251    /** @return a GS3 response message for a describe services request:
1252     * indicating the list of services supported by the Fedora-Greenstone
1253     * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1254     * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1255     * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1256     * serviceNames. */
1257    public String getServiceList()
1258    {
1259        Document doc = builder.newDocument();
1260        Element serviceList = createServiceList(doc);
1261        // make <serviceList> the body of the responseMessage:
1262        // <message><response><serviceList></response></message>
1263        Element responseMsg = createResponseMessage(doc, serviceList, null,
1264                GSXML.REQUEST_TYPE_DESCRIBE, "");
1265        try {
1266            return FedoraCommons.elementToString(responseMsg);
1267        }catch(TransformerException e) {
1268            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1269                + " " + e;
1270        }
1271    }
1272   
1273    /** @return a GS3 describe response message listing the collections and 
1274     * collection-specific metadata stored in the Fedora-Greenstone repository. */
1275    public String getCollectionList()
1276    {
1277        Document doc = builder.newDocument();
1278        FedoraGS3RunException ex = null; // any RemoteException
1279       
1280        // create the <collectionList /> element
1281        Element collectionList = doc.createElement(
1282                GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1283        try{
1284            String[] collectionNames = this.getCollectionNames(
1285                    this.getCollections()); // this line could throw RemoteException
1286            for(int i = 0; i < collectionNames.length; i++) {
1287                // create the <collection name="somename" /> element
1288                Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1289                Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1290                attribute.setValue(collectionNames[i]);
1291                collection.setAttributeNode(attribute);
1292               
1293                // append the <collection> element as child of <collectionList>
1294                collectionList.appendChild(collection);
1295               
1296                //if(collection.hasAttribute(GSXML.NAME_ATT))
1297                    //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1298            }
1299        } catch(RemoteException e) { // if this happens, perhaps it's because it
1300            // can't find Greenstone collections in fedora repository?
1301            ex = new FedoraGS3RunException(e);
1302            ex.setSpecifics(
1303                "greenstone collections in fedora repository");
1304        }
1305       
1306        // make <collectionList> the body of the responseMessage:
1307        // <message><response><collectionList></response></message>
1308        Element responseMsg = createResponseMessage(doc, collectionList, ex,
1309                GSXML.REQUEST_TYPE_DESCRIBE, "");
1310        try{
1311            return FedoraCommons.elementToString(responseMsg);
1312        }catch(TransformerException e) {
1313            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1314                + " " + e;
1315        }
1316    }
1317   
1318    /** @return a GS3 describe response message for a collection in the
1319     * Fedora-Greenstone repository.
1320     * @param collectionName - the name of the collection that is to be described. 
1321     * It will be converted to a fedora collection pid, which is of the form
1322     * "greenstone:&lt;collectionName&gt;-collection". */
1323    public String describeCollection(String collectionName)
1324    {
1325        Document doc = builder.newDocument();
1326        FedoraGS3RunException ex = null;
1327       
1328        Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1329        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1330        attribute.setValue(collectionName);
1331        collection.setAttributeNode(attribute);
1332       
1333        //<displayItem assigned="true" lang="en" name="name">
1334        //"some display name"</displayItem>
1335        Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1336       
1337        attribute = doc.createAttribute(GSXML.LANG_ATT);
1338        attribute.setValue(this.lang);
1339        displayItem.setAttributeNode(attribute);
1340       
1341        attribute = doc.createAttribute(GSXML.NAME_ATT);
1342        attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1343        displayItem.setAttributeNode(attribute);
1344       
1345        try{
1346            Text textNode = doc.createTextNode(
1347                this.getCollectionTitle(getCollectionPID(collectionName)));
1348            displayItem.appendChild(textNode);
1349        } catch(Exception e) {
1350            // can't find Greenstone collections in fedora repository or problem
1351            // getting their titles from their metadata datastream?
1352            ex = new FedoraGS3RunException(e);
1353            ex.setSpecifics("greenstone collections or their metadata"
1354                    + "in the fedora repository");
1355        }
1356        // now append the displayItem element as child of the collection element
1357        collection.appendChild(displayItem);
1358        // get the <serviceList> and add it into the collection description.
1359        // Services for all collections in the FedoraGS3 repository are the
1360        // same, offering a ClassifierBrowse to browse titles by starting letter
1361        // and DocRetrieve services: Content, Metadata and Structure.
1362       
1363        Element serviceList = createServiceList(doc);
1364        collection.appendChild(serviceList);
1365       
1366        Element responseMsg = createResponseMessage(doc, collection, ex,
1367                GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1368        try{
1369            return FedoraCommons.elementToString(responseMsg);
1370        }catch(TransformerException e) {
1371            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1372                + " " + e;
1373        }
1374    }
1375   
1376    /** @return a GS3 describe response message for the services of a collection
1377     * in the Fedora-Greenstone repository. So far, these services are the same for
1378     * all fedora collections: they are the services given in member variable
1379     * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1380     * ClassifierBrowseMetadataRetrieve.   
1381     * All collections in this Digital Library (Fedora Repository) share the
1382     * same services, so this method returns the same services as getServiceList();
1383     * @param collectionName - the name of the collection whose services are to
1384     * be described. It will be converted to a fedora collection pid, which is of
1385     * the form "greenstone:&lt;collectionName&gt;-collection". */
1386    public String describeCollectionServices(String collectionName)
1387    {
1388        Document doc = builder.newDocument();
1389       
1390        Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1391        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1392        attribute.setValue(collectionName);
1393        collection.setAttributeNode(attribute);
1394       
1395        Element serviceList = createServiceList(doc);
1396        collection.appendChild(serviceList);
1397       
1398        Element responseMsg = createResponseMessage(doc, collection, null,
1399                GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1400        try{
1401            return FedoraCommons.elementToString(responseMsg);
1402        }catch(TransformerException e) {
1403            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1404                + " " + e;
1405        }
1406    }
1407   
1408    /** All collections in this Digital Library (Fedora Repository) share
1409     * the same services, so this method returns the same as
1410     * describeCollectionService(collName, serviceName).
1411     * @return a GS3 describe response message for the requested service
1412     * of the given collection. DocumentContent/Metadata/StructureRetrieve
1413     * return nothing special except their names; browse (and any query)
1414     * return more complex XML responses.
1415     * @param serviceName - the name of the service in the collection which is to
1416     * be described.*/
1417    public String describeService(String serviceName)
1418    {
1419        // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1420        // we return:
1421        // <message><response from="<name>Retrieve" type="describe">
1422        // <service name="<name>Retrieve" type="retrieve" /></response></message>
1423        // But for browse (and any query) service, we return the data necessary
1424        // for displaying it
1425       
1426        Document doc = this.builder.newDocument();
1427        Element service = doc.createElement(GSXML.SERVICE_ELEM);
1428        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1429        attribute.setValue(serviceName);
1430        service.setAttributeNode(attribute);
1431       
1432        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1433       
1434        if(serviceName.toLowerCase().endsWith("retrieve")) {
1435            attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1436        }
1437        else if(serviceName.toLowerCase().contains("browse")) {
1438            attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1439           
1440            // we need name and description <displayItem> elements
1441            Element displayItem
1442                = createNameValuePairElement(doc,
1443                        GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1444            service.appendChild(displayItem);
1445           
1446            displayItem = createNameValuePairElement(doc,
1447                    GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1448                    "Browse pre-defined classification hierarchies");
1449            service.appendChild(displayItem);
1450           
1451            // now need a classifierList
1452            Element classifierList = doc.createElement(
1453                    GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1454           
1455            int classifierNum = 1;
1456            // append a <classifier content="some letter" name="CL#">
1457            // for each letter of the alphabet:
1458            Element classifier = createClassifierElement(doc, "TitleByLetter",
1459                classifierNum++, "titles by letter", "Browse titles by letter");
1460            // now add this <classifier> to the <classifierList>
1461            classifierList.appendChild(classifier);
1462           
1463            // ANY MORE CLASSIFIERS? ADD THEM HERE
1464           
1465            service.appendChild(classifierList);
1466        } // ELSE check for whether it is a query service
1467        else if(serviceName.toLowerCase().contains("query")) {
1468            attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1469            if(serviceName.equals("TextQuery")) {
1470                describeTextQueryService(service);
1471            } else if(serviceName.equals("FieldQuery")) {
1472                describeFieldQueryService(service);
1473            }
1474        }
1475       
1476        // don't forget to add the type attribute to the service!
1477        service.setAttributeNode(attribute);
1478       
1479        String from = serviceName;
1480       
1481        Element responseMsg = createResponseMessage(doc, service, null,
1482                GSXML.REQUEST_TYPE_DESCRIBE, from);
1483        try{
1484            return FedoraCommons.elementToString(responseMsg);
1485        }catch(TransformerException e) {
1486            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1487                + " " + e;
1488        }
1489    }
1490   
1491    /** Appends children to the parameter service Element that make the
1492     * final service Element into a describe response XML for FedoraGS3's
1493     * TextQuery service.
1494     * @param service is the service Element that is being filled out. */
1495    protected void describeTextQueryService(Element service) {
1496        Document doc = service.getOwnerDocument();
1497        // we need name, submit (button) and description <displayItem> elements
1498        Element displayItem = createNameValuePairElement(doc,
1499            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1500            "Text Search");
1501        service.appendChild(displayItem);
1502       
1503        displayItem = createNameValuePairElement(doc,
1504                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1505        service.appendChild(displayItem);
1506       
1507        displayItem = createNameValuePairElement(doc,
1508                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1509                "Title and full-text search service");
1510        service.appendChild(displayItem);
1511       
1512        //create the <paramList>
1513        Element paramList = doc.createElement(
1514            GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1515       
1516        // we ignore granularity to search at: it will always be
1517        // document and section level
1518        // we ignore casefolding: always on (that is, case is irrelevant)
1519        // we ignore document display order: always ranked
1520       
1521        // Constructing the following:
1522        // <param default="100" name="maxDocs" type="integer">
1523        // <displayItem name="name">Maximum hits to return</displayItem>
1524        // </param>
1525        Element param = doc.createElement(GSXML.PARAM_ELEM);
1526       
1527        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1528        attribute.setValue(MAXDOCS);
1529        param.setAttributeNode(attribute);
1530       
1531        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1532        attribute.setValue("100");
1533        param.setAttributeNode(attribute);
1534       
1535        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1536        attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1537        param.setAttributeNode(attribute);
1538       
1539        displayItem = createNameValuePairElement(doc,
1540                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1541                "Maximum hits to return");
1542        param.appendChild(displayItem);
1543       
1544        paramList.appendChild(param);
1545       
1546        // Constructing the following:
1547        // <param name="query" type="string">
1548        // <displayItem name="name">Query string</displayItem>
1549        // </param>
1550        param = doc.createElement(GSXML.PARAM_ELEM);
1551       
1552        attribute = doc.createAttribute(GSXML.NAME_ATT);
1553        attribute.setValue(QUERY);
1554        param.setAttributeNode(attribute);
1555       
1556        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1557        attribute.setValue(GSXML.PARAM_TYPE_STRING);
1558        param.setAttributeNode(attribute);
1559       
1560        displayItem = createNameValuePairElement(doc,
1561                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1562                "Query string");
1563        param.appendChild(displayItem);
1564       
1565        paramList.appendChild(param);
1566       
1567        service.appendChild(paramList);
1568    }
1569   
1570    /** Appends children to the parameter service Element that make the
1571     * final service Element into a describe response XML for FedoraGS3's
1572     * FieldQuery service.
1573     * @param service is the service Element that is being filled out. */
1574    protected void describeFieldQueryService(Element service) {
1575        Document doc = service.getOwnerDocument();
1576        // we need name, submit (button) and description <displayItem> elements
1577        Element displayItem = createNameValuePairElement(doc,
1578            GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1579            "Form Search");
1580        service.appendChild(displayItem);
1581       
1582        displayItem = createNameValuePairElement(doc,
1583                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1584        service.appendChild(displayItem);
1585       
1586        displayItem = createNameValuePairElement(doc,
1587                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1588                "Simple fielded search");
1589        service.appendChild(displayItem);
1590       
1591        //create the <paramList>
1592        Element paramList = doc.createElement(
1593            GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1594       
1595        // we ignore granularity to search at: it will always be
1596        // document and section level
1597        // we ignore casefolding: always on (that is, case is irrelevant)
1598        // we ignore document display order: always ranked
1599       
1600        // Constructing the following:
1601        // <param default="100" name="maxDocs" type="integer">
1602        // <displayItem name="name">Maximum hits to return</displayItem>
1603        // </param>
1604        Element param = doc.createElement(GSXML.PARAM_ELEM);
1605       
1606        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1607        attribute.setValue(MAXDOCS);
1608        param.setAttributeNode(attribute);
1609       
1610        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1611        attribute.setValue("100");
1612        param.setAttributeNode(attribute);
1613       
1614        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1615        attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1616        param.setAttributeNode(attribute);
1617       
1618        displayItem = createNameValuePairElement(doc,
1619                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1620                "Maximum hits to return");
1621        param.appendChild(displayItem);
1622       
1623        paramList.appendChild(param);
1624       
1625        // Constructing the following:
1626        // <param name="simpleField" occurs="4" type="multi">
1627        // <displayItem name="name"></displayItem>
1628        //
1629        // <param name="query" type="string">
1630        //   <displayItem name="name">Word or phrase </displayItem>
1631        // </param>
1632        //
1633        // <param default="allFields" name="fieldname" type="enum_single">
1634        //   <displayItem name="name">in field</displayItem>
1635        //   
1636        //   <option name="docTitles">
1637        //     <displayItem name="name">document titles</displayItem>
1638        //   </option>
1639        //   <option name="allTitles">
1640        //     <displayItem name="name">document and section titles</displayItem>
1641        //   </option>
1642        //   <option name="fullText">
1643        //     <displayItem name="name">full text</displayItem>
1644        //   </option>
1645        //   <option name="all">
1646        //     <displayItem name="name">titles and full text</displayItem>
1647        //   </option>
1648        //   <option name="">
1649        //     <displayItem name="name"></displayItem>
1650        //       </option>
1651        //  </param>
1652        // </param>
1653        Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM); 
1654        attribute = doc.createAttribute(GSXML.NAME_ATT);
1655        attribute.setValue(SIMPLEFIELD_ATT);
1656        rowOfParams.setAttributeNode(attribute);
1657       
1658        // we want the row of controls to occur multiple times
1659        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1660        attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1661        rowOfParams.setAttributeNode(attribute);
1662       
1663        attribute = doc.createAttribute(OCCURS_ATT);
1664        attribute.setValue("4"); // we want this row to occur 4 times
1665        rowOfParams.setAttributeNode(attribute);
1666       
1667        // <param name="query" type="string">
1668        //   <displayItem name="name">Word or phrase </displayItem>
1669        // </param>
1670        param = doc.createElement(GSXML.PARAM_ELEM);
1671       
1672        attribute = doc.createAttribute(GSXML.NAME_ATT);
1673        attribute.setValue(QUERY);
1674        param.setAttributeNode(attribute);
1675       
1676        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1677        attribute.setValue(GSXML.PARAM_TYPE_STRING);
1678        param.setAttributeNode(attribute);
1679       
1680        displayItem = createNameValuePairElement(doc,
1681                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1682                "Word or phrase");
1683        param.appendChild(displayItem);
1684        rowOfParams.appendChild(param);
1685       
1686        // <param default="allFields" name="fieldName" type="enum_single">
1687        //   <displayItem name="name">in field</displayItem>
1688        param = doc.createElement(GSXML.PARAM_ELEM);
1689        attribute = doc.createAttribute(GSXML.NAME_ATT);
1690        attribute.setValue(FIELDNAME_ATT);
1691        param.setAttributeNode(attribute);
1692       
1693        attribute = doc.createAttribute(GSXML.TYPE_ATT);
1694        attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1695        param.setAttributeNode(attribute);
1696       
1697        attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1698        attribute.setValue(ALL_FIELDS);
1699        param.setAttributeNode(attribute);
1700       
1701        displayItem = createNameValuePairElement(doc,
1702                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1703                "in field");
1704        param.appendChild(displayItem);
1705       
1706        String[] searchFieldNames
1707            = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1708        String[] searchFieldDisplay  = {"all titles and full-text",
1709            "document titles only", "document and section titles",
1710            "full-text only"};
1711       
1712        // for each fieldName create an option element and insert
1713        // the option into the enum_multi drop-down param:
1714        // <option name="fieldName">
1715        //   <displayItem name="name">fieldName</displayItem>
1716        // </option>
1717        for(int i = 0; i < searchFieldNames.length; i++) {
1718            Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1719            attribute = doc.createAttribute(GSXML.NAME_ATT);
1720            attribute.setValue(searchFieldNames[i]);
1721            option.setAttributeNode(attribute);
1722           
1723            displayItem = createNameValuePairElement(doc,
1724                    GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1725                    searchFieldDisplay[i]);
1726            option.appendChild(displayItem);
1727            param.appendChild(option); // add option to the drop-down box
1728        }
1729       
1730        rowOfParams.appendChild(param);
1731        paramList.appendChild(rowOfParams);
1732        service.appendChild(paramList);
1733    }
1734   
1735    /**
1736     * @return a GS3 describe response message for the requested service
1737     * of the given collection. DocumentContent/Metadata/StructureRetrieve
1738     * return nothing special except their names; browse (and any query)
1739     * return more complex XML responses.
1740     * All collections in this Digital Library (Fedora Repository) share
1741     * the same services, so this method returns the same as
1742     * describeService(serviceName).   
1743     * @param collectionName - the name of the collection whose service is to
1744     * be described. It will be converted to a fedora collection pid, which is of
1745     * the form "greenstone:&lt;collectionName&gt;-collection".
1746     * @param serviceName - the name of the service in the collection which is to
1747     * be described. */
1748    public String describeCollectionService(String collectionName,
1749            String serviceName) {
1750        // collectionName can be ignored, because all services are FedoraGS3
1751        // services and are not unique to any particular (greenstone) collection.
1752        return describeService(serviceName);
1753    }
1754   
1755    /** This method performs the implemented browse operation: allowing the
1756     * user to browse the titles of documents in the given collection by letter
1757     * and returning the results.
1758     * @param collectionName is the name of the collection whose documents
1759     * starting with the given letter will be returned.
1760     * @param classifierIDs are the ids of the classifiers on which to browse. In
1761     * this case, the classifier indicates whether we browse titles by letter, or
1762     * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
1763     * @param structures - the requested browse substructure. Can be any combination
1764     * of ancestors, parent, siblings, children, descendants.
1765     * @param infos - the requested structural info. Can be numSiblings,
1766     * siblingPosition, numChildren.
1767     * @return a GS3 ClassifierBrowse response message which lists all
1768     * the documents that start with the letter indicated by parameter classifier.
1769    */
1770    public String browse(String collectionName, String[] classifierIDs,
1771                 String[] structures, String[] infos)
1772    {
1773        // Construct one string from the structures and structural info arrays
1774        String structure = "";
1775        String info = "";
1776        for(int i = 0; i < structures.length; i++) {
1777        structure = structure + structures[i] + "|";
1778        }
1779        for(int i = 0; i < infos.length; i++) {
1780        info = info + infos[i] + "|";
1781        }
1782       
1783        Document doc = builder.newDocument();
1784        FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1785       
1786        // <classifierNodeList>
1787        Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1788       
1789        for(int i = 0; i < classifierIDs.length; i++) {
1790        if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1791            browseTitlesByLetterClassifier(doc, classifierNodeList,
1792                           collectionName, classifierIDs[i],
1793                           structure, info);           
1794        }
1795        }
1796
1797        Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1798                    GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
1799        try {
1800        return FedoraCommons.elementToString(responseMsg);
1801        } catch(TransformerException e) {
1802        return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1803            + " " + e;
1804        }
1805    }
1806
1807    /** CL1 browsing classifier: browsing titles by starting letter.
1808     * The browsing structure is retrieved.
1809     * @param doc - the document object that will contain the CL1 browsing structure.
1810     * @param classifierNodeList - the classifiers will be added to this nodeList.
1811     * @param collectionName - name of the collection through which we are browsing CL1.
1812     * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1813     * a letter.
1814     * @param structure - the requested browse substructure. Can be any combination of
1815     * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1816     * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1817     * numChildren.
1818     * @return the classifierNodeList with the CL1 classifier browse structure.
1819     */
1820    public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1821                         String collectionName, String classifierID,
1822                         String structure, String info)
1823    {
1824    FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1825
1826    if(structure.indexOf("entire") != -1) {
1827        structure = structure + "ancestors|descendants";
1828    }
1829   
1830    // Structure of ancestors and children only at this stage
1831    int firstLevel = classifierID.indexOf('.');
1832    int secondLevel = classifierID.lastIndexOf('.');
1833   
1834    // <nodeStructure>
1835    Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1836
1837    // requested classifier node
1838    Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1839    Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1840    attribute.setValue(classifierID);
1841    classNode.setAttributeNode(attribute);
1842    Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1843    typeAttribute.setValue(GSXML.VLIST);
1844    classNode.setAttributeNode(typeAttribute);
1845
1846    if(firstLevel == -1) { // CL1 - toplevel node     
1847        Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1848
1849        classifierNodeList.appendChild(classNode);
1850        classNode.appendChild(nodeStructure);
1851       
1852        nodeStructure.appendChild(root);
1853        if(structure.indexOf("descendants") != -1) {
1854        getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1855        } else if(structure.indexOf("children") != -1) {
1856        getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1857        }
1858        // nothing to be done for siblings
1859    }
1860    else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1861       
1862        if(structure.indexOf("parent") != -1
1863           || structure.indexOf("ancestors") != -1
1864           || structure.indexOf("siblings") != -1) {
1865        String toplevelID = classifierID.substring(0, firstLevel);
1866        Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1867        attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1868        attribute.setValue(toplevelID);
1869        toplevelNode.setAttributeNode(attribute);
1870        typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1871        typeAttribute.setValue(GSXML.VLIST);
1872        toplevelNode.setAttributeNode(typeAttribute);
1873        Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1874
1875        classifierNodeList.appendChild(toplevelNode);
1876        toplevelNode.appendChild(nodeStructure);       
1877        nodeStructure.appendChild(node);
1878
1879        if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1880            getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1881            // pass the requested node (classNode) so that it is attached in the correct
1882            // location among its siblings, and to ensure that it is not recreated.
1883            // getTitlesByLetterStructure() will append classNode to node
1884        } else {
1885            node.appendChild(classNode);
1886        }
1887        } else {
1888        Element node = (Element)classNode.cloneNode(true);
1889        classifierNodeList.appendChild(node);
1890        node.appendChild(nodeStructure);
1891        nodeStructure.appendChild(classNode);
1892        }
1893       
1894        int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1895        char ch = (char)(num - 1 + 'A');
1896        if(structure.indexOf("descendants") != -1) {
1897        getTitlesForLetter(ch, collectionName, classNode, "descendants");
1898        } else if(structure.indexOf("children") != -1) {
1899        getTitlesForLetter(ch, collectionName, classNode, "children");
1900        }
1901    }
1902    else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1903        LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1904    }
1905
1906    return classifierNodeList;
1907    }
1908
1909    /** Creates a (CL1) subclassifier element for the docs whose titles start with
1910     * the given letter.
1911     * @param ch - the starting letter of the document titles to retrieve.
1912     * @param collectionName - name of the collection through which we are browsing CL1.
1913     * @param classifierNode - the docNodes found will be appended to this node.
1914     * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1915     * gets descendants of any documents found, otherwise gets just the children.
1916     * @return the given classifierNode which will have the child (or descendant) documents
1917     * appended to it.
1918     */
1919    public Element getTitlesForLetter(char ch, String collectionName,
1920                      Element classifierNode, String depthStructure)
1921    {
1922    Document doc = classifierNode.getOwnerDocument();
1923    FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1924
1925   
1926    // Retrieve the document structure for each subClassifierID:
1927    // all the documents that begin with its letter.
1928    String letter = String.valueOf(ch);
1929    try {
1930        String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1931        if(docPIDs.length == 0) {
1932        return classifierNode; // skip letters that don't have any kids
1933        }       
1934       
1935        for(int i = 0; i < docPIDs.length; i++) {
1936        // work out the document's fedora PID and section ID
1937        String sectionID = getSectionIDFromDocID(docPIDs[i]);
1938        String docPID = getDocPIDFromDocID(docPIDs[i]);
1939       
1940        // get the required section, along with children or descendants
1941        Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1942       
1943        // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1944        Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);       
1945       
1946        // fills in the subtree of the rootNode in our nodeStructure element
1947        createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1948        classifierNode.appendChild(docRootNode);
1949        }
1950    } catch(Exception e) {
1951        ex = new FedoraGS3RunException(e);
1952        ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1953    }
1954
1955    return classifierNode;
1956    }
1957
1958
1959    /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1960     * starting letter of the alphabet. X is each letter of the alphabet for which there
1961     * are matching document titles.
1962     * @param collectionName - name of the collection through which we are browsing CL1.
1963     * @param classifierNode - the docNodes found will be appended to this node.
1964     * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1965     * the IDs for the subclassifiers (CL.x).
1966     * @param getDescendants - if true, get descendants of any documents found, otherwise
1967     * get just the children.
1968     * @param wantedSibling - the node (already created) whose siblings are requested. We
1969     * need to make sure not to recreate this node when creating its sibling nodes.
1970     * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1971     * the alphabet that are represented in the document titles.
1972     */
1973    public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1974                          String classifierID, boolean getDescendants,
1975                          Element wantedSibling)
1976    {   
1977    String ID = "";
1978    if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1979        ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1980    }
1981   
1982    Document doc = classifierNode.getOwnerDocument();
1983    FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1984
1985    // We're going to loop to the end of the alphabet
1986    int count = 1;
1987    for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1988        // Retrieve the document structure for each subClassifierID:
1989        // all the documents that begin with its letter.
1990        String letter = String.valueOf(ch);
1991        try {
1992        String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1993        if(docPIDs.length == 0) {
1994            continue; // skip letters that don't have any kids
1995        }
1996        Element subClassifier = null;
1997        if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1998                // already have the requested node, don't recreate it
1999            subClassifier = wantedSibling;
2000        } else {
2001            // <classifierNode childType="VList" nodeID="CL1.x">
2002            subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
2003            Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
2004            typeAttribute.setValue(GSXML.VLIST);
2005            subClassifier.setAttributeNode(typeAttribute);
2006            Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2007            attribute.setValue(classifierID+"."+count);
2008            subClassifier.setAttributeNode(attribute);
2009        }
2010        classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
2011       
2012        if(getDescendants) { // get the documents
2013
2014            // append the <docNodes> for the docPIDs found as children
2015            // of subclassifier
2016
2017            for(int i = 0; i < docPIDs.length; i++) {
2018            // work out the document's fedora PID and section ID
2019            String sectionID = getSectionIDFromDocID(docPIDs[i]);
2020            String docPID = getDocPIDFromDocID(docPIDs[i]);
2021       
2022            // get the required section, along with children or descendants
2023            Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2024
2025            // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2026            Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2027           
2028            // fills in the subtree of the rootNode in our nodeStructure element
2029            createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2030            subClassifier.appendChild(rootNode);
2031            }
2032        }
2033        } catch(Exception e) {
2034        ex = new FedoraGS3RunException(e);
2035        ex.setSpecifics("requested portion of TOC file or "
2036                + "trouble with fielded search ");
2037        }
2038    }
2039    return classifierNode;
2040    }
2041
2042   
2043    /** This method performs something equivalent to a greenstone3
2044     * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2045     * @param classNodeIDs array of classifierNode IDs for which the metadata
2046     * needs to be returned.
2047     * @param metafields are the classifier metadata fields that are to be returned.
2048     * At present this method ignores them/pretends the requested metafields are
2049     * "all" and always returns the Title meta for the requested classifier nodes
2050     * (because that is all the metadata this Fedora classifier has at present).
2051     * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2052     * lists the metadata for all the classifierNodes passed as parameter.*/
2053    public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2054    {
2055        Document doc = this.builder.newDocument();
2056        // <classifierNodeList>
2057        Element classifierNodeList = doc.createElement(
2058                GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2059       
2060        // create <classifierNode><metadataList><metadata>s
2061        // </metadataList></classifierNode> for all letters of the alphabet
2062        for(int i = 0; i < classNodeIDs.length; i++) {
2063            // strip ID of everything before the first '.' (i.e. remove "CL#.")
2064            int index = classNodeIDs[i].indexOf('.');
2065            String subClassifierNumber = classNodeIDs[i].substring(index+1);
2066            index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2067            if(index != -1) {
2068                subClassifierNumber = subClassifierNumber.substring(0, index);
2069            }
2070            int subClassifierNum = Integer.parseInt(subClassifierNumber);
2071            String classifierName = "";
2072            if(subClassifierNum == 0) { // no document titles started with a letter
2073                classifierName = "A-Z";
2074            } else {
2075                char letter = (char)('A' + subClassifierNum - 1); // A = 1
2076                classifierName = String.valueOf(letter);
2077            }
2078           
2079            // <classifierNode nodeID="CL#.subNum">
2080            Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2081            Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2082            attribute.setValue(classNodeIDs[i]);
2083            classifierNode.setAttributeNode(attribute);
2084           
2085            // <metadataList>
2086            Element metadataList = doc.createElement(
2087                    GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2088           
2089            // at least one metadata element: that of the title of this
2090            // classifierNode:
2091            // <metadata name="Title">letter</metadata>
2092            Element metadata = this.createNameValuePairElement(doc,
2093                    GSXML.METADATA_ELEM, "Title", classifierName);
2094           
2095            // now connect up everything
2096            metadataList.appendChild(metadata);
2097            classifierNode.appendChild(metadataList);
2098            classifierNodeList.appendChild(classifierNode);
2099        }
2100       
2101        Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2102                GSXML.REQUEST_TYPE_PROCESS, //collName +
2103                "ClassifierBrowseMetadataRetrieve");
2104        try{
2105            return FedoraCommons.elementToString(responseMsg);
2106        }catch(TransformerException e) {
2107            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2108                + " " + e;
2109        }
2110    }
2111   
2112    /** @return a newly created element of the following format:
2113     * &lt;classifier content="somecontent" name="CL+num"&gt;
2114     *      &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2115     *      &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2116     * &lt;/classifier&gt;
2117     * @param doc - the document used to create the element
2118     * @param content - value of the content attribute
2119     * @param classifierNum - the number suffixed to the CL, together forming
2120     * the classifier Node's ID
2121     * @param displayNameVal is the bodytext of a named displayItem element
2122     * @param displayDescrVal is the bodytext of a displayItem element with
2123     * description */
2124    protected Element createClassifierElement(Document doc, String content,
2125            int classifierNum, String displayNameVal, String displayDescrVal)
2126    {
2127        final String CL = "CL";
2128        Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2129        // content attribute
2130        Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2131        att.setValue(content);
2132        classifier.setAttributeNode(att);
2133        // name attribute
2134        att = doc.createAttribute(GSXML.NAME_ATT);
2135        att.setValue(CL + classifierNum);
2136        classifier.setAttributeNode(att);
2137       
2138        // now create the displayItem children for classifier:
2139        // <displayItem name="name">#letter</displayItem>
2140        // <displayItem name="description">Browse titles starting with #letter</displayItem>
2141        Element displayItem = createNameValuePairElement(doc,
2142                GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2143        classifier.appendChild(displayItem);
2144        displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2145                GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2146        classifier.appendChild(displayItem);
2147       
2148        return classifier;
2149    }
2150   
2151       
2152    /** @return a newly created element of the following format:
2153     * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2154     * @param doc - the document used to create the element
2155     * @param elementName - the tag name
2156     * @param name - value of attribute name
2157     * @param value - the body text of the element */
2158    protected Element createNameValuePairElement(Document doc, String elementName,
2159            String name, String value) {
2160        // <elementName name="somename">"some display value"</elementName>
2161        Element element = doc.createElement(elementName);
2162        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2163        attribute.setValue(name);
2164        element.setAttributeNode(attribute);
2165       
2166        element.appendChild(doc.createTextNode(value));
2167        return element;
2168    }
2169   
2170    /**
2171     * @param collection is the collection to search in
2172     * @param query is the query term to search for. It won't specify the
2173     * indexed field to search in, which will mean that GSearch will
2174     * search all default indexed fields.
2175     * @param maxDocs is the maximum number of results to return (which
2176     * at present we consider equivalent to FedoraGSearch's hitpageSize).   
2177    */
2178    public String[] textQuery(String collection, String query,
2179            int maxDocs)
2180        throws Exception
2181    {
2182        // no need to search there is no query or query is empty spaces
2183        if(query.trim().equals(""))
2184            return new String[]{};
2185       
2186        // QUERY value won't specify indexed field to search, Fedora
2187        // Gsearch will take that as meaning all default indexed fields.
2188        // Params to search() method below: string of fielded query terms; 
2189        // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2190        query = query + " " + "PID" + COLON + GREENSTONE;
2191       
2192        String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2193        // now we have the XML returned by FedoraGSearch, get the pids
2194        // of the documents returned (if any)
2195        String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2196                collection, searchResult);
2197        return pids;
2198    }
2199   
2200    /**
2201     * This method performs a fieldquery, searching for x number of phrases
2202     * in each of the 4 indexed fields. 
2203     * @param collection is the collection to search in
2204     * @param nameValParamsMap is a Map of several(key, value) entries,
2205     * 4 of which we're concerned with here:
2206     * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2207     * - the values are a comma separated list of terms (phrases or single
2208     * words) to search that field in. There may be more than 1 or
2209     * there may be none (in which case there may be N empty values or
2210     * spaces separated by commas).
2211     * @param maxDocs is the maximum number of results to return (which
2212     * at present we consider equivalent to FedoraGSearch's hitpageSize).   
2213     * */
2214    public String[] fieldQuery(String collection, Map nameValParamsMap,
2215            int maxDocs)
2216        throws Exception
2217    {
2218        // we're going to maintain a list of UNIQUE pids that were returned
2219        // in search results. Hence we use Set:
2220        java.util.Set set = new java.util.HashSet();
2221       
2222        // (1) Use Fedora's search to search document titles, if they were
2223        // specified:
2224        String[] docTitlepids = {};
2225       
2226        String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2227        if(docTitleTerms != null) { // no doc titles may have been specified
2228            String[] phrases = docTitleTerms.split(COMMA);
2229           
2230            // search the individual phrases first:
2231            for(int i = 0; i < phrases.length; i++) {
2232                if(phrases.equals("") || phrases.equals(" "))
2233                    continue; //skip when there are no terms
2234                docTitlepids = this.searchDocumentTitles(
2235                        collection, phrases[i], false);
2236                for(int j = 0; j < docTitlepids.length; j++)
2237                    set.add(docTitlepids[j]);
2238            }
2239        }
2240        // (2) use FedoraGSearch to search doc AND section titles, and
2241        // fulltext (in case these were specified in nameValParamsMap):
2242        String searchResult = this.fedoraGSearch.search(
2243                nameValParamsMap, 1, maxDocs);
2244       
2245        String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2246                collection, searchResult);
2247       
2248        for(int i = 0; i < pids.length; i++)
2249            set.add(pids[i]);
2250       
2251        pids = null;
2252        pids = new String[set.size()];
2253        set.toArray(pids); // unique pids
2254        return pids;
2255    }
2256   
2257    /** @return a String representing Greenstone3 XML for a query process
2258     * response returning the results for the query denoted by parameter
2259     * nameValParamsMap.
2260     * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2261     * query field data values. The names match the field names that
2262     * describeCollectionService() would have returned for the query service.
2263     * @param collection is the name of the collection
2264     * @param service is the name of the query service
2265     * This method is only ever called when any of the services in the digital
2266     * library described themselves as type=query. Therefore any digital
2267     * libraries that have no query services, can just return emtpy message
2268     * strings (or even "") since this method will never be called on them
2269     * anyway. */
2270    public String query(String collection, String service,
2271            Map nameValParamsMap)
2272    {
2273        FedoraGS3RunException ex = null;
2274        // (1) obtain the requested number of maximum result documents
2275        int maxDocs = 100;
2276        try{
2277            maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2278        } catch(NumberFormatException e) {
2279            maxDocs = 100;
2280        }
2281       
2282        String pids[] = {};
2283        // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2284        if(service.endsWith("TextQuery")) {
2285            try {
2286                // get the Query field:
2287                String query = (String)nameValParamsMap.get(QUERY);
2288                pids = textQuery(collection, query, maxDocs);
2289            }
2290            catch(Exception e) {
2291                LOG.error("Error in TextQuery processing: " + e);
2292                ex = new FedoraGS3RunException(
2293                    "When trying to use FedoraGenericSearch for a TextQuery", e);
2294               
2295            }
2296        } else { // (3) FieldQuery
2297            // first get the comma-separated lists
2298            String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2299            String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2300            // both are comma separated lists, so split both on 'comma'
2301            String[] fieldNames = listOfFieldNames.split(COMMA);
2302            String[] searchTerms = listOfSearchTerms.split(COMMA);
2303           
2304            // In the fieldNames and searchTerms lists of nameValParamsMap,
2305            // each searchTerm element was matched with its correspondingly
2306            // indexed fieldName.
2307            // A new map is going to reorganise this, by putting all terms
2308            // for a particular fieldName together in a comma separated list
2309            // and associating that with the fieldName. I.e. (key, value) ->
2310            // (fieldName, comma-separated list of all terms in that field)
2311            Map map = new HashMap();
2312            for(int i = 0; i < searchTerms.length; i++) {
2313                // there may be fewer searchTerms than fieldNames (since some
2314                // fieldNames may have been left empty), so loop on searchTerms
2315                if(map.containsKey(fieldNames[i])) { // fieldName is already
2316                    // in the list, so append comma with new value
2317                    String termsList = (String)map.get(fieldNames[i]);
2318                    termsList = termsList + COMMA + searchTerms[i];
2319                    map.put(fieldNames[i], termsList);
2320                } else { // this is the first time this fieldName occurred
2321                    // just put the fieldName with searchTerm as-is
2322                    map.put(fieldNames[i], searchTerms[i]);
2323                }
2324            }
2325           
2326            try {
2327                // For fieldquery, we search on all the fieldNames specified
2328                // - if DOC_TITLES is specified then we use Fedora's search
2329                // - for all other fieldNames specified, we use FedoraGSearch
2330                pids = fieldQuery(collection, map, maxDocs);
2331            }
2332            catch(Exception e) {
2333                LOG.error("Error in FieldQuery processing: " + e);
2334                ex = new FedoraGS3RunException(
2335                    "When trying to use FedoraGenericSearch for a FieldQuery", e);
2336            }
2337        }
2338       
2339        // Build Greenstone XML Query response message for from
2340        // the pids (which should be document identifiers)
2341        Document doc = builder.newDocument();
2342        // <metadataList><metadata name="numDocsMatched" value="n" />
2343        // </metadataList>
2344        Element metadataList = doc.createElement(
2345                GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2346        Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2347       
2348        Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2349        attribute.setValue(NUM_DOCS_MATCHED);
2350        metadata.setAttributeNode(attribute);
2351       
2352        attribute = doc.createAttribute(GSXML.VALUE_ATT);
2353        attribute.setValue(Integer.toString(pids.length));
2354        metadata.setAttributeNode(attribute);
2355       
2356        metadataList.appendChild(metadata);
2357       
2358        // <documentNodeList>
2359        // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2360        // docType='hierarchy' nodeType="leaf" />
2361        // ...
2362        // ...
2363        // </documentNodeList>
2364        Element docNodeList = doc.createElement(
2365                GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2366        // for each
2367        for(int i = 0; i < pids.length; i++) {
2368            Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2369            attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2370            attribute.setValue(pids[i]);
2371            docNode.setAttributeNode(attribute);
2372           
2373            attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2374            attribute.setValue("hierarchy");
2375            docNode.setAttributeNode(attribute);
2376           
2377            attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2378            attribute.setValue("root");
2379            docNode.setAttributeNode(attribute);
2380            docNodeList.appendChild(docNode);
2381        }
2382       
2383        Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2384                GSXML.REQUEST_TYPE_PROCESS, service);
2385        try{
2386            return FedoraCommons.elementToString(responseMsg);
2387        }catch(TransformerException e) {
2388            return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2389                + " " + e;
2390        }
2391    }
2392
2393   
2394    // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2395    /** Given a URL that represents a fedoraPID, will look up the object.
2396     * If it exists, it will return the contents of the DC:Title of its datastream.
2397     * If it doesn't exist, it will return the URL as-is.
2398     * @param URL: the URL that (after modification) represents a fedoraPID to look up.
2399     * @param collection: the name of collection in which to search for the URL
2400     * representing a fedoraPID.
2401     * @return the string (representing a fedoraPID) stored in the DC:Title of the
2402     * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2403     * then the parameter URL is returned.
2404    */
2405    public String getPIDforURL(String url, String collection) {
2406    FedoraGS3RunException ex = null; // any RemoteException
2407
2408    // (1) convert url to the fedorapid
2409    // / -> _ and : -> -
2410    String fedoraPID = url.replaceAll("/", "_");
2411    fedoraPID = fedoraPID.replaceAll(":", "-");
2412    // prefix "greenstone-http:<colname>-" to the fedoraPID
2413    fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2414    //LOG.error("### fedoraPID: " + fedoraPID);
2415
2416    // (2) Look up the datastream for the fedorapid
2417    String dcTitle = "";
2418    try {
2419        dcTitle = getDCTitle(fedoraPID);
2420    } catch(Exception e) {
2421        LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2422        ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2423    }
2424    //String dc = this.getDC(fedoraPID);
2425    //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2426
2427    // (3) if fedorapid exists, extract the dc:title content.
2428    // if it doesn't exist, return url
2429    if(dcTitle.equals("")) {       
2430        return url;
2431    } else {
2432        // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2433        //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2434        return dcTitle+"-1";
2435    }
2436    }
2437   
2438    public static void main(String args[]) {
2439        try{
2440            // testing default constructor
2441            //FedoraGS3Connection con = new FedoraGS3Connection();
2442           
2443            // testing constructor that takes properties file to show initial
2444            // fedora server values
2445            java.io.File propertyFilename
2446                = new java.io.File("fedoraGS3.properties");
2447            FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2448           
2449            // DESCRIBE: serviceList, collectionList
2450            System.out.println("serviceList:\n" + con.getServiceList());
2451           
2452            System.out.println("collectionList:\n" + con.getCollectionList());
2453           
2454            String[] colPIDs = con.getCollections();
2455            String[] collectionNames = con.getCollectionNames(con.getCollections());
2456           
2457           
2458            for(int i = 0; i < collectionNames.length; i++) {
2459                System.out.println("Describing collections:\n");
2460                System.out.println(con.describeCollection(collectionNames[i]));
2461                System.out.println("Describing collection services:\n"
2462                    + con.describeCollectionServices(collectionNames[i]));
2463            }
2464           
2465            String[] serviceNames = con.getServiceNames();
2466            for(int i = 0; i < serviceNames.length; i++) {
2467                System.out.println("Describing " + serviceNames[i] + ":\n"
2468                    + con.describeCollectionService("demo", serviceNames[i]));
2469            }
2470           
2471                       
2472            // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2473            // along with EX of the top-level document:
2474            System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2475            System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2476           
2477                       
2478            String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2479            System.out.println("\nGET CONTENT:");
2480            for(int i = 0; i < docIDs.length; i++) {
2481                System.out.println(con.getContent(docIDs[i]));
2482            }
2483           
2484            System.out.println("\nGET META:");
2485            for(int i = 0; i < docIDs.length; i++) {
2486                System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2487            }
2488           
2489            String[] getTitlesFor = {
2490                    "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2491                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2492                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2493                    "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2494                    "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2495            };
2496           
2497            // first let's display the regular meta for top-level docs and
2498            // their sections
2499            for(int i = 0; i < getTitlesFor.length; i++) {
2500                System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2501            }
2502           
2503            System.out.println("\nTitles are:");
2504            System.out.println(con.getTitleMetadata(getTitlesFor));
2505           
2506            System.out.println("\nGET STRUCTURE:");
2507            for(int i = 0; i < docIDs.length; i++) {
2508                System.out.println("Descendents and numChildren:\n"
2509                           + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2510                System.out.println("Parent and numSiblings:\n"
2511                           + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
2512            }
2513           
2514            // TEST ERROR CASES:
2515            System.out.println("\nTESTING ERROR CASES");
2516            System.out.println(con.getContent("greenstone:demo-pinky"));
2517            String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2518            "greenstone:demo-pinky" };
2519            System.out.println(con.getContent(errorCases));
2520            System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2521                           System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2522           
2523            System.out.println("\nCLASSIFIER BROWSE");
2524            System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2525                              new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2526           
2527            System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2528            String[] classNodeIDs = new String[26];
2529            for(int i = 0; i < classNodeIDs.length; i++) {
2530                int subClassifierNum = i + 1;
2531                classNodeIDs[i] = "CL1." + subClassifierNum;
2532            }
2533            System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2534                                      classNodeIDs, new String[]{"all"}));
2535           
2536            System.out.println("Testing query services");
2537            System.out.println("TEXT QUERY:");
2538            Map formControlValsMap = new HashMap();
2539            formControlValsMap.put(MAXDOCS, "100");
2540            formControlValsMap.put(QUERY, "snails");
2541            String searchResponse
2542                = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2543            System.out.println(searchResponse);
2544           
2545            System.out.println("FIELD QUERY:");
2546            formControlValsMap.clear();
2547            formControlValsMap.put(MAXDOCS, "100");
2548            formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2549            formControlValsMap.put(FIELDNAME_ATT,
2550                    "allFields,docTitles,allFields,allFields");
2551            searchResponse
2552                = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2553            System.out.println(searchResponse);
2554           
2555            System.exit(0);
2556        }catch(Exception e) {
2557            JOptionPane.showMessageDialog(
2558                    null, e, "Error", JOptionPane.ERROR_MESSAGE);
2559            //System.err.println("ERROR: " + e);
2560            e.printStackTrace();
2561        }
2562    }
2563}
Note: See TracBrowser for help on using the browser.