source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26270

Last change on this file since 26270 was 26270, checked in by ak19, 12 years ago
  1. Now checks request for nodeStructureInfo documentType, as is needed to get it working with GS3 again. 2. Added in reusable constants of gsdl3/util/AbstractBasicDocument.java since these have now been made public constants.
File size: 105.9 KB
RevLine 
[15222]1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
[26270]31import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
[15222]32import org.greenstone.gsdl3.util.GSXML;
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.Attr;
36import org.w3c.dom.Text;
37import org.w3c.dom.NodeList;
38import org.w3c.dom.Node;
39import org.xml.sax.InputSource;
40
41import java.io.File;
42import java.util.HashMap;
43import java.util.Properties;
44import java.util.Map;
45
46import javax.swing.JOptionPane;
47
48import org.xml.sax.SAXException;
49import java.io.UnsupportedEncodingException;
50import java.io.IOException;
51import javax.net.ssl.SSLHandshakeException;
52import java.net.ConnectException;
53import java.net.MalformedURLException;
54import java.rmi.RemoteException;
55import javax.xml.parsers.ParserConfigurationException;
56import javax.xml.transform.TransformerException;
57
58/**
59 * Class that extends FedoraConnection in order to be able to use
60 * Fedora's web services to retrieve the specific datastreams of
61 * Greenstone documents stored in Fedora's repository. This class
62 * provides methods that convert those datastreams into Greenstone3
63 * XML response messages which are returned.
64 * @author ak19
65*/
66public class FedoraGS3Connection
67 extends FedoraConnection implements FedoraToGS3Interface,
68 FedoraToGS3Interface.Constants
69{
70 /** The logging instance for this class */
71 private static final Logger LOG = Logger.getLogger(
72 FedoraGS3Connection.class.getName());
[21859]73
74 /** Default name of Fedora index */
75 private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
76
[15222]77 /** Complete list of services that are supported our FedoraGS3 would
78 * support if everything goes well. If a connection to FedoraGSearch
79 * cannot be established, the query services will no longer be
80 * available. The actual services supported are given by member
81 * variable serviceNames. */
82 protected static final String[] SERVICES = {
83 "DocumentContentRetrieve", "DocumentMetadataRetrieve",
84 "DocumentStructureRetrieve",
85 "TextQuery", "FieldQuery",
86 "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
87 };
88
89 /** List of services actually supported by our FedoraGS3 repository
90 * after construction. If FedoraGenericSearch can't be connected to,
91 * then query services will not be offered */
92 protected String[] serviceNames;
93
94 /** The object used to connect to FedoraGenericSearch, which is used
95 * for doing full-text searching */
96 protected GSearchConnection fedoraGSearch;
97
98 /** The url for the wsdl file of FedoraGSearch's web services
99 * by default this will be the Fedora server's base URL
100 * concatenated to "gsearch/services/FgsOperations?wsdl" */
101 protected String gSearchWSDLURL;
[15733]102
103 /** The last part of the gSearchWSDL URL. The first part is
104 * the same as the fedora server's base url. */
105 protected String gSearchWSDLSuffix;
106
[15437]107 /** The name of the index that FedoraGSearch will index the GS3
108 * documents into. If no name is specified in the properties file,
109 * this will default to FedoraIndex. */
110 protected String gSearchIndexName;
111
/** Five-argument constructor, mirroring that of the superclass
 * FedoraConnection. All of the work is delegated to the superclass
 * constructor.
 * To use a FedoraGSearch WSDL location other than the default
 * ("gsearch/services/FgsOperations?wsdl" appended to the Fedora base URL),
 * call setGSearchWSDLURL(url) after construction.
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for the administrative
 * authentication required to access the fedora server.
 * @param fedoraServerPassword is the password for the administrative
 * authentication required to access the fedora server. If no password
 * was set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
        String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    // Nothing to do beyond the superclass constructor.
    // NOTE(review): the search connection is presumably set up through the
    // overridden init(...) that the superclass constructor reaches - confirm
    // against FedoraConnection.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
137
/** No-argument constructor, mirroring that of the superclass
 * FedoraConnection, which displays a small dialog asking for the host,
 * port, administrative username and password of the fedora server.
 * If no password was set on the fedora repository at installation time,
 * the password field can be left blank. */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // which in turn attempts to instantiate the GSearchConnection.
    super();
}
152
/** Single-argument constructor, mirroring that of the superclass
 * FedoraConnection. It takes the properties file in which connection
 * initialisation values may already have been provided, and then displays
 * a small dialog asking for the host, port, administrative username and
 * password of the fedora server, pre-filled with the values found in the
 * properties file. Any value missing from the file leaves the
 * corresponding dialog field blank.
 * If no password was set on the fedora repository at installation time,
 * the password field can be left blank.
 * @param propertiesFilename is the properties file to read the initial
 * values from. */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // which in turn attempts to instantiate the GSearchConnection.
    super(propertiesFilename);
}
172
173 /** The superclass constructor calls this method passing any preset
174 * properties loaded from a propertiesFile. This method is overridden
175 * here in order to instantiate the gSearchConnection based on the
[15733]176 * - gSearchWSDLSuffix that will be appended to the fedora base url.
177 * (If one was not provided in the properties file, gSearchWSDLURL defaults
178 * to something of the form
179 * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
180 * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
181 * "gsearch/services/FgsOperations?wsdl".
[15437]182 * - name of the index into which the GS3 documents have been indexed
183 * and which FedoraGenericSearch should use to perform searches. If none is
[21573]184 * given in the properties file, then the index name defaults to "FedoraIndex".
[15222]185 * @param properties is the Properties Map loaded from a properties file
186 * (if there was any) which specifies such things as host and port of the
[15733]187 * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
188 * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
189 * to whatever the final value of this.gSearchWSDLURL' suffix is, and
[15437]190 * "gsearch.indexName" will be set to to whatever the final value of
191 * this.gSearchIndexName is.
[15222]192 */
193 protected void setInitialisationProperties(Properties properties)
194 throws ParserConfigurationException, MalformedURLException,
195 CancelledException, ConnectException, RemoteException,
196 SSLHandshakeException, Exception
197 {
198 super.setInitialisationProperties(properties);
[15733]199 // gsearchWSDL URL suffix, if not specified, defaults to
200 // "fedoragsearch/services/FgsOperations?wsdl" which is
201 // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
202 this.gSearchWSDLSuffix = properties.getProperty(
203 "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
204 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
[15222]205 // Set the property to whatever this.gSearchWSDLURL is now,
206 // so that it will be written out to the properties file again
[15733]207 properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
[15437]208
209 // Similarly for the name of the index FedoraGenericSearch should use
210 // when performing searches for GS3 docs stored in Fedora's repository.
211 this.gSearchIndexName = properties.getProperty(
[21859]212 "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
[15437]213 properties.setProperty("gsearch.indexName", this.gSearchIndexName);
[15222]214 // Create a connection to FedoraGSearch's web services:
215 initSearchFunctionality();
216 }
[21859]217
218 /** Overridden init method to work with the 5 argument constructor, so that we can
219 * bypass using setInitialisationProperties() which works with a Properties map.
220 */
221 protected void init(String protocol, String host, String port,
222 String fedoraServerUsername, String fedoraServerPassword)
223 throws ParserConfigurationException, MalformedURLException,
224 AuthenticationFailedException, RemoteException, Exception
225 {
226 super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
227 this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
228 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
229 this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
230 initSearchFunctionality();
231 }
[15222]232
[21859]233
[15222]234 /** Init method that instantiates a GSearchConnection object used
235 * to work with the separate FedoraGSearch web services.
236 * The url of the WSDL for FedoraGSearch's web services is worked out
237 * from the baseURL of the Fedora server.
238 */
239 protected void initSearchFunctionality()
240 {
241 try {
[15437]242 this.fedoraGSearch = null;
243 this.fedoraGSearch = new GSearchConnection(
244 gSearchWSDLURL, gSearchIndexName);
[15222]245 this.serviceNames = SERVICES;
246 } catch(Exception e){
247 LOG.error("Cannot connect to FedoraGSearch's web services at "
248 + gSearchWSDLURL + "\nQuery services will not be available.");
249 // If an exception occurs, something has gone wrong when
250 // trying to connect to FedoraGSearch's web services. This
251 // means, we can't offer query services, as that's provided
252 // by FedoraGSearch
253 serviceNames = null;
254 int countOfNonQueryServices = 0;
255 for(int i = 0; i < SERVICES.length; i++) {
256 // do not count query services
257 if(!SERVICES[i].toLowerCase().contains("query")) {
258 countOfNonQueryServices++;
259 }
260 }
261 // Services now supported are everything except Query services
262 serviceNames = new String[countOfNonQueryServices];
[15331]263 int j = 0;
[15222]264 for(int i = 0; i < SERVICES.length; i++) {
265 if(!SERVICES[i].toLowerCase().contains("query")) {
[15331]266 serviceNames[j] = SERVICES[i];
267 j++; // valid serviceName, so increment serviceName counter
[15222]268 }
269
270 }
271 }
272 }
273
274 /** @return the gSearchWSDLURL, the url of the WSDL for the
275 * FedoraGSearch web services */
276 public String getGSearchWSDLURL() { return gSearchWSDLURL; }
277
278 /** Sets the member variable gSearchWSDLURL that specify the location of
279 * the WSDL file of FedoraGSearch's web services. Then it attempts
280 * to instantiate a connection to those web services.
281 * @param url is the new url of the GSearch web services WSDL file */
282 public void setGSearchWSDLURL(String url) {
[15437]283 this.gSearchWSDLURL = url;
[15222]284 initSearchFunctionality();
285 }
286
[15437]287 /** @return the gSearchIndexName, the name of the index Fedora Generic
288 * Search will search in (where GS3 docs have been indexed into). */
289 public String getGSearchIndexName() { return gSearchIndexName; }
290
291 /** Sets the member variable gSearchIndexName that specifies the name
292 * of the index containing indexed GS3 documents. Then it attempts
293 * to instantiate a connection to the Fedora GSearch web services using
294 * this changed value for indexName.
295 * @param indexName is the new name of the index containing indexed GS3
296 * docs that GSearch should search in. */
297 public void setGSearchIndexName(String indexName) {
298 this.gSearchIndexName = indexName;
299 initSearchFunctionality();
300 }
301
[15222]302 /** @return the array of the services actually supported by FedoraGS3 */
303 protected String[] getServiceNames() { return this.serviceNames;}
304
305 /**
306 * For finding out if the sectionNumber is given as part of the docID.
307 * @param docID is the String that contains the docPID and may also
308 * contain the section number.
309 * @return true if the document identifier docID contains a section-
310 * number, and false if it consists solely of the docPID.
311 * That is, true is returned if
312 * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
313 * and false is returned if
314 * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
315 * */
316 protected boolean containsSectionNumber(String docID) {
317 // if there are two hyphens in the docID, then there are sections
318 // (and the section number is appended at end of docID)
319 // docID = "greenstone:colName-<docPID>-<sectionNum>"
320 return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
321 }
322
323 /** This method will extract the docPID from docID and return it.
324 * (If a sectionNumber is suffixed to the docID, the docPID which is
325 * the prefix is returned; otherwise the docID is the docPID and is
326 * returned)
327 * @param docID is the String that contains the docPID and may also
328 * contain the section number.
329 * @return only the docPID portion of the docID.
330 */
331 protected String getDocPIDFromDocID(String docID) {
332 if(containsSectionNumber(docID))
333 return docID.substring(0, docID.lastIndexOf(HYPHEN));
334 // else (if there's no sectionNumber), docID is the docPID
335 return docID;
336 }
337
338 /** This method will return the section Number, if there's any
339 * suffixed to the docID. Otherwise it will return the empty string
340 * @param docID is the String that contains the docPID and may also
341 * contain the section number.
342 * @return only the sectionID portion of the docID - if any, else "".
343 */
344 protected String getSectionIDFromDocID(String docID) {
345 if(containsSectionNumber(docID))
346 return docID.substring(
347 docID.lastIndexOf(HYPHEN)+1, docID.length());
348 return "";
349 }
350
351 /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
352 * response message that gives the metadata for each collection identified
353 * @param collIDs is an array of fedora pids identifying collections in the
354 * fedora repository
355 * @return a GS3 DocumentMetadataRetrieve response message containing the
356 * EX metadata for all the requested collections */
[22300]357 public String getCollectionMetadata(String[] collIDs) {
358 return getMetadata(collIDs, new String[] {"all"});
[15222]359 }
360
361 /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
362 * response message is returned containing the metadata for each document.
363 * @param docIDs is an array of document identifiers (docID can either be
364 * &lt;pid&gt;s items (documents) in the fedora repository, or
365 * "&lt;pid&gt;-sectionNumber".
366 * @return a GS3 DocumentMetadataRetrieve response message containing the
[22300]367 * EX, DC, DLS metadata for all the requested documents
368 * @param metadata is the list of metadata elements to be retrieved for each doc */
369 public String getDocumentMetadata(String[] docIDs, String[] metadata) {
370 return getMetadata(docIDs, metadata);
[15222]371 }
372
373 /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
374 * response message that gives the metadata for the collection identified
375 * @param collID is a fedora pid identifying a collection in its repository
376 * @return a GS3 DocumentMetadataRetrieve response message containing the
[22300]377 * EX metadata for the requested collection
378 * @param metadata is the list of metadata elements to be retrieved for each doc */
[15222]379 public String getCollectionMetadata(String collID) {
[22300]380 return getMetadata(new String[] {collID}, new String[] {"all"});
[15222]381 }
382
383 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
384 * response message containing the metadata for the document.
385 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
386 * of an item (document) in the fedora repository, or it can be
387 * "&lt;pid&gt;-sectionNumber".
388 * @return a GS3 DocumentMetadataRetrieve response message containing the
389 * EX, DC, DLS metadata for the requested document */
[22300]390 public String getDocumentMetadata(String docID, String[] metadata) {
391 return getMetadata(new String[] {docID}, metadata);
[15222]392 }
393
394 /** @return a greenstone DocumentMetadataRetrieve response for the
395 * documents or collections indicated by the docIDsOrCollIDs.
396 * @param docIDsOrCollIDs is an array of identifiers which may be either the
397 * fedora pids for collections, or otherwise may be a document identifier.
398 * In the last case, the document ID may consist of either
[22300]399 * "documentPID-sectionNumber" or may just be just fedora documentPID
400 * @param metadata is the list of metadata elements to be retrieved for each doc */
401 public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
[15222]402 {
403 Document doc = builder.newDocument();
404 FedoraGS3RunException ex = null;
405
406 Element docNodeList = doc.createElement(
407 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
408
409 try{
410 for(int i = 0; i < docIDsOrCollIDs.length; i++) {
411 // create the <documentNode> containing the metadata
412 // for each document docID
[22300]413 Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
[15222]414 docNodeList.appendChild(docNode);
415 }
416 } catch(Exception e) {
417 ex = new FedoraGS3RunException(e);
418 ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
419 }
420
421 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
422 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
423 try{
[22300]424 return FedoraCommons.elementToString(responseMsg);
[15222]425 } catch(TransformerException e) {
426 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
427 + " " + e;
428 }
429 }
430
431 /** Method that takes a new DOM document, as well as an identifier of either
432 * a collection or document (which may be a fedora pid for the collection
433 * or document, or may be the documentPid-sectionNumber for a document) and
434 * returns a documentNode element for it:
435 * &lt;documentNode&gt;&lt;metadataList&gt;
436 * &lt;metadata name=""&gt;value&lt;/metadata&gt;
437 * ...
438 * &lt;/metadataList&gt;&lt;/documentNode&gt;
439 * @return documentNode containing the metadata for the collection or
440 * document given by parameter ID
441 * @param id denotes a collection pid, a document pid or a docID of the
[22300]442 * form "documentpid-sectionNumber"
443 * @param metadata is the list of metadata elements to be retrieved for each doc */
444 protected Element getMetadata(Document doc, String id, String[] metadata)
[15222]445 throws RemoteException, UnsupportedEncodingException,
446 SAXException, IOException
447 {
448 // We're going to create the documentNode nested inside the following
449 // documentNodeList:
450 // <documentNodeList>
451 // <documentNode nodeID=""><metadataList>
452 // <metadata name="">value</metadata>
453 // </metadataList></documentNode>
454 // <documentNode>...</documentNode>
455 // </documentNodeList>
456 // <documentNodeList>
457
[26171]458 // <documentNode nodeID="docID"> - the docNode on which a metadata
[15222]459 // retrieve is being performed
460 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
461 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
462 attribute.setValue(id);
463 docNode.setAttributeNode(attribute);
464
465 // <metadataList>
466 Element metadataList = doc.createElement(
467 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
468
469 String ex = "";
470 String dc = "";
471 String dls = "";
472 if(id.endsWith(_COLLECTION)) { // docID refers to a collection
473 // Obtain the "EX" datastream (extracted metadata) for the collection
474 ex = this.getEX(id);
475 }
476 else { // docID refers to a document
477 // work out the document's fedora PID and section ID, and then
478 // obtain the EX (extracted metadata) and DC datastreams for the doc
479
480 // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
481 // is the same as for pid="greenstone:<colname>-docPID"
482 // That is, <Section id="1"> refers to the toplevel document docPID
483 // If requested for top-level document, there may also be DLS meta
484 String sectionID = getSectionIDFromDocID(id);
485 String docPID = getDocPIDFromDocID(id);
486 if(sectionID.equals("") || sectionID.equals("1")) {
[21573]487 // metadata of toplevel document is requested
[15222]488 ex = this.getEX(docPID); // slightly faster than doing
489 //getSectionEXMetadata(docID, "1")
490 dc = this.getDC(docPID);
491 dls = this.getDLS(docPID);
492 }
493 else {
494 ex = getSectionEXMetadata(docPID, sectionID);
495 dc = getSectionDCMetadata(docPID, sectionID);
496 }
497 }
498
[22300]499 String metafields = "";
500 for(int i = 0; i < metadata.length; i++) {
501 metafields = metafields + metadata[i] + "|";
502 }
503
[15222]504 // Adding in metadata sets in alphabetical order
505 // DC metadata for a top-level document is different from EX, DLS:
506 // only the element's namespace prefix is "dc", the rest of a tagname
507 // is unknown.
508 if(!dc.equals("")) {
509 addMetadataWithNamespacedTagNames(doc, metadataList,
[22300]510 dc, DC, metafields);
[15222]511 }
512
513 // Check if we were supposed to process dls and dc metadata
514 // as well. We only ever do this for top-level documents,
515 // in which case, dls and dc will be non-empty strings
516 if(!dls.equals("")) {
[22300]517 addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
[15222]518 }
519
520 // we definitely have an EX metadatastream for each
521 // collection object, top-level document object,
522 // and document section item
[22300]523 addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
[15222]524
525 // now the metadataList has been built up
526 docNode.appendChild(metadataList);
527
528 return docNode; // return <documentNode> containing the metadata
529 }
530
531 /** This method retrieves all the metadata elements in the metaDataStream
532 * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
533 * metadataSetNS is the namespace of each tag, and creates a new element of
534 * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
535 * each. Each of these are then appended to the metadataList parameter.
536 * @param doc is the Document object using which the new metadata Elements
537 * are to be constructed
538 * @param metadataList is the &lt;metadataList&gt; Element to which the new
539 * metadata Elements are to be appended as children.
540 * @param metaDatastream the metadata datastream in string form (e.g. the
541 * Dublin Core metadata stored in the Fedora repository).
542 * @param metadataSet is the constant datastream identifier, e.g. "DC".
[22300]543 * At present this method applies to the DC metadata and any others like it
544 * where each tagname is different except for the constant dc: namespace.
545 * @param metafields is a | separated string containing the metadatafields to
546 * extract or "all" if all fields are requested
[15222]547 */
[22300]548 protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
549 String metaDatastream, String metadataSet, String metafields)
[15222]550 throws SAXException, IOException
551 {
552 Document src = builder.parse(
553 new InputSource(new StringReader(metaDatastream)));
554
555 // The following doesn't work for some reason: to retrieve all elements
556 // whose namespace prefix starts with "dc", we pass "*" for localName
[22300]557 //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
[15222]558
559 // Longer way: get the children of the root document
560 NodeList children = src.getDocumentElement().getChildNodes();
561
562 for(int i = 0; i < children.getLength(); i++) {
563 String nodeName = children.item(i).getNodeName();
[22300]564 // check that the nodename starts with the metadataSet ("dc") namespace,
[15222]565 // which simultaneously ensures that the node's an element:
[22300]566 if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
567 // need to have a period for Greenstone instead of Fedora's colon
568 nodeName = nodeName.replace(COLON, PERIOD);
569 if(metadataSet.equals(DC)) { // dc:title -> dc.Title
570 nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
571 + nodeName.substring(4);
572 }
573
574 // get the requested metadata fields
575 if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
[15222]576 Element metatag = (Element)children.item(i);
577 String value = FedoraCommons.getValue(metatag);
578 // <dc:tagname>value</dc:tagname>
[22300]579 // we're going to put this in our metadata element as
580 // <metadata name="dc.Tagname">value</metadata>
[15222]581
582 // create metadata of (name, value) pairs in target DOM (doc)
583 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
584 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
[22300]585
[15222]586 attribute.setValue(nodeName);
587 metadata.setAttributeNode(attribute);
588 Text content = doc.createTextNode(value);
589 metadata.appendChild(content);
590 metadataList.appendChild(metadata);
[22300]591 }
[15222]592 }
593 }
594 }
595
596 /** This method retrieves all the metadata elements in the metaDataStream
597 * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt;
598 * where "namespace" is the namespace prefix of each tag, and metadataName
599 * is the name of the metadata (like author, title). For each element
600 * it creates a corresponding new element of the form
[22300]601 * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
602 * Each of these are then appended to the metadataList parameter.
[15222]603 * @param doc is the Document object using which the new metadata Elements
604 * are to be constructed
605 * @param metadataList is the &lt;metadataList&gt; Element to which the new
606 * metadata Elements are to be appended as children.
607 * @param metaDatastream the metadata datastream in string form (e.g. the
608 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
609 * repository).
610 * @param metadataSet is the constant datastream identifier,
611 * e.g. "DLS" or "EX".
612 * At present this method applies to the DLS and EX metadata as they have
613 * constant tagnames throughout.
[22300]614 * @param metafields is a | separated string containing the metadatafields to
615 * extract or "all" if all fields are requested.
[15222]616 */
[22300]617 protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
618 String metaDatastream, String metadataSet, String metafields)
[15222]619 throws SAXException, IOException
620 {
621 // Namespace prefix can be "ex:" or "dls:"
622 String namespacePrefix = "";
623 if(!metadataSet.equals(EX)) {
624 // need to have a period for Greenstone instead of Fedora's colon
625 namespacePrefix = metadataSet.toLowerCase() + PERIOD;
626 }
627
628 Document src = builder.parse(
629 new InputSource(new StringReader(metaDatastream)));
630 NodeList metaTags = src.getElementsByTagName(
631 metadataSet.toLowerCase()+COLON+METADATA);
632 // Looking for tagnames: <ex:metadata> or <dls:metadata>
633
634 for(int i = 0; i < metaTags.getLength(); i++) {
635 Element metatag = (Element)metaTags.item(i);
636
637 // extract the metadata of (name, value) pairs from src DOM
638 // look for <metadata name="name">value</metadata>
639 String name = metatag.hasAttribute(NAME) ?
640 metatag.getAttribute(NAME) : "";
641 // sometimes, there are several metadata for the same name, in this
642 // case, look for a qualifier and append its value to the name to
643 // distinguish it uniquely:
644 if(metatag.hasAttribute(QUALIFIER)) {
645 name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
646 }
[22300]647 name = namespacePrefix + name; // prefix with namespace, if any
648 if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
649 String value = FedoraCommons.getValue(metatag);
650
651 // create metadata of (name, value) pairs in target DOM (doc)
652 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
653 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
654 attribute.setValue(name);
655 metadata.setAttributeNode(attribute);
656 Text content = doc.createTextNode(value);
657 metadata.appendChild(content);
658
659 metadataList.appendChild(metadata);
660 }
[15222]661 }
662 }
663
664 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
665 * response message containing ONLY the Title metadata for the document.
666 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
667 * of an item (document) in the fedora repository, or it can be
668 * "&lt;pid&gt;-sectionNumber".
669 * @return a GS3 DocumentMetadataRetrieve response message containing the
670 * Title metadata for the requested document */
671 public String getTitleMetadata(String docID) {
672 return getTitleMetadata(new String[] { docID });
673 }
674
675 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
676 * response message containing ONLY the Title metadata for the documents.
677 * @param docIDs is a list of document identifiers (where docID can either be
678 * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
679 * "&lt;pid&gt;-sectionNumber".
680 * @return a GS3 DocumentMetadataRetrieve response message containing the
681 * Title metadata for all the requested documents */
682 public String getTitleMetadata(String[] docIDs) {
683 // Must create message of the following form:
684 // <documentNodeList><documentNode nodeID="docID">
685 // <metadataList><metadata name="Title">sometitle</metadata>
686 // </metadataList></documentNode>
687
688 Document doc = builder.newDocument();
689 FedoraGS3RunException ex = null;
690
691 Element docNodeList = doc.createElement(
692 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
693 try{
694 for(int i = 0; i < docIDs.length; i++) {
695 Element docNode = getTitleMetadata(doc, docIDs[i]);
696 docNodeList.appendChild(docNode);
697 }
698 }catch(Exception e) {
699 ex = new FedoraGS3RunException(e);
[21573]700 //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
[15222]701 ex.setSpecifics("EX metadata datastream");
702 }
703
704 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
705 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
706 try{
[22300]707 return FedoraCommons.elementToString(responseMsg);
[15222]708 } catch(TransformerException e) {
709 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
710 + " " + e;
711 }
712 }
713
714 /** Method that takes a new DOM document, as well as an identifier of either
715 * a document or document section and returns a documentNode element containing
716 * the title metadata for it:
717 * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
718 * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
719 * &lt;/metadataList&gt;&lt;/documentNode&gt;
720 * @return documentNode containing the metadata for the collection or
721 * document given by parameter ID
722 * @param docID denotes the id of a document or a document section, so id
723 * is either a document-pid or it's of the form documentpid-sectionNumber */
724 protected Element getTitleMetadata(Document doc, String docID)
725 throws RemoteException, UnsupportedEncodingException,
726 SAXException, IOException
727 {
728 // Returns a docNode element of the following form:
729 // <documentNode nodeID="docID">
730 // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
731 // </documentNode>
732
733 // <documentNode nodeID="docID">
734 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
735 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
736 attribute.setValue(docID);
737 docNode.setAttributeNode(attribute);
738
739 // <metadataList>
740 Element metaList = doc.createElement(
741 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
742 // <metadata name="Title">
743 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
744 // if we connect it all up (append children), we can immediately add
745 // the name attribute into the metadata element:
746 metaList.appendChild(metadata);
747 docNode.appendChild(metaList);
748 metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
749
750 String title = "";
751 String sectionID = getSectionIDFromDocID(docID);
752 String docPID = getDocPIDFromDocID(docID);
753
754 // check if title of toplevel document is requested
755 if(sectionID.equals(""))
756 title = this.getDocTitle(docPID);
757 else { // title of document section
758 title = this.getSectionTitle(docPID, sectionID);
759 }
760
761 metadata.appendChild(doc.createTextNode(title));
762
763 return docNode;
764 }
765
[22300]766 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
767 * containing the requested portion of the document structure of the documents
768 * indicated by docIDs:
769 * @param docID is the document identifier of the document whose hierarchical
770 * structure is requested. The name of the collection is already included in the
771 * docID for a Fedora DL.
772 * @param structure - strings specifying the required structure of the document.
773 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
774 * @param info - strings specifying the required structural info of the document.
775 * It can be any combination of: siblingPosition, numSiblings, numChildren.
776 */
777 public String getDocumentStructure(String docID, String[] structure, String[] info) {
778 return getStructure(new String[]{docID}, structure, info);
779 }
780
781
782 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
783 * containing the requested portion of the document structure of the documents
784 * indicated by docIDs:
785 * @param docIDs is an array of document identifiers of documents whose
786 * hierarchical structures are requested. The name of the collection is already
787 * included in the docID for a Fedora DL.
788 * @param structure - strings specifying the required structure of each document.
789 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
790 * @param info - strings specifying the required structural info of each document.
791 * It can be any combination of: siblingPosition, numSiblings, numChildren.
792 */
793 public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
794 return getStructure(docIDs, structure, info);
795 }
796
797 /**
[15222]798 * Returns a greenstone3 DocumentStructureRetrieve XML response message
799 * containing the document structures for the given docIDs.
800 * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
801 * greenstone formatted XML is returned. The requested section of the table
802 * of contents (TOC) for a document is converted into the greenstone3 xml
803 * format that is returned upon DocumentStructureRetrieve requests.
804 * @param docIDs the documentIDs for which the section's structure is returned;
805 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
[22300]806 * @param structure - the structure of the sections to return. Can be any combination of:
807 * ancestors, parent, siblings, children, descendants, entire.
808 * @param infos - strings containing any combination of the values: numChildren, numSiblings,
809 * siblingPosition. The requested info gets added as attributes to the returned root element.
[15222]810 * @return a greenstone3 DocumentStructureRetrieve XML response message in
811 * String format with the structure of the docIDs requested.
812 */
[22300]813 protected String getStructure(String[] docIDs, String[] structure, String[] infos)
[15222]814 {
815 Document doc = builder.newDocument();
816 FedoraGS3RunException ex = null;
817 // <documentNodeList>
818 Element docNodeList = doc.createElement(
819 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
820
821 try{
822 // append the <documentNodes> for the docIDs
823 // to the docNodeList
[22300]824 //getStructureElement(docNodeList, docIDs, levels);
825 getStructureElement(docNodeList, docIDs, structure, infos);
[15222]826 } catch(Exception e) {
827 ex = new FedoraGS3RunException(e);
828 ex.setSpecifics("(requested portion of) TOC datastream");
829 }
830 // insert our <documentNodeList> into a GS3 response message
831 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
832 GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
833 try{
[22300]834 return FedoraCommons.elementToString(responseMsg);
[15222]835 } catch(TransformerException e) {
836 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
837 + " " + e;
838 }
839 }
[22300]840
841
842 /** Given a &lt;documentNodeList&gt; portion of a greenstone3
[15222]843 * DocumentStructureRetrieve XML response message, this method will populate
844 * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
845 * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
846 * the doc structures are appended.
847 * @param docIDs the documentIDs for which the section's structure is returned;
848 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
[22300]849 * @param structures - the structure of the sections to return. Can be any combination of:
850 * ancestors, parent, siblings, children, descendants, entire.
851 * @param infos - a string containing any combination of the values: numChildren, numSiblings,
852 * siblingPosition. The requested info gets added as attributes to the returned root element.
[15222]853 */
[22300]854 protected void getStructureElement(Element docNodeList, String[] docIDs,
855 String[] structures, String[] infos)
[15222]856 throws RemoteException, UnsupportedEncodingException, SAXException,
857 IOException
858 {
[22300]859 // Make one string out of requested structure components, and one string from info components
860 String structure = "";
861 String info = "";
862 for(int i = 0; i < structures.length; i++) {
863 structure = structure + structures[i] + "|";
864 }
865 for(int i = 0; i < infos.length; i++) {
866 info = info + infos[i] + "|";
867 }
868
869 // process each docID
870 for(int i = 0; i < docIDs.length; i++) {
871 // work out the document's fedora PID and section ID
872 String sectionID = getSectionIDFromDocID(docIDs[i]);
873 String docPID = getDocPIDFromDocID(docIDs[i]);
874 if(sectionID.equals("")) {
875 sectionID = "1";
[15222]876 }
[22300]877
878 // get the required section, along with children or descendants
879 Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
880 Document doc = docNodeList.getOwnerDocument();
881
882 // copy-and-convert that structure into a structure format for GS3
883 Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
884
885 if(!info.equals("")) {
886 // <nodeStructureInfo>
887 // <info name="" value="" />
888 // <info name="" value="" />
889 // ...
890 // </nodeStructureInfo>
[26270]891 Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
[22300]892 Element root = srcDocElement.getOwnerDocument().getDocumentElement();
893
[26270]894 if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
895 String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
[22300]896 Element infoEl = doc.createElement(GSXML.INFO_ATT);
[26270]897 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
[22300]898 infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
899 nodeStructureInfo.appendChild(infoEl);
900 }
901
[26270]902 if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
903 String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
[22300]904 Element infoEl = doc.createElement(GSXML.INFO_ATT);
[26270]905 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
[22300]906 infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
907 nodeStructureInfo.appendChild(infoEl);
908 }
909
[26270]910 if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
911 String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
[22300]912 Element infoEl = doc.createElement(GSXML.INFO_ATT);
[26270]913 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
[22300]914 infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
915 nodeStructureInfo.appendChild(infoEl);
916 }
[26270]917
918 if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
919 String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
920 Element infoEl = doc.createElement(GSXML.INFO_ATT);
921 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
922 infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
923 nodeStructureInfo.appendChild(infoEl);
924 }
925
[22300]926 docNode.appendChild(nodeStructureInfo);
927 }
928
929 // add it to our list of documentNodes
930 docNodeList.appendChild(docNode);
931 }
[15222]932 }
[22300]933
[15222]934
935 /**
936 * Takes the portion of the XML document outlining the structure of the
937 * document (section)--in the format this is stored in Fedora--and returns
938 * Greenstone 3 DOM XML format for outlining document structure.
939 * @return a &lt;documentNode&gt; element that contains a greenstone3
940 * DocumentStructureRetrieve XML corresponding to the parameter Element section
941 * (which is in fedora XML), for the document indicated by docID.
942 * @param requestingDocID is the identifier of the document for which the
943 * structure was requested. It's this document's children or descendants that
944 * will be returned. Note that this is not always the same as (clear from)
945 * parameter docID.
946 * @param docID is the documentID for which the section's structure is
947 * returned where docID = "docPID-sectionNumber".
948 * @param section - the fedora section XML that is being mirrored in
949 * greenstone3 format.
950 */
951 protected Element getStructure(Document doc, String requestingDocID,
952 String docID, Element section)
953 {
954 // we want to mirror the section's DOM (given in fedora XML) in
955 // greenstone3's XML for a DocumentStructureRetrieve response.
956
957 // <documentNode nodeID="docID"> - the docNode on which a structure retrieve
958 // is being performed
959 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
960 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
961 attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
[22300]962 docNode.setAttributeNode(attribute);
[15222]963
964 // <nodeStructure>
965 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
966
967 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
968 Element rootNode = createDocNodeFromSubsection(doc, section, docID);
969
970 // fills in the subtree of the rootNode in our nodeStructure element
971 createDocStructure(doc, section, rootNode, docID);
972 //where section represents the root section
973
974 nodeStructure.appendChild(rootNode);
975 docNode.appendChild(nodeStructure);
976 return docNode;
977 }
[22300]978
979
[15222]980 /** Recursive method that creates a documentStructure mirroring parameter
981 * section, starting from parameter parent down to all descendants
982 * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
983 * for the docPID whose substructure is to be mirrored
984 * @param parent is the XML documentNode in the greenstone repository whose
985 * descendants created by this method will correspond to the descendants of
986 * parameter section.
987 * @param doc is the document containing the parent;
988 * @param docPID is the prefix of all nodeIDs in the parent's structure
989 */
990 protected void createDocStructure(
991 Document doc, Element section, Element parent, String docPID)
992 {
993 // get the section's children (if any)
994 NodeList children = section.getChildNodes();
995 for(int i = 0; i < children.getLength(); i++) {
996 Node n = children.item(i);
997
998 if(n.getNodeName().equals(SECTION_ELEMENT)) {
999 //then we know it's an element AND that its tagname is "Section"
1000 Element subsection = (Element)n;
1001 Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1002 parent.appendChild(child);
1003
1004 // recursion call on newly found child-element and subsection
1005 createDocStructure(doc, subsection, child, docPID);
1006 }
1007 }
1008 }
1009
1010 /** Given a particular subsection element, this method creates a
1011 * Greenstone3 DocumentNode element that mirrors it.
1012 * @param doc is the document that will contain the created DocumentNode
1013 * @param docID is the prefix of all nodeIDs in the parent's structure
1014 * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1015 * TOC for the docPID which will be mirrored in the greenstone XML
1016 * documentNode that will be returned.
1017 * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1018 * &lt;Section&gt; element passed as parameter subSection. */
1019 protected Element createDocNodeFromSubsection(
1020 Document doc, Element subSection, String docID)
1021 {
1022 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1023 Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1024 docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1025 docNode.setAttributeNode(docType);
1026
1027 Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1028 String sectionID = subSection.hasAttribute(ID) ?
1029 subSection.getAttribute(ID) : "";
[22300]1030 if(sectionID.equals("1")
1031 && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
[22302]1032 // reset the attribute without the section number (just "docID" may be important for democlient?)
1033 nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
[22300]1034 } else {
1035 nodeID.setValue(docID + HYPHEN + sectionID);
1036 }
1037 //nodeID.setValue(docID + HYPHEN + sectionID);
[15222]1038 docNode.setAttributeNode(nodeID);
1039
1040 Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
[22300]1041 if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1042 nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1043 }
[15222]1044 docNode.setAttributeNode(nodeType);
1045 return docNode;
1046 }
1047
1048
1049 /** Given an identifier that is either a docPID or a concatenation of
1050 * docPID+sectionID, this method works out the fedora assigned docPID and
1051 * sectionID and then calls getContentBody(docPID, sectionID) with those.
1052 * @param docID is expected to be of the form
1053 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1054 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1055 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1056 * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1057 * is returned! */
1058 public String getContent(String docID) {
1059 return this.getContent(new String[]{docID});
1060 }
1061
1062 /** Given an identifier that is a concatenation of docID+sectionID, this
1063 * method works out the fedora assigned docPID and sectionID and then calls
1064 * getContentBody(docPID, sectionID) with those.
1065 * @param docIDs is an array of document identifiers of the form
1066 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1067 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1068 * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1069 public String getContent(String[] docIDs) {
1070 Document doc = builder.newDocument();
1071 FedoraGS3RunException ex = null;
1072
1073 //<documentNodeList>
1074 Element docNodeList = doc.createElement(
1075 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1076
1077 try{
1078 for(int i = 0; i < docIDs.length; i++) {
1079 // get the sectionID and docPID from the docID
1080 String sectionID = this.removePrefix(
1081 getSectionIDFromDocID(docIDs[i]), SECTION);
1082 String docPID = getDocPIDFromDocID(docIDs[i]);
1083 if(sectionID.equals("")) // if no section is specified, get
1084 sectionID = "1"; // get the content for Section id="1"
1085
1086 // Get the contents for the requested section of document docPID
1087 String sectionContent = this.getContentBody(docPID, sectionID);
1088
1089 // set the nodeID attribute
1090 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1091 Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1092
1093 nodeId.setValue(docIDs[i]); // just set the docID which will contain
1094 // the docPID (and sectionID if already present)
1095
1096 docNode.setAttributeNode(nodeId);
1097 // set the text content to what was retrieved
1098 Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[21775]1099 Text textNode = doc.createTextNode(sectionContent.trim());
[15222]1100
1101 nodeContent.appendChild(textNode);
1102 docNode.appendChild(nodeContent);
1103 //add the documentNode to the docNodeList
1104 docNodeList.appendChild(docNode);
1105 }
1106 } catch(Exception e) {
1107 ex = new FedoraGS3RunException(e);
1108 ex.setSpecifics("requested doc Section datastream");
1109 }
1110 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1111 GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1112 try{
[22300]1113 return FedoraCommons.elementToString(responseMsg);
[15222]1114 } catch(TransformerException e) {
1115 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1116 + " " + e;
1117 }
1118 }
1119
1120 /** Gets the contents of a textNode from a section.
1121 * @return the text content of a section.
1122 * @param docPID the pid of the document from which a section's text is to
1123 * be retrieved.
1124 * @param sectionID is the section identifier of the document denoted by
1125 * docPID whose text is to be returned.
1126 */
1127 protected String getContentBody(String docPID, String sectionID)
1128 throws RemoteException, UnsupportedEncodingException,
1129 SAXException, IOException
1130 {
1131 String section = this.getSection(docPID, sectionID);
1132
1133 // the content is nested inside a <Section> element,
1134 // we extract it from there:
1135 InputSource source = new InputSource(new StringReader(section));
1136 Document doc = builder.parse(source);
1137
1138 // The document Element is the <Section> we want.
1139 // Get its text contents:
1140 section = FedoraCommons.getValue(doc.getDocumentElement());
1141
1142 // we are going to remove all occurrences of "_httpdocimg_/"
1143 // that precede associated filenames, because that's a GS3
1144 // defined macro for resolving relative urls. It won't help
1145 // with documents stored in fedora.
1146 section = section.replaceAll(GS3FilePathMacro+"/", "");
1147 return section;
1148 }
1149
1150 /** Here we create the greenstone's response message element:
1151 * &lt;message&lg;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1152 * @return a greenstone response-message element.
1153 * @param doc - the Document object which should me used to create the
1154 * &lt;message&gt; and &lt;response&gt; elements
1155 * @param content - the element that is to be nested inside &lt;response&gt;
1156 * @param ex - any exception that occurred when trying to create
1157 * the content parameter
1158 * @param responseType - the value for the type attribute of &lt;response&gt;,
1159 * such as "describe", "retrieve", "browse", "query"...
1160 * @param originator - indiates the collectionName or service (like
1161 * DocumentContentRetrieve) from where this response message originates
1162 */
1163 protected Element createResponseMessage(Document doc, Element content,
1164 Exception ex, String responseType, String originator)
1165 {
1166 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1167 // from = "FedoraGS3"
[21924]1168 Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1169 attribute.setValue(originator);
[15222]1170 response.setAttributeNode(attribute);
1171
1172 // type = "describe" or "process" - whatever's given in requestType:
1173 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1174 attribute.setValue(responseType);
1175 response.setAttributeNode(attribute);
1176
1177 if(content != null)
1178 response.appendChild(content);
1179
1180 // we'll create an error element for RemoteExceptions (web service problems)
1181 // and UnsupportedEncodingExceptions and
1182 if(ex != null) {
1183 Element error = doc.createElement(GSXML.ERROR_ELEM);
1184 error.appendChild(doc.createTextNode(ex.getMessage()));
1185 // now append the error to the <response> element (after
1186 // the content element whatever that was)
1187 response.appendChild(error);
1188 }
1189
1190 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1191 message.appendChild(response);
1192 doc.appendChild(message);
1193 return message;
1194 }
1195
1196 /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1197 * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1198 * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1199 * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1200 * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1201 * @param doc - the Document object which should me used to create the
1202 * &lt;serviceList&gt; element */
1203 protected Element createServiceList(Document doc)
1204 {
1205 Element serviceList = doc.createElement(
1206 GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1207
1208 for(int i = 0; i < serviceNames.length; i++) {
1209 // create the <service name="serviceName[i]" type="servicetype" />
1210 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1211
1212 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1213 attribute.setValue(serviceNames[i]);
1214 service.setAttributeNode(attribute);
1215
1216 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1217 if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1218 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1219 else if(serviceNames[i].contains("Query")) // search services
1220 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1221 else
1222 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1223 service.setAttributeNode(attribute);
1224
1225 // add the service element to the serviceList element
1226 // <serviceList><service /></serviceList>
1227 serviceList.appendChild(service);
1228 }
1229 return serviceList;
1230 }
1231
1232 /** @return a GS3 response message for a describe services request:
1233 * indicating the list of services supported by the Fedora-Greenstone
1234 * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1235 * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1236 * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1237 * serviceNames. */
1238 public String getServiceList()
1239 {
1240 Document doc = builder.newDocument();
1241 Element serviceList = createServiceList(doc);
1242 // make <serviceList> the body of the responseMessage:
1243 // <message><response><serviceList></response></message>
1244 Element responseMsg = createResponseMessage(doc, serviceList, null,
1245 GSXML.REQUEST_TYPE_DESCRIBE, "");
1246 try {
[22300]1247 return FedoraCommons.elementToString(responseMsg);
[15222]1248 }catch(TransformerException e) {
1249 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1250 + " " + e;
1251 }
1252 }
1253
1254 /** @return a GS3 describe response message listing the collections and
1255 * collection-specific metadata stored in the Fedora-Greenstone repository. */
1256 public String getCollectionList()
1257 {
1258 Document doc = builder.newDocument();
1259 FedoraGS3RunException ex = null; // any RemoteException
1260
1261 // create the <collectionList /> element
1262 Element collectionList = doc.createElement(
1263 GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1264 try{
1265 String[] collectionNames = this.getCollectionNames(
1266 this.getCollections()); // this line could throw RemoteException
1267 for(int i = 0; i < collectionNames.length; i++) {
1268 // create the <collection name="somename" /> element
1269 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1270 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1271 attribute.setValue(collectionNames[i]);
1272 collection.setAttributeNode(attribute);
1273
1274 // append the <collection> element as child of <collectionList>
1275 collectionList.appendChild(collection);
1276
1277 //if(collection.hasAttribute(GSXML.NAME_ATT))
1278 //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1279 }
1280 } catch(RemoteException e) { // if this happens, perhaps it's because it
1281 // can't find Greenstone collections in fedora repository?
1282 ex = new FedoraGS3RunException(e);
1283 ex.setSpecifics(
1284 "greenstone collections in fedora repository");
1285 }
1286
1287 // make <collectionList> the body of the responseMessage:
1288 // <message><response><collectionList></response></message>
1289 Element responseMsg = createResponseMessage(doc, collectionList, ex,
1290 GSXML.REQUEST_TYPE_DESCRIBE, "");
1291 try{
[22300]1292 return FedoraCommons.elementToString(responseMsg);
[15222]1293 }catch(TransformerException e) {
1294 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1295 + " " + e;
1296 }
1297 }
1298
1299 /** @return a GS3 describe response message for a collection in the
1300 * Fedora-Greenstone repository.
1301 * @param collectionName - the name of the collection that is to be described.
1302 * It will be converted to a fedora collection pid, which is of the form
1303 * "greenstone:&lt;collectionName&gt;-collection". */
1304 public String describeCollection(String collectionName)
1305 {
1306 Document doc = builder.newDocument();
1307 FedoraGS3RunException ex = null;
1308
1309 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1310 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1311 attribute.setValue(collectionName);
1312 collection.setAttributeNode(attribute);
1313
1314 //<displayItem assigned="true" lang="en" name="name">
1315 //"some display name"</displayItem>
1316 Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1317
1318 attribute = doc.createAttribute(GSXML.LANG_ATT);
1319 attribute.setValue(this.lang);
1320 displayItem.setAttributeNode(attribute);
1321
1322 attribute = doc.createAttribute(GSXML.NAME_ATT);
1323 attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1324 displayItem.setAttributeNode(attribute);
1325
1326 try{
1327 Text textNode = doc.createTextNode(
1328 this.getCollectionTitle(getCollectionPID(collectionName)));
1329 displayItem.appendChild(textNode);
1330 } catch(Exception e) {
1331 // can't find Greenstone collections in fedora repository or problem
1332 // getting their titles from their metadata datastream?
1333 ex = new FedoraGS3RunException(e);
1334 ex.setSpecifics("greenstone collections or their metadata"
1335 + "in the fedora repository");
1336 }
1337 // now append the displayItem element as child of the collection element
1338 collection.appendChild(displayItem);
1339 // get the <serviceList> and add it into the collection description.
1340 // Services for all collections in the FedoraGS3 repository are the
1341 // same, offering a ClassifierBrowse to browse titles by starting letter
1342 // and DocRetrieve services: Content, Metadata and Structure.
1343
1344 Element serviceList = createServiceList(doc);
1345 collection.appendChild(serviceList);
1346
1347 Element responseMsg = createResponseMessage(doc, collection, ex,
1348 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1349 try{
[22300]1350 return FedoraCommons.elementToString(responseMsg);
[15222]1351 }catch(TransformerException e) {
1352 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1353 + " " + e;
1354 }
1355 }
1356
1357 /** @return a GS3 describe response message for the services of a collection
1358 * in the Fedora-Greenstone repository. So far, these services are the same for
1359 * all fedora collections: they are the services given in member variable
1360 * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1361 * ClassifierBrowseMetadataRetrieve.
[21835]1362 * All collections in this Digital Library (Fedora Repository) share the
1363 * same services, so this method returns the same services as getServiceList();
[15222]1364 * @param collectionName - the name of the collection whose services are to
1365 * be described. It will be converted to a fedora collection pid, which is of
1366 * the form "greenstone:&lt;collectionName&gt;-collection". */
1367 public String describeCollectionServices(String collectionName)
1368 {
1369 Document doc = builder.newDocument();
1370
1371 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1372 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1373 attribute.setValue(collectionName);
1374 collection.setAttributeNode(attribute);
1375
1376 Element serviceList = createServiceList(doc);
1377 collection.appendChild(serviceList);
1378
1379 Element responseMsg = createResponseMessage(doc, collection, null,
1380 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1381 try{
[22300]1382 return FedoraCommons.elementToString(responseMsg);
[15222]1383 }catch(TransformerException e) {
1384 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1385 + " " + e;
1386 }
1387 }
1388
1389 /** All collections in this Digital Library (Fedora Repository) share
1390 * the same services, so this method returns the same as
1391 * describeCollectionService(collName, serviceName).
1392 * @return a GS3 describe response message for the requested service
1393 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1394 * return nothing special except their names; browse (and any query)
1395 * return more complex XML responses.
1396 * @param serviceName - the name of the service in the collection which is to
1397 * be described.*/
1398 public String describeService(String serviceName)
1399 {
1400 // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1401 // we return:
1402 // <message><response from="<name>Retrieve" type="describe">
1403 // <service name="<name>Retrieve" type="retrieve" /></response></message>
1404 // But for browse (and any query) service, we return the data necessary
1405 // for displaying it
1406
1407 Document doc = this.builder.newDocument();
1408 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1409 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1410 attribute.setValue(serviceName);
1411 service.setAttributeNode(attribute);
1412
1413 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1414
[21775]1415 if(serviceName.toLowerCase().endsWith("retrieve")) {
[15222]1416 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
[21775]1417 }
[15222]1418 else if(serviceName.toLowerCase().contains("browse")) {
1419 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1420
1421 // we need name and description <displayItem> elements
1422 Element displayItem
1423 = createNameValuePairElement(doc,
1424 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1425 service.appendChild(displayItem);
1426
1427 displayItem = createNameValuePairElement(doc,
1428 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1429 "Browse pre-defined classification hierarchies");
1430 service.appendChild(displayItem);
1431
1432 // now need a classifierList
1433 Element classifierList = doc.createElement(
1434 GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1435
1436 int classifierNum = 1;
1437 // append a <classifier content="some letter" name="CL#">
1438 // for each letter of the alphabet:
1439 Element classifier = createClassifierElement(doc, "TitleByLetter",
[15670]1440 classifierNum++, "titles by letter", "Browse titles by letter");
[15222]1441 // now add this <classifier> to the <classifierList>
1442 classifierList.appendChild(classifier);
1443
1444 // ANY MORE CLASSIFIERS? ADD THEM HERE
1445
1446 service.appendChild(classifierList);
1447 } // ELSE check for whether it is a query service
1448 else if(serviceName.toLowerCase().contains("query")) {
1449 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
[21775]1450 if(serviceName.equals("TextQuery")) {
[15222]1451 describeTextQueryService(service);
[21775]1452 } else if(serviceName.equals("FieldQuery")) {
[15222]1453 describeFieldQueryService(service);
[21775]1454 }
[15222]1455 }
1456
1457 // don't forget to add the type attribute to the service!
1458 service.setAttributeNode(attribute);
1459
1460 String from = serviceName;
1461
1462 Element responseMsg = createResponseMessage(doc, service, null,
1463 GSXML.REQUEST_TYPE_DESCRIBE, from);
1464 try{
[22300]1465 return FedoraCommons.elementToString(responseMsg);
[15222]1466 }catch(TransformerException e) {
1467 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1468 + " " + e;
1469 }
1470 }
1471
1472 /** Appends children to the parameter service Element that make the
1473 * final service Element into a describe response XML for FedoraGS3's
1474 * TextQuery service.
1475 * @param service is the service Element that is being filled out. */
1476 protected void describeTextQueryService(Element service) {
1477 Document doc = service.getOwnerDocument();
1478 // we need name, submit (button) and description <displayItem> elements
1479 Element displayItem = createNameValuePairElement(doc,
1480 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1481 "Text Search");
1482 service.appendChild(displayItem);
1483
1484 displayItem = createNameValuePairElement(doc,
1485 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1486 service.appendChild(displayItem);
1487
1488 displayItem = createNameValuePairElement(doc,
1489 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1490 "Title and full-text search service");
1491 service.appendChild(displayItem);
1492
1493 //create the <paramList>
1494 Element paramList = doc.createElement(
1495 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1496
1497 // we ignore granularity to search at: it will always be
1498 // document and section level
1499 // we ignore casefolding: always on (that is, case is irrelevant)
1500 // we ignore document display order: always ranked
1501
1502 // Constructing the following:
1503 // <param default="100" name="maxDocs" type="integer">
1504 // <displayItem name="name">Maximum hits to return</displayItem>
1505 // </param>
1506 Element param = doc.createElement(GSXML.PARAM_ELEM);
1507
1508 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1509 attribute.setValue(MAXDOCS);
1510 param.setAttributeNode(attribute);
1511
1512 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1513 attribute.setValue("100");
1514 param.setAttributeNode(attribute);
1515
1516 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1517 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1518 param.setAttributeNode(attribute);
1519
1520 displayItem = createNameValuePairElement(doc,
1521 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1522 "Maximum hits to return");
1523 param.appendChild(displayItem);
1524
1525 paramList.appendChild(param);
1526
1527 // Constructing the following:
1528 // <param name="query" type="string">
1529 // <displayItem name="name">Query string</displayItem>
1530 // </param>
1531 param = doc.createElement(GSXML.PARAM_ELEM);
1532
1533 attribute = doc.createAttribute(GSXML.NAME_ATT);
1534 attribute.setValue(QUERY);
1535 param.setAttributeNode(attribute);
1536
1537 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1538 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1539 param.setAttributeNode(attribute);
1540
1541 displayItem = createNameValuePairElement(doc,
1542 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1543 "Query string");
1544 param.appendChild(displayItem);
1545
1546 paramList.appendChild(param);
1547
1548 service.appendChild(paramList);
1549 }
1550
1551 /** Appends children to the parameter service Element that make the
1552 * final service Element into a describe response XML for FedoraGS3's
1553 * FieldQuery service.
1554 * @param service is the service Element that is being filled out. */
1555 protected void describeFieldQueryService(Element service) {
1556 Document doc = service.getOwnerDocument();
1557 // we need name, submit (button) and description <displayItem> elements
1558 Element displayItem = createNameValuePairElement(doc,
1559 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1560 "Form Search");
1561 service.appendChild(displayItem);
1562
1563 displayItem = createNameValuePairElement(doc,
1564 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1565 service.appendChild(displayItem);
1566
1567 displayItem = createNameValuePairElement(doc,
1568 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1569 "Simple fielded search");
1570 service.appendChild(displayItem);
1571
1572 //create the <paramList>
1573 Element paramList = doc.createElement(
1574 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1575
1576 // we ignore granularity to search at: it will always be
1577 // document and section level
1578 // we ignore casefolding: always on (that is, case is irrelevant)
1579 // we ignore document display order: always ranked
1580
1581 // Constructing the following:
1582 // <param default="100" name="maxDocs" type="integer">
1583 // <displayItem name="name">Maximum hits to return</displayItem>
1584 // </param>
1585 Element param = doc.createElement(GSXML.PARAM_ELEM);
1586
1587 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1588 attribute.setValue(MAXDOCS);
1589 param.setAttributeNode(attribute);
1590
1591 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1592 attribute.setValue("100");
1593 param.setAttributeNode(attribute);
1594
1595 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1596 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1597 param.setAttributeNode(attribute);
1598
1599 displayItem = createNameValuePairElement(doc,
1600 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1601 "Maximum hits to return");
1602 param.appendChild(displayItem);
1603
1604 paramList.appendChild(param);
1605
1606 // Constructing the following:
1607 // <param name="simpleField" occurs="4" type="multi">
1608 // <displayItem name="name"></displayItem>
1609 //
1610 // <param name="query" type="string">
1611 // <displayItem name="name">Word or phrase </displayItem>
1612 // </param>
1613 //
1614 // <param default="allFields" name="fieldname" type="enum_single">
1615 // <displayItem name="name">in field</displayItem>
1616 //
1617 // <option name="docTitles">
1618 // <displayItem name="name">document titles</displayItem>
1619 // </option>
1620 // <option name="allTitles">
1621 // <displayItem name="name">document and section titles</displayItem>
1622 // </option>
1623 // <option name="fullText">
1624 // <displayItem name="name">full text</displayItem>
1625 // </option>
1626 // <option name="all">
1627 // <displayItem name="name">titles and full text</displayItem>
1628 // </option>
1629 // <option name="">
1630 // <displayItem name="name"></displayItem>
1631 // </option>
1632 // </param>
1633 // </param>
1634 Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1635 attribute = doc.createAttribute(GSXML.NAME_ATT);
1636 attribute.setValue(SIMPLEFIELD_ATT);
1637 rowOfParams.setAttributeNode(attribute);
1638
1639 // we want the row of controls to occur multiple times
1640 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1641 attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1642 rowOfParams.setAttributeNode(attribute);
1643
1644 attribute = doc.createAttribute(OCCURS_ATT);
1645 attribute.setValue("4"); // we want this row to occur 4 times
1646 rowOfParams.setAttributeNode(attribute);
1647
1648 // <param name="query" type="string">
1649 // <displayItem name="name">Word or phrase </displayItem>
1650 // </param>
1651 param = doc.createElement(GSXML.PARAM_ELEM);
1652
1653 attribute = doc.createAttribute(GSXML.NAME_ATT);
1654 attribute.setValue(QUERY);
1655 param.setAttributeNode(attribute);
1656
1657 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1658 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1659 param.setAttributeNode(attribute);
1660
1661 displayItem = createNameValuePairElement(doc,
1662 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1663 "Word or phrase");
1664 param.appendChild(displayItem);
1665 rowOfParams.appendChild(param);
1666
1667 // <param default="allFields" name="fieldName" type="enum_single">
1668 // <displayItem name="name">in field</displayItem>
1669 param = doc.createElement(GSXML.PARAM_ELEM);
1670 attribute = doc.createAttribute(GSXML.NAME_ATT);
1671 attribute.setValue(FIELDNAME_ATT);
1672 param.setAttributeNode(attribute);
1673
1674 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1675 attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1676 param.setAttributeNode(attribute);
1677
1678 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1679 attribute.setValue(ALL_FIELDS);
1680 param.setAttributeNode(attribute);
1681
1682 displayItem = createNameValuePairElement(doc,
1683 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1684 "in field");
1685 param.appendChild(displayItem);
1686
1687 String[] searchFieldNames
1688 = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1689 String[] searchFieldDisplay = {"all titles and full-text",
1690 "document titles only", "document and section titles",
1691 "full-text only"};
1692
1693 // for each fieldName create an option element and insert
1694 // the option into the enum_multi drop-down param:
1695 // <option name="fieldName">
1696 // <displayItem name="name">fieldName</displayItem>
1697 // </option>
1698 for(int i = 0; i < searchFieldNames.length; i++) {
1699 Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1700 attribute = doc.createAttribute(GSXML.NAME_ATT);
1701 attribute.setValue(searchFieldNames[i]);
1702 option.setAttributeNode(attribute);
1703
1704 displayItem = createNameValuePairElement(doc,
1705 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1706 searchFieldDisplay[i]);
1707 option.appendChild(displayItem);
1708 param.appendChild(option); // add option to the drop-down box
1709 }
1710
1711 rowOfParams.appendChild(param);
1712 paramList.appendChild(rowOfParams);
1713 service.appendChild(paramList);
1714 }
1715
1716 /**
1717 * @return a GS3 describe response message for the requested service
1718 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1719 * return nothing special except their names; browse (and any query)
1720 * return more complex XML responses.
1721 * All collections in this Digital Library (Fedora Repository) share
1722 * the same services, so this method returns the same as
1723 * describeService(serviceName).
1724 * @param collectionName - the name of the collection whose service is to
1725 * be described. It will be converted to a fedora collection pid, which is of
1726 * the form "greenstone:&lt;collectionName&gt;-collection".
1727 * @param serviceName - the name of the service in the collection which is to
1728 * be described. */
1729 public String describeCollectionService(String collectionName,
1730 String serviceName) {
1731 // collectionName can be ignored, because all services are FedoraGS3
1732 // services and are not unique to any particular (greenstone) collection.
1733 return describeService(serviceName);
1734 }
1735
1736 /** This method performs the implemented browse operation: allowing the
1737 * user to browse the titles of documents in the given collection by letter
1738 * and returning the results.
[22300]1739 * @param collectionName is the name of the collection whose documents
1740 * starting with the given letter will be returned.
[21835]1741 * @param classifierIDs are the ids of the classifiers on which to browse. In
[15222]1742 * this case, the classifier indicates whether we browse titles by letter, or
1743 * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
[22300]1744 * @param structures - the requested browse substructure. Can be any combination
1745 * of ancestors, parent, siblings, children, descendants.
1746 * @param infos - the requested structural info. Can be numSiblings,
1747 * siblingPosition, numChildren.
1748 * @return a GS3 ClassifierBrowse response message which lists all
[15222]1749 * the documents that start with the letter indicated by parameter classifier.
1750 */
[22300]1751 public String browse(String collectionName, String[] classifierIDs,
1752 String[] structures, String[] infos)
[15222]1753 {
[22300]1754 // Construct one string from the structures and structural info arrays
1755 String structure = "";
1756 String info = "";
1757 for(int i = 0; i < structures.length; i++) {
1758 structure = structure + structures[i] + "|";
1759 }
1760 for(int i = 0; i < infos.length; i++) {
1761 info = info + infos[i] + "|";
1762 }
1763
1764 Document doc = builder.newDocument();
1765 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1766
1767 // <classifierNodeList>
1768 Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1769
1770 for(int i = 0; i < classifierIDs.length; i++) {
1771 if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1772 browseTitlesByLetterClassifier(doc, classifierNodeList,
1773 collectionName, classifierIDs[i],
1774 structure, info);
1775 }
1776 }
1777
1778 Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1779 GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
1780 try {
1781 return FedoraCommons.elementToString(responseMsg);
1782 } catch(TransformerException e) {
1783 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1784 + " " + e;
1785 }
1786 }
1787
1788 /** CL1 browsing classifier: browsing titles by starting letter.
1789 * The browsing structure is retrieved.
1790 * @param doc - the document object that will contain the CL1 browsing structure.
1791 * @param classifierNodeList - the classifiers will be added to this nodeList.
1792 * @param collectionName - name of the collection through which we are browsing CL1.
1793 * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1794 * a letter.
[22308]1795 * @param structure - the requested browse substructure. Can be any combination of
1796 * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
[22300]1797 * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1798 * numChildren.
1799 * @return the classifierNodeList with the CL1 classifier browse structure.
1800 */
1801 public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1802 String collectionName, String classifierID,
1803 String structure, String info)
1804 {
1805 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1806
1807 if(structure.indexOf("entire") != -1) {
1808 structure = structure + "ancestors|descendants";
1809 }
1810
1811 // Structure of ancestors and children only at this stage
1812 int firstLevel = classifierID.indexOf('.');
1813 int secondLevel = classifierID.lastIndexOf('.');
1814
1815 // <nodeStructure>
1816 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1817
1818 // requested classifier node
1819 Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1820 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1821 attribute.setValue(classifierID);
1822 classNode.setAttributeNode(attribute);
[26262]1823 Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1824 typeAttribute.setValue(GSXML.VLIST);
1825 classNode.setAttributeNode(typeAttribute);
[22300]1826
1827 if(firstLevel == -1) { // CL1 - toplevel node
1828 Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1829
1830 classifierNodeList.appendChild(classNode);
1831 classNode.appendChild(nodeStructure);
1832
1833 nodeStructure.appendChild(root);
1834 if(structure.indexOf("descendants") != -1) {
[22308]1835 getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
[22300]1836 } else if(structure.indexOf("children") != -1) {
[22308]1837 getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
[22300]1838 }
[22308]1839 // nothing to be done for siblings
[22300]1840 }
1841 else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1842
[22308]1843 if(structure.indexOf("parent") != -1
1844 || structure.indexOf("ancestors") != -1
1845 || structure.indexOf("siblings") != -1) {
[22300]1846 String toplevelID = classifierID.substring(0, firstLevel);
1847 Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1848 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1849 attribute.setValue(toplevelID);
1850 toplevelNode.setAttributeNode(attribute);
[26262]1851 typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1852 typeAttribute.setValue(GSXML.VLIST);
1853 toplevelNode.setAttributeNode(typeAttribute);
[22300]1854 Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1855
1856 classifierNodeList.appendChild(toplevelNode);
1857 toplevelNode.appendChild(nodeStructure);
1858 nodeStructure.appendChild(node);
[22308]1859
1860 if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1861 getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1862 // pass the requested node (classNode) so that it is attached in the correct
1863 // location among its siblings, and to ensure that it is not recreated.
1864 // getTitlesByLetterStructure() will append classNode to node
1865 } else {
1866 node.appendChild(classNode);
1867 }
[22300]1868 } else {
1869 Element node = (Element)classNode.cloneNode(true);
1870 classifierNodeList.appendChild(node);
1871 node.appendChild(nodeStructure);
1872 nodeStructure.appendChild(classNode);
1873 }
1874
1875 int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1876 char ch = (char)(num - 1 + 'A');
1877 if(structure.indexOf("descendants") != -1) {
1878 getTitlesForLetter(ch, collectionName, classNode, "descendants");
1879 } else if(structure.indexOf("children") != -1) {
1880 getTitlesForLetter(ch, collectionName, classNode, "children");
1881 }
1882 }
1883 else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1884 LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1885 }
1886
1887 return classifierNodeList;
1888 }
1889
1890 /** Creates a (CL1) subclassifier element for the docs whose titles start with
1891 * the given letter.
1892 * @param ch - the starting letter of the document titles to retrieve.
1893 * @param collectionName - name of the collection through which we are browsing CL1.
1894 * @param classifierNode - the docNodes found will be appended to this node.
1895 * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1896 * gets descendants of any documents found, otherwise gets just the children.
1897 * @return the given classifierNode which will have the child (or descendant) documents
1898 * appended to it.
1899 */
1900 public Element getTitlesForLetter(char ch, String collectionName,
1901 Element classifierNode, String depthStructure)
1902 {
1903 Document doc = classifierNode.getOwnerDocument();
1904 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1905
1906
1907 // Retrieve the document structure for each subClassifierID:
1908 // all the documents that begin with its letter.
1909 String letter = String.valueOf(ch);
1910 try {
1911 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1912 if(docPIDs.length == 0) {
1913 return classifierNode; // skip letters that don't have any kids
1914 }
1915
1916 for(int i = 0; i < docPIDs.length; i++) {
1917 // work out the document's fedora PID and section ID
1918 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1919 String docPID = getDocPIDFromDocID(docPIDs[i]);
[15222]1920
[22300]1921 // get the required section, along with children or descendants
1922 Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
[15222]1923
[22300]1924 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1925 Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1926
1927 // fills in the subtree of the rootNode in our nodeStructure element
1928 createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1929 classifierNode.appendChild(docRootNode);
1930 }
1931 } catch(Exception e) {
1932 ex = new FedoraGS3RunException(e);
1933 ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1934 }
1935
1936 return classifierNode;
1937 }
1938
1939
1940 /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1941 * starting letter of the alphabet. X is each letter of the alphabet for which there
1942 * are matching document titles.
1943 * @param collectionName - name of the collection through which we are browsing CL1.
1944 * @param classifierNode - the docNodes found will be appended to this node.
1945 * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1946 * the IDs for the subclassifiers (CL.x).
1947 * @param getDescendants - if true, get descendants of any documents found, otherwise
1948 * get just the children.
[22308]1949 * @param wantedSibling - the node (already created) whose siblings are requested. We
1950 * need to make sure not to recreate this node when creating its sibling nodes.
[22300]1951 * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1952 * the alphabet that are represented in the document titles.
1953 */
1954 public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
[22308]1955 String classifierID, boolean getDescendants,
1956 Element wantedSibling)
1957 {
1958 String ID = "";
1959 if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1960 ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1961 }
1962
[22300]1963 Document doc = classifierNode.getOwnerDocument();
1964 FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1965
1966 // We're going to loop to the end of the alphabet
1967 int count = 1;
1968 for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1969 // Retrieve the document structure for each subClassifierID:
1970 // all the documents that begin with its letter.
1971 String letter = String.valueOf(ch);
1972 try {
1973 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1974 if(docPIDs.length == 0) {
1975 continue; // skip letters that don't have any kids
1976 }
[22308]1977 Element subClassifier = null;
1978 if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1979 // already have the requested node, don't recreate it
1980 subClassifier = wantedSibling;
1981 } else {
[26262]1982 // <classifierNode childType="VList" nodeID="CL1.x">
[22308]1983 subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
[26262]1984 Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1985 typeAttribute.setValue(GSXML.VLIST);
1986 subClassifier.setAttributeNode(typeAttribute);
[22308]1987 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1988 attribute.setValue(classifierID+"."+count);
1989 subClassifier.setAttributeNode(attribute);
1990 }
1991 classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
[22300]1992
1993 if(getDescendants) { // get the documents
1994
1995 // append the <docNodes> for the docPIDs found as children
1996 // of subclassifier
1997
1998 for(int i = 0; i < docPIDs.length; i++) {
1999 // work out the document's fedora PID and section ID
2000 String sectionID = getSectionIDFromDocID(docPIDs[i]);
2001 String docPID = getDocPIDFromDocID(docPIDs[i]);
[15222]2002
[22300]2003 // get the required section, along with children or descendants
2004 Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2005
2006 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2007 Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2008
2009 // fills in the subtree of the rootNode in our nodeStructure element
2010 createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2011 subClassifier.appendChild(rootNode);
2012 }
[21863]2013 }
[22300]2014 } catch(Exception e) {
2015 ex = new FedoraGS3RunException(e);
2016 ex.setSpecifics("requested portion of TOC file or "
2017 + "trouble with fielded search ");
2018 }
[21835]2019 }
[22300]2020 return classifierNode;
2021 }
2022
[15222]2023
2024 /** This method performs something equivalent to a greenstone3
2025 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
[22300]2026 * @param classNodeIDs array of classifierNode IDs for which the metadata
[15222]2027 * needs to be returned.
[22300]2028 * @param metafields are the classifier metadata fields that are to be returned.
2029 * At present this method ignores them/pretends the requested metafields are
2030 * "all" and always returns the Title meta for the requested classifier nodes
2031 * (because that is all the metadata this Fedora classifier has at present).
[15222]2032 * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2033 * lists the metadata for all the classifierNodes passed as parameter.*/
[22300]2034 public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
[15222]2035 {
2036 Document doc = this.builder.newDocument();
2037 // <classifierNodeList>
2038 Element classifierNodeList = doc.createElement(
2039 GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2040
2041 // create <classifierNode><metadataList><metadata>s
[21864]2042 // </metadataList></classifierNode> for all letters of the alphabet
[15222]2043 for(int i = 0; i < classNodeIDs.length; i++) {
2044 // strip ID of everything before the first '.' (i.e. remove "CL#.")
2045 int index = classNodeIDs[i].indexOf('.');
2046 String subClassifierNumber = classNodeIDs[i].substring(index+1);
[21864]2047 index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2048 if(index != -1) {
2049 subClassifierNumber = subClassifierNumber.substring(0, index);
2050 }
[15222]2051 int subClassifierNum = Integer.parseInt(subClassifierNumber);
[15672]2052 String classifierName = "";
2053 if(subClassifierNum == 0) { // no document titles started with a letter
2054 classifierName = "A-Z";
2055 } else {
2056 char letter = (char)('A' + subClassifierNum - 1); // A = 1
2057 classifierName = String.valueOf(letter);
2058 }
[15222]2059
2060 // <classifierNode nodeID="CL#.subNum">
2061 Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2062 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2063 attribute.setValue(classNodeIDs[i]);
2064 classifierNode.setAttributeNode(attribute);
2065
2066 // <metadataList>
2067 Element metadataList = doc.createElement(
2068 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2069
2070 // at least one metadata element: that of the title of this
2071 // classifierNode:
2072 // <metadata name="Title">letter</metadata>
2073 Element metadata = this.createNameValuePairElement(doc,
[15672]2074 GSXML.METADATA_ELEM, "Title", classifierName);
[15222]2075
2076 // now connect up everything
2077 metadataList.appendChild(metadata);
2078 classifierNode.appendChild(metadataList);
2079 classifierNodeList.appendChild(classifierNode);
2080 }
2081
2082 Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2083 GSXML.REQUEST_TYPE_PROCESS, //collName +
2084 "ClassifierBrowseMetadataRetrieve");
2085 try{
[22300]2086 return FedoraCommons.elementToString(responseMsg);
[15222]2087 }catch(TransformerException e) {
2088 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2089 + " " + e;
2090 }
2091 }
2092
2093 /** @return a newly created element of the following format:
2094 * &lt;classifier content="somecontent" name="CL+num"&gt;
2095 * &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2096 * &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2097 * &lt;/classifier&gt;
2098 * @param doc - the document used to create the element
2099 * @param content - value of the content attribute
2100 * @param classifierNum - the number suffixed to the CL, together forming
2101 * the classifier Node's ID
2102 * @param displayNameVal is the bodytext of a named displayItem element
2103 * @param displayDescrVal is the bodytext of a displayItem element with
2104 * description */
2105 protected Element createClassifierElement(Document doc, String content,
2106 int classifierNum, String displayNameVal, String displayDescrVal)
2107 {
2108 final String CL = "CL";
2109 Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2110 // content attribute
2111 Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2112 att.setValue(content);
2113 classifier.setAttributeNode(att);
2114 // name attribute
2115 att = doc.createAttribute(GSXML.NAME_ATT);
2116 att.setValue(CL + classifierNum);
2117 classifier.setAttributeNode(att);
2118
2119 // now create the displayItem children for classifier:
2120 // <displayItem name="name">#letter</displayItem>
[22300]2121 // <displayItem name="description">Browse titles starting with #letter</displayItem>
[15222]2122 Element displayItem = createNameValuePairElement(doc,
2123 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2124 classifier.appendChild(displayItem);
2125 displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2126 GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2127 classifier.appendChild(displayItem);
2128
2129 return classifier;
2130 }
2131
2132
2133 /** @return a newly created element of the following format:
2134 * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2135 * @param doc - the document used to create the element
2136 * @param elementName - the tag name
2137 * @param name - value of attribute name
2138 * @param value - the body text of the element */
2139 protected Element createNameValuePairElement(Document doc, String elementName,
2140 String name, String value) {
2141 // <elementName name="somename">"some display value"</elementName>
2142 Element element = doc.createElement(elementName);
2143 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2144 attribute.setValue(name);
2145 element.setAttributeNode(attribute);
2146
2147 element.appendChild(doc.createTextNode(value));
2148 return element;
2149 }
2150
2151 /**
2152 * @param collection is the collection to search in
2153 * @param query is the query term to search for. It won't specify the
2154 * indexed field to search in, which will mean that GSearch will
2155 * search all default indexed fields.
2156 * @param maxDocs is the maximum number of results to return (which
2157 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2158 */
2159 public String[] textQuery(String collection, String query,
2160 int maxDocs)
2161 throws Exception
2162 {
2163 // no need to search there is no query or query is empty spaces
2164 if(query.trim().equals(""))
2165 return new String[]{};
2166
2167 // QUERY value won't specify indexed field to search, Fedora
2168 // Gsearch will take that as meaning all default indexed fields.
2169 // Params to search() method below: string of fielded query terms;
2170 // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2171 query = query + " " + "PID" + COLON + GREENSTONE;
2172
2173 String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2174 // now we have the XML returned by FedoraGSearch, get the pids
2175 // of the documents returned (if any)
2176 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2177 collection, searchResult);
2178 return pids;
2179 }
2180
2181 /**
2182 * This method performs a fieldquery, searching for x number of phrases
2183 * in each of the 4 indexed fields.
2184 * @param collection is the collection to search in
2185 * @param nameValParamsMap is a Map of several(key, value) entries,
2186 * 4 of which we're concerned with here:
2187 * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2188 * - the values are a comma separated list of terms (phrases or single
2189 * words) to search that field in. There may be more than 1 or
2190 * there may be none (in which case there may be N empty values or
2191 * spaces separated by commas).
2192 * @param maxDocs is the maximum number of results to return (which
2193 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2194 * */
2195 public String[] fieldQuery(String collection, Map nameValParamsMap,
2196 int maxDocs)
2197 throws Exception
2198 {
2199 // we're going to maintain a list of UNIQUE pids that were returned
2200 // in search results. Hence we use Set:
2201 java.util.Set set = new java.util.HashSet();
2202
2203 // (1) Use Fedora's search to search document titles, if they were
2204 // specified:
2205 String[] docTitlepids = {};
2206
2207 String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2208 if(docTitleTerms != null) { // no doc titles may have been specified
2209 String[] phrases = docTitleTerms.split(COMMA);
2210
2211 // search the individual phrases first:
2212 for(int i = 0; i < phrases.length; i++) {
2213 if(phrases.equals("") || phrases.equals(" "))
2214 continue; //skip when there are no terms
2215 docTitlepids = this.searchDocumentTitles(
2216 collection, phrases[i], false);
2217 for(int j = 0; j < docTitlepids.length; j++)
2218 set.add(docTitlepids[j]);
2219 }
2220 }
2221 // (2) use FedoraGSearch to search doc AND section titles, and
2222 // fulltext (in case these were specified in nameValParamsMap):
2223 String searchResult = this.fedoraGSearch.search(
2224 nameValParamsMap, 1, maxDocs);
2225
2226 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2227 collection, searchResult);
2228
2229 for(int i = 0; i < pids.length; i++)
2230 set.add(pids[i]);
2231
2232 pids = null;
2233 pids = new String[set.size()];
2234 set.toArray(pids); // unique pids
2235 return pids;
2236 }
2237
[22300]2238 /** @return a String representing Greenstone3 XML for a query process
[15222]2239 * response returning the results for the query denoted by parameter
2240 * nameValParamsMap.
2241 * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2242 * query field data values. The names match the field names that
2243 * describeCollectionService() would have returned for the query service.
2244 * @param collection is the name of the collection
2245 * @param service is the name of the query service
2246 * This method is only ever called when any of the services in the digital
2247 * library described themselves as type=query. Therefore any digital
2248 * libraries that have no query services, can just return emtpy message
2249 * strings (or even "") since this method will never be called on them
2250 * anyway. */
2251 public String query(String collection, String service,
2252 Map nameValParamsMap)
2253 {
2254 FedoraGS3RunException ex = null;
2255 // (1) obtain the requested number of maximum result documents
2256 int maxDocs = 100;
2257 try{
2258 maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2259 } catch(NumberFormatException e) {
2260 maxDocs = 100;
2261 }
2262
2263 String pids[] = {};
2264 // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
[22300]2265 if(service.endsWith("TextQuery")) {
[15222]2266 try {
2267 // get the Query field:
2268 String query = (String)nameValParamsMap.get(QUERY);
2269 pids = textQuery(collection, query, maxDocs);
2270 }
2271 catch(Exception e) {
2272 LOG.error("Error in TextQuery processing: " + e);
2273 ex = new FedoraGS3RunException(
2274 "When trying to use FedoraGenericSearch for a TextQuery", e);
2275
2276 }
2277 } else { // (3) FieldQuery
2278 // first get the comma-separated lists
2279 String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2280 String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2281 // both are comma separated lists, so split both on 'comma'
2282 String[] fieldNames = listOfFieldNames.split(COMMA);
2283 String[] searchTerms = listOfSearchTerms.split(COMMA);
2284
2285 // In the fieldNames and searchTerms lists of nameValParamsMap,
2286 // each searchTerm element was matched with its correspondingly
2287 // indexed fieldName.
2288 // A new map is going to reorganise this, by putting all terms
2289 // for a particular fieldName together in a comma separated list
2290 // and associating that with the fieldName. I.e. (key, value) ->
2291 // (fieldName, comma-separated list of all terms in that field)
2292 Map map = new HashMap();
2293 for(int i = 0; i < searchTerms.length; i++) {
2294 // there may be fewer searchTerms than fieldNames (since some
2295 // fieldNames may have been left empty), so loop on searchTerms
[21775]2296 if(map.containsKey(fieldNames[i])) { // fieldName is already
[15222]2297 // in the list, so append comma with new value
2298 String termsList = (String)map.get(fieldNames[i]);
2299 termsList = termsList + COMMA + searchTerms[i];
2300 map.put(fieldNames[i], termsList);
2301 } else { // this is the first time this fieldName occurred
2302 // just put the fieldName with searchTerm as-is
2303 map.put(fieldNames[i], searchTerms[i]);
2304 }
2305 }
2306
2307 try {
2308 // For fieldquery, we search on all the fieldNames specified
2309 // - if DOC_TITLES is specified then we use Fedora's search
2310 // - for all other fieldNames specified, we use FedoraGSearch
2311 pids = fieldQuery(collection, map, maxDocs);
2312 }
2313 catch(Exception e) {
2314 LOG.error("Error in FieldQuery processing: " + e);
2315 ex = new FedoraGS3RunException(
2316 "When trying to use FedoraGenericSearch for a FieldQuery", e);
2317 }
2318 }
2319
2320 // Build Greenstone XML Query response message for from
2321 // the pids (which should be document identifiers)
2322 Document doc = builder.newDocument();
2323 // <metadataList><metadata name="numDocsMatched" value="n" />
2324 // </metadataList>
2325 Element metadataList = doc.createElement(
2326 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2327 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2328
2329 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2330 attribute.setValue(NUM_DOCS_MATCHED);
2331 metadata.setAttributeNode(attribute);
2332
2333 attribute = doc.createAttribute(GSXML.VALUE_ATT);
2334 attribute.setValue(Integer.toString(pids.length));
2335 metadata.setAttributeNode(attribute);
2336
2337 metadataList.appendChild(metadata);
2338
2339 // <documentNodeList>
2340 // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2341 // docType='hierarchy' nodeType="leaf" />
2342 // ...
2343 // ...
2344 // </documentNodeList>
2345 Element docNodeList = doc.createElement(
2346 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2347 // for each
2348 for(int i = 0; i < pids.length; i++) {
2349 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2350 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2351 attribute.setValue(pids[i]);
2352 docNode.setAttributeNode(attribute);
2353
2354 attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2355 attribute.setValue("hierarchy");
2356 docNode.setAttributeNode(attribute);
2357
2358 attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2359 attribute.setValue("root");
2360 docNode.setAttributeNode(attribute);
2361 docNodeList.appendChild(docNode);
2362 }
2363
2364 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2365 GSXML.REQUEST_TYPE_PROCESS, service);
2366 try{
[22300]2367 return FedoraCommons.elementToString(responseMsg);
[15222]2368 }catch(TransformerException e) {
2369 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2370 + " " + e;
2371 }
2372 }
[26171]2373
2374
2375 // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2376 /** Given a URL that represents a fedoraPID, will look up the object.
2377 * If it exists, it will return the contents of the DC:Title of its datastream.
2378 * If it doesn't exist, it will return the URL as-is.
2379 * @param URL: the URL that (after modification) represents a fedoraPID to look up.
2380 * @param collection: the name of collection in which to search for the URL
2381 * representing a fedoraPID.
2382 * @return the string (representing a fedoraPID) stored in the DC:Title of the
2383 * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2384 * then the parameter URL is returned.
2385 */
2386 public String getPIDforURL(String url, String collection) {
2387 FedoraGS3RunException ex = null; // any RemoteException
2388
2389 // (1) convert url to the fedorapid
2390 // / -> _ and : -> -
2391 String fedoraPID = url.replaceAll("/", "_");
2392 fedoraPID = fedoraPID.replaceAll(":", "-");
2393 // prefix "greenstone-http:<colname>-" to the fedoraPID
2394 fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2395 //LOG.error("### fedoraPID: " + fedoraPID);
2396
2397 // (2) Look up the datastream for the fedorapid
2398 String dcTitle = "";
2399 try {
2400 dcTitle = getDCTitle(fedoraPID);
2401 } catch(Exception e) {
2402 LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2403 ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2404 }
2405 //String dc = this.getDC(fedoraPID);
2406 //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2407
2408 // (3) if fedorapid exists, extract the dc:title content.
2409 // if it doesn't exist, return url
2410 if(dcTitle.equals("")) {
2411 return url;
2412 } else {
2413 // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2414 //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2415 return dcTitle+"-1";
2416 }
2417 }
[15222]2418
2419 public static void main(String args[]) {
2420 try{
2421 // testing default constructor
2422 //FedoraGS3Connection con = new FedoraGS3Connection();
2423
2424 // testing constructor that takes properties file to show initial
2425 // fedora server values
2426 java.io.File propertyFilename
2427 = new java.io.File("fedoraGS3.properties");
2428 FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2429
2430 // DESCRIBE: serviceList, collectionList
2431 System.out.println("serviceList:\n" + con.getServiceList());
2432
2433 System.out.println("collectionList:\n" + con.getCollectionList());
2434
2435 String[] colPIDs = con.getCollections();
2436 String[] collectionNames = con.getCollectionNames(con.getCollections());
2437
2438
2439 for(int i = 0; i < collectionNames.length; i++) {
2440 System.out.println("Describing collections:\n");
2441 System.out.println(con.describeCollection(collectionNames[i]));
2442 System.out.println("Describing collection services:\n"
2443 + con.describeCollectionServices(collectionNames[i]));
2444 }
2445
2446 String[] serviceNames = con.getServiceNames();
2447 for(int i = 0; i < serviceNames.length; i++) {
2448 System.out.println("Describing " + serviceNames[i] + ":\n"
2449 + con.describeCollectionService("demo", serviceNames[i]));
2450 }
2451
2452
2453 // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2454 // along with EX of the top-level document:
2455 System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
[22300]2456 System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
[15222]2457
2458
2459 String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2460 System.out.println("\nGET CONTENT:");
2461 for(int i = 0; i < docIDs.length; i++) {
2462 System.out.println(con.getContent(docIDs[i]));
2463 }
2464
2465 System.out.println("\nGET META:");
2466 for(int i = 0; i < docIDs.length; i++) {
[22300]2467 System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
[15222]2468 }
2469
2470 String[] getTitlesFor = {
2471 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2472 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2473 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2474 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2475 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2476 };
2477
2478 // first let's display the regular meta for top-level docs and
2479 // their sections
2480 for(int i = 0; i < getTitlesFor.length; i++) {
[22300]2481 System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
[15222]2482 }
2483
2484 System.out.println("\nTitles are:");
2485 System.out.println(con.getTitleMetadata(getTitlesFor));
2486
2487 System.out.println("\nGET STRUCTURE:");
2488 for(int i = 0; i < docIDs.length; i++) {
[22300]2489 System.out.println("Descendents and numChildren:\n"
[26270]2490 + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
[22300]2491 System.out.println("Parent and numSiblings:\n"
[26270]2492 + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
[15222]2493 }
2494
2495 // TEST ERROR CASES:
2496 System.out.println("\nTESTING ERROR CASES");
2497 System.out.println(con.getContent("greenstone:demo-pinky"));
2498 String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2499 "greenstone:demo-pinky" };
2500 System.out.println(con.getContent(errorCases));
[22300]2501 System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
[26270]2502 System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
[15222]2503
2504 System.out.println("\nCLASSIFIER BROWSE");
2505 System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
[22300]2506 new String[]{"CL1"}, new String[] {""}, new String[] {""}));
[15222]2507
2508 System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2509 String[] classNodeIDs = new String[26];
2510 for(int i = 0; i < classNodeIDs.length; i++) {
2511 int subClassifierNum = i + 1;
2512 classNodeIDs[i] = "CL1." + subClassifierNum;
2513 }
2514 System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
[22300]2515 classNodeIDs, new String[]{"all"}));
[15222]2516
2517 System.out.println("Testing query services");
2518 System.out.println("TEXT QUERY:");
2519 Map formControlValsMap = new HashMap();
2520 formControlValsMap.put(MAXDOCS, "100");
2521 formControlValsMap.put(QUERY, "snails");
2522 String searchResponse
2523 = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2524 System.out.println(searchResponse);
2525
2526 System.out.println("FIELD QUERY:");
2527 formControlValsMap.clear();
2528 formControlValsMap.put(MAXDOCS, "100");
2529 formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2530 formControlValsMap.put(FIELDNAME_ATT,
2531 "allFields,docTitles,allFields,allFields");
2532 searchResponse
2533 = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2534 System.out.println(searchResponse);
2535
2536 System.exit(0);
2537 }catch(Exception e) {
2538 JOptionPane.showMessageDialog(
2539 null, e, "Error", JOptionPane.ERROR_MESSAGE);
2540 //System.err.println("ERROR: " + e);
2541 e.printStackTrace();
2542 }
2543 }
[15733]2544}
Note: See TracBrowser for help on using the repository browser.