source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26309

Last change on this file since 26309 was 26309, checked in by ak19, 12 years ago
  1. Corrections to XML returned by FedoraGS3 to get the VList display for classifierBrowse to work correctly in Greenstone: classifierStyle attribute needs to be set on documents returned. Also added in further missing attributes for query and browse, in case these turn out to be important. 2. Replaced 3-line setAttributeNode() calls with 1-line setAttribute() calls.
File size: 106.8 KB
Line 
1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
32import org.greenstone.gsdl3.util.GSXML;
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.Attr;
36import org.w3c.dom.Text;
37import org.w3c.dom.NodeList;
38import org.w3c.dom.Node;
39import org.xml.sax.InputSource;
40
41import java.io.File;
42import java.util.HashMap;
43import java.util.Properties;
44import java.util.Map;
45
46import javax.swing.JOptionPane;
47
48import org.xml.sax.SAXException;
49import java.io.UnsupportedEncodingException;
50import java.io.IOException;
51import javax.net.ssl.SSLHandshakeException;
52import java.net.Authenticator;
53import java.net.ConnectException;
54import java.net.MalformedURLException;
55import java.net.PasswordAuthentication;
56import java.rmi.RemoteException;
57import javax.xml.parsers.ParserConfigurationException;
58import javax.xml.transform.TransformerException;
59
60/**
61 * Class that extends FedoraConnection in order to be able to use
62 * Fedora's web services to retrieve the specific datastreams of
63 * Greenstone documents stored in Fedora's repository. This class
64 * provides methods that convert those datastreams into Greenstone3
65 * XML response messages which are returned.
66 * @author ak19
67*/
68public class FedoraGS3Connection
69 extends FedoraConnection implements FedoraToGS3Interface,
70 FedoraToGS3Interface.Constants
71{
72 /** The logging instance for this class */
73 private static final Logger LOG = Logger.getLogger(
74 FedoraGS3Connection.class.getName());
75
76 /** Default name of Fedora index */
77 private static final String DEFAULT_FEDORA_INDEX = "FgsIndex"; //"BasicIndex" for older versions of GSearch
78
79 /** Complete list of services that our FedoraGS3 would support
80 * if everything goes well. If a connection to FedoraGSearch
81 * cannot be established, the query services will no longer be
82 * available. The actual services supported are given by member
83 * variable serviceNames. */
84 protected static final String[] SERVICES = {
85 "DocumentContentRetrieve", "DocumentMetadataRetrieve",
86 "DocumentStructureRetrieve",
87 "TextQuery", "FieldQuery",
88 "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
89 };
90
91 /** List of services actually supported by our FedoraGS3 repository
92 * after construction. If FedoraGenericSearch can't be connected to,
93 * then query services will not be offered */
94 protected String[] serviceNames;
95
96 /** The object used to connect to FedoraGenericSearch, which is used
97 * for doing full-text searching */
98 protected GSearchConnection fedoraGSearch;
99
100 /** The url for the wsdl file of FedoraGSearch's web services
101 * by default this will be the Fedora server's base URL
102 * concatenated to "gsearch/services/FgsOperations?wsdl" */
103 protected String gSearchWSDLURL;
104
105 /** The last part of the gSearchWSDL URL. The first part is
106 * the same as the fedora server's base url. */
107 protected String gSearchWSDLSuffix;
108
109 /** The name of the index that FedoraGSearch will index the GS3
110 * documents into. If no name is specified in the properties file,
111 * this will default to FedoraIndex. */
112 protected String gSearchIndexName;
113
/**
 * Five-argument constructor, mirroring the one in the superclass
 * FedoraConnection. All work is delegated to the superclass.
 * After construction FedoraGSearch is looked for at the default WSDL
 * location ("gsearch/services/FgsOperations?wsdl" appended to the Fedora
 * base URL); call setGSearchWSDLURL(url) afterwards to use another one.
 *
 * @param protocol either http or https
 * @param host the host on which the fedora server is listening
 * @param port the port on which the fedora server is listening
 * @param fedoraServerUsername username for the administrative
 * authentication needed to access the fedora server
 * @param fedoraServerPassword password for the administrative
 * authentication needed to access the fedora server; pass "" if no
 * password was set when Fedora was installed
 */
public FedoraGS3Connection(String protocol, String host, int port,
        String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    // NOTE(review): presumably the superclass constructor ends up in the
    // overridden init(...) of this class, which sets up the GSearch
    // connection - confirm against FedoraConnection.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
139
/**
 * No-argument constructor, mirroring the one in the superclass
 * FedoraConnection, which shows a small dialog asking for the host,
 * port, administrative username and password of the fedora server.
 * The password field may be left blank if no password was set on the
 * fedora repository at installation time.
 */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // whose override in this class tries to create the GSearchConnection.
    super();
}
154
/**
 * Single-argument constructor, mirroring the one in the superclass
 * FedoraConnection. The properties file may already hold connection
 * initialisation values; a small dialog then asks for the host, port,
 * administrative username and password of the fedora server, offering
 * the values from the file as defaults. Fields for which the file has
 * no value start out blank. The password field may be left blank if no
 * password was set on the fedora repository at installation time.
 *
 * @param propertiesFilename the properties file to read initial
 * connection values from
 */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // whose override in this class tries to create the GSearchConnection.
    super(propertiesFilename);
}
174
175 /** The superclass constructor calls this method passing any preset
176 * properties loaded from a propertiesFile. This method is overridden
177 * here in order to instantiate the gSearchConnection based on the
178 * - gSearchWSDLSuffix that will be appended to the fedora base url.
179 * (If one was not provided in the properties file, gSearchWSDLURL defaults
180 * to something of the form
181 * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
182 * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
183 * "gsearch/services/FgsOperations?wsdl".
184 * - name of the index into which the GS3 documents have been indexed
185 * and which FedoraGenericSearch should use to perform searches. If none is
186 * given in the properties file, then the index name defaults to "FgsIndex"
187 * (no longer BasicIndex or FedoraIndex).
188 * @param properties is the Properties Map loaded from a properties file
189 * (if there was any) which specifies such things as host and port of the
190 * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
191 * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
192 * to whatever the final value of this.gSearchWSDLURL' suffix is, and
193 * "gsearch.indexName" will be set to to whatever the final value of
194 * this.gSearchIndexName is.
195 */
196 protected void setInitialisationProperties(Properties properties)
197 throws ParserConfigurationException, MalformedURLException,
198 CancelledException, ConnectException, RemoteException,
199 SSLHandshakeException, Exception
200 {
201 super.setInitialisationProperties(properties);
202 // gsearchWSDL URL suffix, if not specified, defaults to
203 // "fedoragsearch/services/FgsOperations?wsdl" which is
204 // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
205 this.gSearchWSDLSuffix = properties.getProperty(
206 "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
207 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
208 // Set the property to whatever this.gSearchWSDLURL is now,
209 // so that it will be written out to the properties file again
210 properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
211
212 // Similarly for the name of the index FedoraGenericSearch should use
213 // when performing searches for GS3 docs stored in Fedora's repository.
214 this.gSearchIndexName = properties.getProperty(
215 "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
216 properties.setProperty("gsearch.indexName", this.gSearchIndexName);
217 // Create a connection to FedoraGSearch's web services:
218 initSearchFunctionality();
219 }
220
221 /** Overridden init method to work with the 5 argument constructor, so that we can
222 * bypass using setInitialisationProperties() which works with a Properties map.
223 */
224 protected void init(String protocol, String host, String port,
225 final String fedoraServerUsername, final String fedoraServerPassword)
226 throws ParserConfigurationException, MalformedURLException,
227 AuthenticationFailedException, RemoteException, Exception
228 {
229 super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
230 this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
231 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
232 this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
233
234 // Now need to set username and password for accessing WSDL (after GSearch 2.2)
235 // http://stackoverflow.com/questions/3037221/401-error-when-consuming-a-web-service-with-http-basic-authentication-using-cxf
236
237 // The java.net.Authenticator can be used to send user credentials when needed.
238 Authenticator.setDefault(new Authenticator() {
239 @Override
240 protected PasswordAuthentication getPasswordAuthentication() {
241 return new PasswordAuthentication(
242 fedoraServerUsername,
243 fedoraServerPassword.toCharArray());
244 }
245 });
246
247 initSearchFunctionality();
248 }
249
250
251 /** Init method that instantiates a GSearchConnection object used
252 * to work with the separate FedoraGSearch web services.
253 * The url of the WSDL for FedoraGSearch's web services is worked out
254 * from the baseURL of the Fedora server.
255 */
256 protected void initSearchFunctionality()
257 {
258 try {
259 this.fedoraGSearch = null;
260 this.fedoraGSearch = new GSearchConnection(
261 gSearchWSDLURL, gSearchIndexName);
262 this.serviceNames = SERVICES;
263 } catch(Exception e){
264 LOG.error("Cannot connect to FedoraGSearch's web services at "
265 + gSearchWSDLURL + "\nQuery services will not be available.", e);
266 // Exception, e, as parameter prints the stacktrace of the exception to the log
267
268 // If an exception occurs, something has gone wrong when
269 // trying to connect to FedoraGSearch's web services. This
270 // means, we can't offer query services, as that's provided
271 // by FedoraGSearch
272 serviceNames = null;
273 int countOfNonQueryServices = 0;
274 for(int i = 0; i < SERVICES.length; i++) {
275 // do not count query services
276 if(!SERVICES[i].toLowerCase().contains("query")) {
277 countOfNonQueryServices++;
278 }
279 }
280 // Services now supported are everything except Query services
281 serviceNames = new String[countOfNonQueryServices];
282 int j = 0;
283 for(int i = 0; i < SERVICES.length; i++) {
284 if(!SERVICES[i].toLowerCase().contains("query")) {
285 serviceNames[j] = SERVICES[i];
286 j++; // valid serviceName, so increment serviceName counter
287 }
288
289 }
290 }
291 }
292
293 /** @return the gSearchWSDLURL, the url of the WSDL for the
294 * FedoraGSearch web services */
295 public String getGSearchWSDLURL() { return gSearchWSDLURL; }
296
297 /** Sets the member variable gSearchWSDLURL that specify the location of
298 * the WSDL file of FedoraGSearch's web services. Then it attempts
299 * to instantiate a connection to those web services.
300 * @param url is the new url of the GSearch web services WSDL file */
301 public void setGSearchWSDLURL(String url) {
302 this.gSearchWSDLURL = url;
303 initSearchFunctionality();
304 }
305
306 /** @return the gSearchIndexName, the name of the index Fedora Generic
307 * Search will search in (where GS3 docs have been indexed into). */
308 public String getGSearchIndexName() { return gSearchIndexName; }
309
310 /** Sets the member variable gSearchIndexName that specifies the name
311 * of the index containing indexed GS3 documents. Then it attempts
312 * to instantiate a connection to the Fedora GSearch web services using
313 * this changed value for indexName.
314 * @param indexName is the new name of the index containing indexed GS3
315 * docs that GSearch should search in. */
316 public void setGSearchIndexName(String indexName) {
317 this.gSearchIndexName = indexName;
318 initSearchFunctionality();
319 }
320
321 /** @return the array of the services actually supported by FedoraGS3 */
322 protected String[] getServiceNames() { return this.serviceNames;}
323
324 /**
325 * For finding out if the sectionNumber is given as part of the docID.
326 * @param docID is the String that contains the docPID and may also
327 * contain the section number.
328 * @return true if the document identifier docID contains a section-
329 * number, and false if it consists solely of the docPID.
330 * That is, true is returned if
331 * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
332 * and false is returned if
333 * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
334 * */
335 protected boolean containsSectionNumber(String docID) {
336 // if there are two hyphens in the docID, then there are sections
337 // (and the section number is appended at end of docID)
338 // docID = "greenstone:colName-<docPID>-<sectionNum>"
339 return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
340 }
341
342 /** This method will extract the docPID from docID and return it.
343 * (If a sectionNumber is suffixed to the docID, the docPID which is
344 * the prefix is returned; otherwise the docID is the docPID and is
345 * returned)
346 * @param docID is the String that contains the docPID and may also
347 * contain the section number.
348 * @return only the docPID portion of the docID.
349 */
350 protected String getDocPIDFromDocID(String docID) {
351 if(containsSectionNumber(docID))
352 return docID.substring(0, docID.lastIndexOf(HYPHEN));
353 // else (if there's no sectionNumber), docID is the docPID
354 return docID;
355 }
356
357 /** This method will return the section Number, if there's any
358 * suffixed to the docID. Otherwise it will return the empty string
359 * @param docID is the String that contains the docPID and may also
360 * contain the section number.
361 * @return only the sectionID portion of the docID - if any, else "".
362 */
363 protected String getSectionIDFromDocID(String docID) {
364 if(containsSectionNumber(docID))
365 return docID.substring(
366 docID.lastIndexOf(HYPHEN)+1, docID.length());
367 return "";
368 }
369
370 /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
371 * response message that gives the metadata for each collection identified
372 * @param collIDs is an array of fedora pids identifying collections in the
373 * fedora repository
374 * @return a GS3 DocumentMetadataRetrieve response message containing the
375 * EX metadata for all the requested collections */
376 public String getCollectionMetadata(String[] collIDs) {
377 return getMetadata(collIDs, new String[] {"all"});
378 }
379
380 /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
381 * response message is returned containing the metadata for each document.
382 * @param docIDs is an array of document identifiers (docID can either be
383 * &lt;pid&gt;s items (documents) in the fedora repository, or
384 * "&lt;pid&gt;-sectionNumber".
385 * @return a GS3 DocumentMetadataRetrieve response message containing the
386 * EX, DC, DLS metadata for all the requested documents
387 * @param metadata is the list of metadata elements to be retrieved for each doc */
388 public String getDocumentMetadata(String[] docIDs, String[] metadata) {
389 return getMetadata(docIDs, metadata);
390 }
391
392 /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
393 * response message that gives the metadata for the collection identified
394 * @param collID is a fedora pid identifying a collection in its repository
395 * @return a GS3 DocumentMetadataRetrieve response message containing the
396 * EX metadata for the requested collection
397 * @param metadata is the list of metadata elements to be retrieved for each doc */
398 public String getCollectionMetadata(String collID) {
399 return getMetadata(new String[] {collID}, new String[] {"all"});
400 }
401
402 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
403 * response message containing the metadata for the document.
404 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
405 * of an item (document) in the fedora repository, or it can be
406 * "&lt;pid&gt;-sectionNumber".
407 * @return a GS3 DocumentMetadataRetrieve response message containing the
408 * EX, DC, DLS metadata for the requested document */
409 public String getDocumentMetadata(String docID, String[] metadata) {
410 return getMetadata(new String[] {docID}, metadata);
411 }
412
413 /** @return a greenstone DocumentMetadataRetrieve response for the
414 * documents or collections indicated by the docIDsOrCollIDs.
415 * @param docIDsOrCollIDs is an array of identifiers which may be either the
416 * fedora pids for collections, or otherwise may be a document identifier.
417 * In the last case, the document ID may consist of either
418 * "documentPID-sectionNumber" or may just be just fedora documentPID
419 * @param metadata is the list of metadata elements to be retrieved for each doc */
420 public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
421 {
422 Document doc = builder.newDocument();
423 FedoraGS3RunException ex = null;
424
425 Element docNodeList = doc.createElement(
426 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
427
428 try{
429 for(int i = 0; i < docIDsOrCollIDs.length; i++) {
430 // create the <documentNode> containing the metadata
431 // for each document docID
432 Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
433 docNodeList.appendChild(docNode);
434 }
435 } catch(Exception e) {
436 ex = new FedoraGS3RunException(e);
437 ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
438 }
439
440 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
441 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
442 try{
443 return FedoraCommons.elementToString(responseMsg);
444 } catch(TransformerException e) {
445 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
446 + " " + e;
447 }
448 }
449
450 /** Method that takes a new DOM document, as well as an identifier of either
451 * a collection or document (which may be a fedora pid for the collection
452 * or document, or may be the documentPid-sectionNumber for a document) and
453 * returns a documentNode element for it:
454 * &lt;documentNode&gt;&lt;metadataList&gt;
455 * &lt;metadata name=""&gt;value&lt;/metadata&gt;
456 * ...
457 * &lt;/metadataList&gt;&lt;/documentNode&gt;
458 * @return documentNode containing the metadata for the collection or
459 * document given by parameter ID
460 * @param id denotes a collection pid, a document pid or a docID of the
461 * form "documentpid-sectionNumber"
462 * @param metadata is the list of metadata elements to be retrieved for each doc */
463 protected Element getMetadata(Document doc, String id, String[] metadata)
464 throws RemoteException, UnsupportedEncodingException,
465 SAXException, IOException
466 {
467 // We're going to create the documentNode nested inside the following
468 // documentNodeList:
469 // <documentNodeList>
470 // <documentNode nodeID=""><metadataList>
471 // <metadata name="">value</metadata>
472 // </metadataList></documentNode>
473 // <documentNode>...</documentNode>
474 // </documentNodeList>
475 // <documentNodeList>
476
477 // <documentNode nodeID="docID"> - the docNode on which a metadata
478 // retrieve is being performed
479 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
480 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
481 attribute.setValue(id);
482 docNode.setAttributeNode(attribute);
483
484 // <metadataList>
485 Element metadataList = doc.createElement(
486 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
487
488 String ex = "";
489 String dc = "";
490 String dls = "";
491 if(id.endsWith(_COLLECTION)) { // docID refers to a collection
492 // Obtain the "EX" datastream (extracted metadata) for the collection
493 ex = this.getEX(id);
494 }
495 else { // docID refers to a document
496
497 docNode.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
498 docNode.setAttribute(GSXML.NODE_RANK_ATT, "NaN");
499
500 // work out the document's fedora PID and section ID, and then
501 // obtain the EX (extracted metadata) and DC datastreams for the doc
502
503 // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
504 // is the same as for pid="greenstone:<colname>-docPID"
505 // That is, <Section id="1"> refers to the toplevel document docPID
506 // If requested for top-level document, there may also be DLS meta
507 String sectionID = getSectionIDFromDocID(id);
508 String docPID = getDocPIDFromDocID(id);
509 if(sectionID.equals("") || sectionID.equals("1")) {
510 // metadata of toplevel document is requested
511 ex = this.getEX(docPID); // slightly faster than doing
512 //getSectionEXMetadata(docID, "1")
513 dc = this.getDC(docPID);
514 dls = this.getDLS(docPID);
515 docNode.setAttribute(GSXML.NODE_TYPE_ATT, "root");
516 }
517 else {
518 ex = getSectionEXMetadata(docPID, sectionID);
519 dc = getSectionDCMetadata(docPID, sectionID);
520 docNode.setAttribute(GSXML.NODE_TYPE_ATT, "leaf");
521 }
522 }
523
524 String metafields = "";
525 for(int i = 0; i < metadata.length; i++) {
526 metafields = metafields + metadata[i] + "|";
527 }
528
529 // Adding in metadata sets in alphabetical order
530 // DC metadata for a top-level document is different from EX, DLS:
531 // only the element's namespace prefix is "dc", the rest of a tagname
532 // is unknown.
533 if(!dc.equals("")) {
534 addMetadataWithNamespacedTagNames(doc, metadataList,
535 dc, DC, metafields);
536 }
537
538 // Check if we were supposed to process dls and dc metadata
539 // as well. We only ever do this for top-level documents,
540 // in which case, dls and dc will be non-empty strings
541 if(!dls.equals("")) {
542 addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
543 }
544
545 // we definitely have an EX metadatastream for each
546 // collection object, top-level document object,
547 // and document section item
548 addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
549
550 // now the metadataList has been built up
551 docNode.appendChild(metadataList);
552
553 return docNode; // return <documentNode> containing the metadata
554 }
555
556 /** This method retrieves all the metadata elements in the metaDataStream
557 * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
558 * metadataSetNS is the namespace of each tag, and creates a new element of
559 * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
560 * each. Each of these are then appended to the metadataList parameter.
561 * @param doc is the Document object using which the new metadata Elements
562 * are to be constructed
563 * @param metadataList is the &lt;metadataList&gt; Element to which the new
564 * metadata Elements are to be appended as children.
565 * @param metaDatastream the metadata datastream in string form (e.g. the
566 * Dublin Core metadata stored in the Fedora repository).
567 * @param metadataSet is the constant datastream identifier, e.g. "DC".
568 * At present this method applies to the DC metadata and any others like it
569 * where each tagname is different except for the constant dc: namespace.
570 * @param metafields is a | separated string containing the metadatafields to
571 * extract or "all" if all fields are requested
572 */
573 protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
574 String metaDatastream, String metadataSet, String metafields)
575 throws SAXException, IOException
576 {
577 Document src = builder.parse(
578 new InputSource(new StringReader(metaDatastream)));
579
580 // The following doesn't work for some reason: to retrieve all elements
581 // whose namespace prefix starts with "dc", we pass "*" for localName
582 //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
583
584 // Longer way: get the children of the root document
585 NodeList children = src.getDocumentElement().getChildNodes();
586
587 for(int i = 0; i < children.getLength(); i++) {
588 String nodeName = children.item(i).getNodeName();
589 // check that the nodename starts with the metadataSet ("dc") namespace,
590 // which simultaneously ensures that the node's an element:
591 if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
592 // need to have a period for Greenstone instead of Fedora's colon
593 nodeName = nodeName.replace(COLON, PERIOD);
594 if(metadataSet.equals(DC)) { // dc:title -> dc.Title
595 nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
596 + nodeName.substring(4);
597 }
598
599 // get the requested metadata fields
600 if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
601 Element metatag = (Element)children.item(i);
602 String value = FedoraCommons.getValue(metatag);
603 // <dc:tagname>value</dc:tagname>
604 // we're going to put this in our metadata element as
605 // <metadata name="dc.Tagname">value</metadata>
606
607 // create metadata of (name, value) pairs in target DOM (doc)
608 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
609 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
610
611 attribute.setValue(nodeName);
612 metadata.setAttributeNode(attribute);
613 Text content = doc.createTextNode(value);
614 metadata.appendChild(content);
615 metadataList.appendChild(metadata);
616 }
617 }
618 }
619 }
620
621 /** This method retrieves all the metadata elements in the metaDataStream
622 * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt;
623 * where "namespace" is the namespace prefix of each tag, and metadataName
624 * is the name of the metadata (like author, title). For each element
625 * it creates a corresponding new element of the form
626 * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
627 * Each of these are then appended to the metadataList parameter.
628 * @param doc is the Document object using which the new metadata Elements
629 * are to be constructed
630 * @param metadataList is the &lt;metadataList&gt; Element to which the new
631 * metadata Elements are to be appended as children.
632 * @param metaDatastream the metadata datastream in string form (e.g. the
633 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
634 * repository).
635 * @param metadataSet is the constant datastream identifier,
636 * e.g. "DLS" or "EX".
637 * At present this method applies to the DLS and EX metadata as they have
638 * constant tagnames throughout.
639 * @param metafields is a | separated string containing the metadatafields to
640 * extract or "all" if all fields are requested.
641 */
642 protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
643 String metaDatastream, String metadataSet, String metafields)
644 throws SAXException, IOException
645 {
646 // Namespace prefix can be "ex:" or "dls:"
647 String namespacePrefix = "";
648 if(!metadataSet.equals(EX)) {
649 // need to have a period for Greenstone instead of Fedora's colon
650 namespacePrefix = metadataSet.toLowerCase() + PERIOD;
651 }
652
653 Document src = builder.parse(
654 new InputSource(new StringReader(metaDatastream)));
655 NodeList metaTags = src.getElementsByTagName(
656 metadataSet.toLowerCase()+COLON+METADATA);
657 // Looking for tagnames: <ex:metadata> or <dls:metadata>
658
659 for(int i = 0; i < metaTags.getLength(); i++) {
660 Element metatag = (Element)metaTags.item(i);
661
662 // extract the metadata of (name, value) pairs from src DOM
663 // look for <metadata name="name">value</metadata>
664 String name = metatag.hasAttribute(NAME) ?
665 metatag.getAttribute(NAME) : "";
666 // sometimes, there are several metadata for the same name, in this
667 // case, look for a qualifier and append its value to the name to
668 // distinguish it uniquely:
669 if(metatag.hasAttribute(QUALIFIER)) {
670 name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
671 }
672 name = namespacePrefix + name; // prefix with namespace, if any
673 if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
674 String value = FedoraCommons.getValue(metatag);
675
676 // create metadata of (name, value) pairs in target DOM (doc)
677 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
678 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
679 attribute.setValue(name);
680 metadata.setAttributeNode(attribute);
681 Text content = doc.createTextNode(value);
682 metadata.appendChild(content);
683
684 metadataList.appendChild(metadata);
685 }
686 }
687 }
688
689 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
690 * response message containing ONLY the Title metadata for the document.
691 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
692 * of an item (document) in the fedora repository, or it can be
693 * "&lt;pid&gt;-sectionNumber".
694 * @return a GS3 DocumentMetadataRetrieve response message containing the
695 * Title metadata for the requested document */
696 public String getTitleMetadata(String docID) {
697 return getTitleMetadata(new String[] { docID });
698 }
699
700 /** Given a list of document identifiers, returns a GS3 DocumentMetadataRetrieve
701 * response message containing ONLY the Title metadata for the documents.
702 * @param docIDs is a list of document identifiers (where docID can either be
703 * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
704 * "&lt;pid&gt;-sectionNumber".
705 * @return a GS3 DocumentMetadataRetrieve response message containing the
706 * Title metadata for all the requested documents */
707 public String getTitleMetadata(String[] docIDs) {
708 // Must create message of the following form:
709 // <documentNodeList><documentNode nodeID="docID">
710 // <metadataList><metadata name="Title">sometitle</metadata>
711 // </metadataList></documentNode>
712
713 Document doc = builder.newDocument();
714 FedoraGS3RunException ex = null;
715
716 Element docNodeList = doc.createElement(
717 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
718 try{
719 for(int i = 0; i < docIDs.length; i++) {
720 Element docNode = getTitleMetadata(doc, docIDs[i]);
721 docNodeList.appendChild(docNode);
722 }
723 }catch(Exception e) {
724 ex = new FedoraGS3RunException(e);
725 //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
726 ex.setSpecifics("EX metadata datastream");
727 }
728
729 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
730 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
731 try{
732 return FedoraCommons.elementToString(responseMsg);
733 } catch(TransformerException e) {
734 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
735 + " " + e;
736 }
737 }
738
739 /** Method that takes a new DOM document, as well as an identifier of either
740 * a document or document section and returns a documentNode element containing
741 * the title metadata for it:
742 * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
743 * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
744 * &lt;/metadataList&gt;&lt;/documentNode&gt;
745 * @return documentNode containing the metadata for the collection or
746 * document given by parameter ID
747 * @param docID denotes the id of a document or a document section, so id
748 * is either a document-pid or it's of the form documentpid-sectionNumber */
749 protected Element getTitleMetadata(Document doc, String docID)
750 throws RemoteException, UnsupportedEncodingException,
751 SAXException, IOException
752 {
753 // Returns a docNode element of the following form:
754 // <documentNode nodeID="docID">
755 // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
756 // </documentNode>
757
758 // <documentNode nodeID="docID">
759 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
760 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
761 attribute.setValue(docID);
762 docNode.setAttributeNode(attribute);
763
764 // <metadataList>
765 Element metaList = doc.createElement(
766 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
767 // <metadata name="Title">
768 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
769 // if we connect it all up (append children), we can immediately add
770 // the name attribute into the metadata element:
771 metaList.appendChild(metadata);
772 docNode.appendChild(metaList);
773 metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
774
775 String title = "";
776 String sectionID = getSectionIDFromDocID(docID);
777 String docPID = getDocPIDFromDocID(docID);
778
779 // check if title of toplevel document is requested
780 if(sectionID.equals(""))
781 title = this.getDocTitle(docPID);
782 else { // title of document section
783 title = this.getSectionTitle(docPID, sectionID);
784 }
785
786 metadata.appendChild(doc.createTextNode(title));
787
788 return docNode;
789 }
790
791 /** @return a String representing Greenstone3 DocumentStructureRetrieve XML
792 * containing the requested portion of the document structure of the documents
793 * indicated by docIDs:
794 * @param docID is the document identifier of the document whose hierarchical
795 * structure is requested. The name of the collection is already included in the
796 * docID for a Fedora DL.
797 * @param structure - strings specifying the required structure of the document.
798 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
799 * @param info - strings specifying the required structural info of the document.
800 * It can be any combination of: siblingPosition, numSiblings, numChildren.
801 */
802 public String getDocumentStructure(String docID, String[] structure, String[] info) {
803 return getStructure(new String[]{docID}, structure, info);
804 }
805
806
807 /** @return a String representing Greenstone3 DocumentStructureRetrieve XML
808 * containing the requested portion of the document structure of the documents
809 * indicated by docIDs:
810 * @param docIDs is an array of document identifiers of documents whose
811 * hierarchical structures are requested. The name of the collection is already
812 * included in the docID for a Fedora DL.
813 * @param structure - strings specifying the required structure of each document.
814 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
815 * @param info - strings specifying the required structural info of each document.
816 * It can be any combination of: siblingPosition, numSiblings, numChildren.
817 */
818 public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
819 return getStructure(docIDs, structure, info);
820 }
821
822 /**
823 * Returns a greenstone3 DocumentStructureRetrieve XML response message
824 * containing the document structures for the given docIDs.
825 * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
826 * greenstone formatted XML is returned. The requested section of the table
827 * of contents (TOC) for a document is converted into the greenstone3 xml
828 * format that is returned upon DocumentStructureRetrieve requests.
829 * @param docIDs the documentIDs for which the section's structure is returned;
830 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
831 * @param structure - the structure of the sections to return. Can be any combination of:
832 * ancestors, parent, siblings, children, descendants, entire.
833 * @param infos - strings containing any combination of the values: numChildren, numSiblings,
834 * siblingPosition. The requested info gets added as attributes to the returned root element.
835 * @return a greenstone3 DocumentStructureRetrieve XML response message in
836 * String format with the structure of the docIDs requested.
837 */
838 protected String getStructure(String[] docIDs, String[] structure, String[] infos)
839 {
840 Document doc = builder.newDocument();
841 FedoraGS3RunException ex = null;
842 // <documentNodeList>
843 Element docNodeList = doc.createElement(
844 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
845
846 try{
847 // append the <documentNodes> for the docIDs
848 // to the docNodeList
849 //getStructureElement(docNodeList, docIDs, levels);
850 getStructureElement(docNodeList, docIDs, structure, infos);
851 } catch(Exception e) {
852 ex = new FedoraGS3RunException(e);
853 ex.setSpecifics("(requested portion of) TOC datastream");
854 }
855 // insert our <documentNodeList> into a GS3 response message
856 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
857 GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
858 try{
859 return FedoraCommons.elementToString(responseMsg);
860 } catch(TransformerException e) {
861 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
862 + " " + e;
863 }
864 }
865
866
867 /** Given a &lt;documentNodeList&gt; portion of a greenstone3
868 * DocumentStructureRetrieve XML response message, this method will populate
869 * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
870 * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
871 * the doc structures are appended.
872 * @param docIDs the documentIDs for which the section's structure is returned;
873 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
874 * @param structures - the structure of the sections to return. Can be any combination of:
875 * ancestors, parent, siblings, children, descendants, entire.
876 * @param infos - a string containing any combination of the values: numChildren, numSiblings,
877 * siblingPosition. The requested info gets added as attributes to the returned root element.
878 */
879 protected void getStructureElement(Element docNodeList, String[] docIDs,
880 String[] structures, String[] infos)
881 throws RemoteException, UnsupportedEncodingException, SAXException,
882 IOException
883 {
884 // Make one string out of requested structure components, and one string from info components
885 String structure = "";
886 String info = "";
887 for(int i = 0; i < structures.length; i++) {
888 structure = structure + structures[i] + "|";
889 }
890 for(int i = 0; i < infos.length; i++) {
891 info = info + infos[i] + "|";
892 }
893
894 // process each docID
895 for(int i = 0; i < docIDs.length; i++) {
896 // work out the document's fedora PID and section ID
897 String sectionID = getSectionIDFromDocID(docIDs[i]);
898 String docPID = getDocPIDFromDocID(docIDs[i]);
899 if(sectionID.equals("")) {
900 sectionID = "1";
901 }
902
903 // get the required section, along with children or descendants
904 Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
905 Document doc = docNodeList.getOwnerDocument();
906
907 // copy-and-convert that structure into a structure format for GS3
908 Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
909
910 if(!info.equals("")) {
911 // <nodeStructureInfo>
912 // <info name="" value="" />
913 // <info name="" value="" />
914 // ...
915 // </nodeStructureInfo>
916 Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
917 Element root = srcDocElement.getOwnerDocument().getDocumentElement();
918
919 if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
920 String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
921 Element infoEl = doc.createElement(GSXML.INFO_ATT);
922 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
923 infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
924 nodeStructureInfo.appendChild(infoEl);
925 }
926
927 if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
928 String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
929 Element infoEl = doc.createElement(GSXML.INFO_ATT);
930 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
931 infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
932 nodeStructureInfo.appendChild(infoEl);
933 }
934
935 if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
936 String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
937 Element infoEl = doc.createElement(GSXML.INFO_ATT);
938 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
939 infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
940 nodeStructureInfo.appendChild(infoEl);
941 }
942
943 if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
944 String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
945 Element infoEl = doc.createElement(GSXML.INFO_ATT);
946 infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
947 infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
948 nodeStructureInfo.appendChild(infoEl);
949 }
950
951 docNode.appendChild(nodeStructureInfo);
952 }
953
954 // add it to our list of documentNodes
955 docNodeList.appendChild(docNode);
956 }
957 }
958
959
960 /**
961 * Takes the portion of the XML document outlining the structure of the
962 * document (section)--in the format this is stored in Fedora--and returns
963 * Greenstone 3 DOM XML format for outlining document structure.
964 * @return a &lt;documentNode&gt; element that contains a greenstone3
965 * DocumentStructureRetrieve XML corresponding to the parameter Element section
966 * (which is in fedora XML), for the document indicated by docID.
967 * @param requestingDocID is the identifier of the document for which the
968 * structure was requested. It's this document's children or descendants that
969 * will be returned. Note that this is not always the same as (clear from)
970 * parameter docID.
971 * @param docID is the documentID for which the section's structure is
972 * returned where docID = "docPID-sectionNumber".
973 * @param section - the fedora section XML that is being mirrored in
974 * greenstone3 format.
975 */
976 protected Element getStructure(Document doc, String requestingDocID,
977 String docID, Element section)
978 {
979 // we want to mirror the section's DOM (given in fedora XML) in
980 // greenstone3's XML for a DocumentStructureRetrieve response.
981
982 // <documentNode nodeID="docID"> - the docNode on which a structure retrieve
983 // is being performed
984 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
985 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
986 attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
987 docNode.setAttributeNode(attribute);
988
989 // <nodeStructure>
990 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
991
992 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
993 Element rootNode = createDocNodeFromSubsection(doc, section, docID);
994
995 // fills in the subtree of the rootNode in our nodeStructure element
996 createDocStructure(doc, section, rootNode, docID);
997 //where section represents the root section
998
999 nodeStructure.appendChild(rootNode);
1000 docNode.appendChild(nodeStructure);
1001 return docNode;
1002 }
1003
1004
1005 /** Recursive method that creates a documentStructure mirroring parameter
1006 * section, starting from parameter parent down to all descendants
1007 * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
1008 * for the docPID whose substructure is to be mirrored
1009 * @param parent is the XML documentNode in the greenstone repository whose
1010 * descendants created by this method will correspond to the descendants of
1011 * parameter section.
1012 * @param doc is the document containing the parent;
1013 * @param docPID is the prefix of all nodeIDs in the parent's structure
1014 */
1015 protected void createDocStructure(
1016 Document doc, Element section, Element parent, String docPID)
1017 {
1018 // get the section's children (if any)
1019 NodeList children = section.getChildNodes();
1020 for(int i = 0; i < children.getLength(); i++) {
1021 Node n = children.item(i);
1022
1023 if(n.getNodeName().equals(SECTION_ELEMENT)) {
1024 //then we know it's an element AND that its tagname is "Section"
1025 Element subsection = (Element)n;
1026 Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1027 parent.appendChild(child);
1028
1029 // recursion call on newly found child-element and subsection
1030 createDocStructure(doc, subsection, child, docPID);
1031 }
1032 }
1033 }
1034
1035 /** Given a particular subsection element, this method creates a
1036 * Greenstone3 DocumentNode element that mirrors it.
1037 * @param doc is the document that will contain the created DocumentNode
1038 * @param docID is the prefix of all nodeIDs in the parent's structure
1039 * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1040 * TOC for the docPID which will be mirrored in the greenstone XML
1041 * documentNode that will be returned.
1042 * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1043 * &lt;Section&gt; element passed as parameter subSection. */
1044 protected Element createDocNodeFromSubsection(
1045 Document doc, Element subSection, String docID)
1046 {
1047 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1048 Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1049 docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1050 docNode.setAttributeNode(docType);
1051
1052 Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1053 String sectionID = subSection.hasAttribute(ID) ?
1054 subSection.getAttribute(ID) : "";
1055 if(sectionID.equals("1")
1056 && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1057 // reset the attribute without the section number (just "docID" may be important for democlient?)
1058 nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1059 } else {
1060 nodeID.setValue(docID + HYPHEN + sectionID);
1061 }
1062 //nodeID.setValue(docID + HYPHEN + sectionID);
1063 docNode.setAttributeNode(nodeID);
1064
1065 Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1066 if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1067 nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1068 }
1069 docNode.setAttributeNode(nodeType);
1070 return docNode;
1071 }
1072
1073
1074 /** Given an identifier that is either a docPID or a concatenation of
1075 * docPID+sectionID, this method works out the fedora assigned docPID and
1076 * sectionID and then calls getContentBody(docPID, sectionID) with those.
1077 * @param docID is expected to be of the form
1078 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1079 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1080 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1081 * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1082 * is returned! */
1083 public String getContent(String docID) {
1084 return this.getContent(new String[]{docID});
1085 }
1086
1087 /** Given an identifier that is a concatenation of docID+sectionID, this
1088 * method works out the fedora assigned docPID and sectionID and then calls
1089 * getContentBody(docPID, sectionID) with those.
1090 * @param docIDs is an array of document identifiers of the form
1091 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1092 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1093 * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1094 public String getContent(String[] docIDs) {
1095 Document doc = builder.newDocument();
1096 FedoraGS3RunException ex = null;
1097
1098 //<documentNodeList>
1099 Element docNodeList = doc.createElement(
1100 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1101
1102 try{
1103 for(int i = 0; i < docIDs.length; i++) {
1104 // get the sectionID and docPID from the docID
1105 String sectionID = this.removePrefix(
1106 getSectionIDFromDocID(docIDs[i]), SECTION);
1107 String docPID = getDocPIDFromDocID(docIDs[i]);
1108 if(sectionID.equals("")) // if no section is specified, get
1109 sectionID = "1"; // get the content for Section id="1"
1110
1111 // Get the contents for the requested section of document docPID
1112 String sectionContent = this.getContentBody(docPID, sectionID);
1113
1114 // set the nodeID attribute
1115 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1116 Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1117
1118 nodeId.setValue(docIDs[i]); // just set the docID which will contain
1119 // the docPID (and sectionID if already present)
1120
1121 docNode.setAttributeNode(nodeId);
1122 // set the text content to what was retrieved
1123 Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1124 Text textNode = doc.createTextNode(sectionContent.trim());
1125
1126 nodeContent.appendChild(textNode);
1127 docNode.appendChild(nodeContent);
1128 //add the documentNode to the docNodeList
1129 docNodeList.appendChild(docNode);
1130 }
1131 } catch(Exception e) {
1132 ex = new FedoraGS3RunException(e);
1133 ex.setSpecifics("requested doc Section datastream");
1134 }
1135 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1136 GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1137 try{
1138 return FedoraCommons.elementToString(responseMsg);
1139 } catch(TransformerException e) {
1140 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1141 + " " + e;
1142 }
1143 }
1144
1145 /** Gets the contents of a textNode from a section.
1146 * @return the text content of a section.
1147 * @param docPID the pid of the document from which a section's text is to
1148 * be retrieved.
1149 * @param sectionID is the section identifier of the document denoted by
1150 * docPID whose text is to be returned.
1151 */
1152 protected String getContentBody(String docPID, String sectionID)
1153 throws RemoteException, UnsupportedEncodingException,
1154 SAXException, IOException
1155 {
1156 String section = this.getSection(docPID, sectionID);
1157
1158 // the content is nested inside a <Section> element,
1159 // we extract it from there:
1160 InputSource source = new InputSource(new StringReader(section));
1161 Document doc = builder.parse(source);
1162
1163 // The document Element is the <Section> we want.
1164 // Get its text contents:
1165 section = FedoraCommons.getValue(doc.getDocumentElement());
1166
1167 // we are going to remove all occurrences of "_httpdocimg_/"
1168 // that precede associated filenames, because that's a GS3
1169 // defined macro for resolving relative urls. It won't help
1170 // with documents stored in fedora.
1171 section = section.replaceAll(GS3FilePathMacro+"/", "");
1172 return section;
1173 }
1174
1175 /** Here we create the greenstone's response message element:
1176 * &lt;message&gt;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1177 * @return a greenstone response-message element.
1178 * @param doc - the Document object which should be used to create the
1179 * &lt;message&gt; and &lt;response&gt; elements
1180 * @param content - the element that is to be nested inside &lt;response&gt;
1181 * @param ex - any exception that occurred when trying to create
1182 * the content parameter
1183 * @param responseType - the value for the type attribute of &lt;response&gt;,
1184 * such as "describe", "retrieve", "browse", "query"...
1185 * @param originator - indicates the collectionName or service (like
1186 * DocumentContentRetrieve) from where this response message originates
1187 */
1188 protected Element createResponseMessage(Document doc, Element content,
1189 Exception ex, String responseType, String originator)
1190 {
1191 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1192 // from = "FedoraGS3"
1193 Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1194 attribute.setValue(originator);
1195 response.setAttributeNode(attribute);
1196
1197 // type = "describe" or "process" - whatever's given in requestType:
1198 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1199 attribute.setValue(responseType);
1200 response.setAttributeNode(attribute);
1201
1202 if(content != null)
1203 response.appendChild(content);
1204
1205 // we'll create an error element for RemoteExceptions (web service problems)
1206 // and UnsupportedEncodingExceptions and
1207 if(ex != null) {
1208 Element error = doc.createElement(GSXML.ERROR_ELEM);
1209 error.appendChild(doc.createTextNode(ex.getMessage()));
1210 // now append the error to the <response> element (after
1211 // the content element whatever that was)
1212 response.appendChild(error);
1213 }
1214
1215 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1216 message.appendChild(response);
1217 doc.appendChild(message);
1218 return message;
1219 }
1220
1221 /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1222 * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1223 * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1224 * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1225 * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1226 * @param doc - the Document object which should be used to create the
1227 * &lt;serviceList&gt; element */
1228 protected Element createServiceList(Document doc)
1229 {
1230 Element serviceList = doc.createElement(
1231 GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1232
1233 for(int i = 0; i < serviceNames.length; i++) {
1234 // create the <service name="serviceName[i]" type="servicetype" />
1235 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1236
1237 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1238 attribute.setValue(serviceNames[i]);
1239 service.setAttributeNode(attribute);
1240
1241 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1242 if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1243 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1244 else if(serviceNames[i].contains("Query")) // search services
1245 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1246 else
1247 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1248 service.setAttributeNode(attribute);
1249
1250 // add the service element to the serviceList element
1251 // <serviceList><service /></serviceList>
1252 serviceList.appendChild(service);
1253 }
1254 return serviceList;
1255 }
1256
1257 /** @return a GS3 response message for a describe services request:
1258 * indicating the list of services supported by the Fedora-Greenstone
1259 * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1260 * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1261 * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1262 * serviceNames. */
1263 public String getServiceList()
1264 {
1265 Document doc = builder.newDocument();
1266 Element serviceList = createServiceList(doc);
1267 // make <serviceList> the body of the responseMessage:
1268 // <message><response><serviceList></response></message>
1269 Element responseMsg = createResponseMessage(doc, serviceList, null,
1270 GSXML.REQUEST_TYPE_DESCRIBE, "");
1271 try {
1272 return FedoraCommons.elementToString(responseMsg);
1273 }catch(TransformerException e) {
1274 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1275 + " " + e;
1276 }
1277 }
1278
1279 /** @return a GS3 describe response message listing the collections and
1280 * collection-specific metadata stored in the Fedora-Greenstone repository. */
1281 public String getCollectionList()
1282 {
1283 Document doc = builder.newDocument();
1284 FedoraGS3RunException ex = null; // any RemoteException
1285
1286 // create the <collectionList /> element
1287 Element collectionList = doc.createElement(
1288 GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1289 try{
1290 String[] collectionNames = this.getCollectionNames(
1291 this.getCollections()); // this line could throw RemoteException
1292 for(int i = 0; i < collectionNames.length; i++) {
1293 // create the <collection name="somename" /> element
1294 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1295 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1296 attribute.setValue(collectionNames[i]);
1297 collection.setAttributeNode(attribute);
1298
1299 // append the <collection> element as child of <collectionList>
1300 collectionList.appendChild(collection);
1301
1302 //if(collection.hasAttribute(GSXML.NAME_ATT))
1303 //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1304 }
1305 } catch(RemoteException e) { // if this happens, perhaps it's because it
1306 // can't find Greenstone collections in fedora repository?
1307 ex = new FedoraGS3RunException(e);
1308 ex.setSpecifics(
1309 "greenstone collections in fedora repository");
1310 }
1311
1312 // make <collectionList> the body of the responseMessage:
1313 // <message><response><collectionList></response></message>
1314 Element responseMsg = createResponseMessage(doc, collectionList, ex,
1315 GSXML.REQUEST_TYPE_DESCRIBE, "");
1316 try{
1317 return FedoraCommons.elementToString(responseMsg);
1318 }catch(TransformerException e) {
1319 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1320 + " " + e;
1321 }
1322 }
1323
1324 /** @return a GS3 describe response message for a collection in the
1325 * Fedora-Greenstone repository.
1326 * @param collectionName - the name of the collection that is to be described.
1327 * It will be converted to a fedora collection pid, which is of the form
1328 * "greenstone:&lt;collectionName&gt;-collection". */
1329 public String describeCollection(String collectionName)
1330 {
1331 Document doc = builder.newDocument();
1332 FedoraGS3RunException ex = null;
1333
1334 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1335 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1336 attribute.setValue(collectionName);
1337 collection.setAttributeNode(attribute);
1338
1339 //<displayItem assigned="true" lang="en" name="name">
1340 //"some display name"</displayItem>
1341 Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1342
1343 attribute = doc.createAttribute(GSXML.LANG_ATT);
1344 attribute.setValue(this.lang);
1345 displayItem.setAttributeNode(attribute);
1346
1347 attribute = doc.createAttribute(GSXML.NAME_ATT);
1348 attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1349 displayItem.setAttributeNode(attribute);
1350
1351 try{
1352 Text textNode = doc.createTextNode(
1353 this.getCollectionTitle(getCollectionPID(collectionName)));
1354 displayItem.appendChild(textNode);
1355 } catch(Exception e) {
1356 // can't find Greenstone collections in fedora repository or problem
1357 // getting their titles from their metadata datastream?
1358 ex = new FedoraGS3RunException(e);
1359 ex.setSpecifics("greenstone collections or their metadata"
1360 + "in the fedora repository");
1361 }
1362 // now append the displayItem element as child of the collection element
1363 collection.appendChild(displayItem);
1364 // get the <serviceList> and add it into the collection description.
1365 // Services for all collections in the FedoraGS3 repository are the
1366 // same, offering a ClassifierBrowse to browse titles by starting letter
1367 // and DocRetrieve services: Content, Metadata and Structure.
1368
1369 Element serviceList = createServiceList(doc);
1370 collection.appendChild(serviceList);
1371
1372 Element responseMsg = createResponseMessage(doc, collection, ex,
1373 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1374 try{
1375 return FedoraCommons.elementToString(responseMsg);
1376 }catch(TransformerException e) {
1377 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1378 + " " + e;
1379 }
1380 }
1381
1382 /** @return a GS3 describe response message for the services of a collection
1383 * in the Fedora-Greenstone repository. So far, these services are the same for
1384 * all fedora collections: they are the services given in member variable
1385 * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1386 * ClassifierBrowseMetadataRetrieve.
1387 * All collections in this Digital Library (Fedora Repository) share the
1388 * same services, so this method returns the same services as getServiceList();
1389 * @param collectionName - the name of the collection whose services are to
1390 * be described. It will be converted to a fedora collection pid, which is of
1391 * the form "greenstone:&lt;collectionName&gt;-collection". */
1392 public String describeCollectionServices(String collectionName)
1393 {
1394 Document doc = builder.newDocument();
1395
1396 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1397 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1398 attribute.setValue(collectionName);
1399 collection.setAttributeNode(attribute);
1400
1401 Element serviceList = createServiceList(doc);
1402 collection.appendChild(serviceList);
1403
1404 Element responseMsg = createResponseMessage(doc, collection, null,
1405 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1406 try{
1407 return FedoraCommons.elementToString(responseMsg);
1408 }catch(TransformerException e) {
1409 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1410 + " " + e;
1411 }
1412 }
1413
1414 /** All collections in this Digital Library (Fedora Repository) share
1415 * the same services, so this method returns the same as
1416 * describeCollectionService(collName, serviceName).
1417 * @return a GS3 describe response message for the requested service
1418 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1419 * return nothing special except their names; browse (and any query)
1420 * return more complex XML responses.
1421 * @param serviceName - the name of the service in the collection which is to
1422 * be described.*/
1423 public String describeService(String serviceName)
1424 {
1425 // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1426 // we return:
1427 // <message><response from="<name>Retrieve" type="describe">
1428 // <service name="<name>Retrieve" type="retrieve" /></response></message>
1429 // But for browse (and any query) service, we return the data necessary
1430 // for displaying it
1431
1432 Document doc = this.builder.newDocument();
1433 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1434 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1435 attribute.setValue(serviceName);
1436 service.setAttributeNode(attribute);
1437
1438 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1439
1440 if(serviceName.toLowerCase().endsWith("retrieve")) {
1441 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1442 }
1443 else if(serviceName.toLowerCase().contains("browse")) {
1444 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1445
1446 // we need name and description <displayItem> elements
1447 Element displayItem
1448 = createNameValuePairElement(doc,
1449 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1450 service.appendChild(displayItem);
1451
1452 displayItem = createNameValuePairElement(doc,
1453 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1454 "Browse pre-defined classification hierarchies");
1455 service.appendChild(displayItem);
1456
1457 // now need a classifierList
1458 Element classifierList = doc.createElement(
1459 GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1460
1461 int classifierNum = 1;
1462 // append a <classifier content="some letter" name="CL#">
1463 // for each letter of the alphabet:
1464 Element classifier = createClassifierElement(doc, "TitleByLetter",
1465 classifierNum++, "titles by letter", "Browse titles by letter");
1466 // now add this <classifier> to the <classifierList>
1467 classifierList.appendChild(classifier);
1468
1469 // ANY MORE CLASSIFIERS? ADD THEM HERE
1470
1471 service.appendChild(classifierList);
1472 } // ELSE check for whether it is a query service
1473 else if(serviceName.toLowerCase().contains("query")) {
1474 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1475 if(serviceName.equals("TextQuery")) {
1476 describeTextQueryService(service);
1477 } else if(serviceName.equals("FieldQuery")) {
1478 describeFieldQueryService(service);
1479 }
1480 }
1481
1482 // don't forget to add the type attribute to the service!
1483 service.setAttributeNode(attribute);
1484
1485 String from = serviceName;
1486
1487 Element responseMsg = createResponseMessage(doc, service, null,
1488 GSXML.REQUEST_TYPE_DESCRIBE, from);
1489 try{
1490 return FedoraCommons.elementToString(responseMsg);
1491 }catch(TransformerException e) {
1492 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1493 + " " + e;
1494 }
1495 }
1496
1497 /** Appends children to the parameter service Element that make the
1498 * final service Element into a describe response XML for FedoraGS3's
1499 * TextQuery service.
1500 * @param service is the service Element that is being filled out. */
1501 protected void describeTextQueryService(Element service) {
1502 Document doc = service.getOwnerDocument();
1503 // we need name, submit (button) and description <displayItem> elements
1504 Element displayItem = createNameValuePairElement(doc,
1505 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1506 "Text Search");
1507 service.appendChild(displayItem);
1508
1509 displayItem = createNameValuePairElement(doc,
1510 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1511 service.appendChild(displayItem);
1512
1513 displayItem = createNameValuePairElement(doc,
1514 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1515 "Title and full-text search service");
1516 service.appendChild(displayItem);
1517
1518 //create the <paramList>
1519 Element paramList = doc.createElement(
1520 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1521
1522 // we ignore granularity to search at: it will always be
1523 // document and section level
1524 // we ignore casefolding: always on (that is, case is irrelevant)
1525 // we ignore document display order: always ranked
1526
1527 // Constructing the following:
1528 // <param default="100" name="maxDocs" type="integer">
1529 // <displayItem name="name">Maximum hits to return</displayItem>
1530 // </param>
1531 Element param = doc.createElement(GSXML.PARAM_ELEM);
1532
1533 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1534 attribute.setValue(MAXDOCS);
1535 param.setAttributeNode(attribute);
1536
1537 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1538 attribute.setValue("100");
1539 param.setAttributeNode(attribute);
1540
1541 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1542 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1543 param.setAttributeNode(attribute);
1544
1545 displayItem = createNameValuePairElement(doc,
1546 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1547 "Maximum hits to return");
1548 param.appendChild(displayItem);
1549
1550 paramList.appendChild(param);
1551
1552 // Constructing the following:
1553 // <param name="query" type="string">
1554 // <displayItem name="name">Query string</displayItem>
1555 // </param>
1556 param = doc.createElement(GSXML.PARAM_ELEM);
1557
1558 attribute = doc.createAttribute(GSXML.NAME_ATT);
1559 attribute.setValue(QUERY);
1560 param.setAttributeNode(attribute);
1561
1562 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1563 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1564 param.setAttributeNode(attribute);
1565
1566 displayItem = createNameValuePairElement(doc,
1567 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1568 "Query string");
1569 param.appendChild(displayItem);
1570
1571 paramList.appendChild(param);
1572
1573 service.appendChild(paramList);
1574 }
1575
1576 /** Appends children to the parameter service Element that make the
1577 * final service Element into a describe response XML for FedoraGS3's
1578 * FieldQuery service.
1579 * @param service is the service Element that is being filled out. */
1580 protected void describeFieldQueryService(Element service) {
1581 Document doc = service.getOwnerDocument();
1582 // we need name, submit (button) and description <displayItem> elements
1583 Element displayItem = createNameValuePairElement(doc,
1584 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1585 "Form Search");
1586 service.appendChild(displayItem);
1587
1588 displayItem = createNameValuePairElement(doc,
1589 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1590 service.appendChild(displayItem);
1591
1592 displayItem = createNameValuePairElement(doc,
1593 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1594 "Simple fielded search");
1595 service.appendChild(displayItem);
1596
1597 //create the <paramList>
1598 Element paramList = doc.createElement(
1599 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1600
1601 // we ignore granularity to search at: it will always be
1602 // document and section level
1603 // we ignore casefolding: always on (that is, case is irrelevant)
1604 // we ignore document display order: always ranked
1605
1606 // Constructing the following:
1607 // <param default="100" name="maxDocs" type="integer">
1608 // <displayItem name="name">Maximum hits to return</displayItem>
1609 // </param>
1610 Element param = doc.createElement(GSXML.PARAM_ELEM);
1611
1612 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1613 attribute.setValue(MAXDOCS);
1614 param.setAttributeNode(attribute);
1615
1616 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1617 attribute.setValue("100");
1618 param.setAttributeNode(attribute);
1619
1620 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1621 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1622 param.setAttributeNode(attribute);
1623
1624 displayItem = createNameValuePairElement(doc,
1625 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1626 "Maximum hits to return");
1627 param.appendChild(displayItem);
1628
1629 paramList.appendChild(param);
1630
1631 // Constructing the following:
1632 // <param name="simpleField" occurs="4" type="multi">
1633 // <displayItem name="name"></displayItem>
1634 //
1635 // <param name="query" type="string">
1636 // <displayItem name="name">Word or phrase </displayItem>
1637 // </param>
1638 //
1639 // <param default="allFields" name="fieldname" type="enum_single">
1640 // <displayItem name="name">in field</displayItem>
1641 //
1642 // <option name="docTitles">
1643 // <displayItem name="name">document titles</displayItem>
1644 // </option>
1645 // <option name="allTitles">
1646 // <displayItem name="name">document and section titles</displayItem>
1647 // </option>
1648 // <option name="fullText">
1649 // <displayItem name="name">full text</displayItem>
1650 // </option>
1651 // <option name="all">
1652 // <displayItem name="name">titles and full text</displayItem>
1653 // </option>
1654 // <option name="">
1655 // <displayItem name="name"></displayItem>
1656 // </option>
1657 // </param>
1658 // </param>
1659 Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1660 attribute = doc.createAttribute(GSXML.NAME_ATT);
1661 attribute.setValue(SIMPLEFIELD_ATT);
1662 rowOfParams.setAttributeNode(attribute);
1663
1664 // we want the row of controls to occur multiple times
1665 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1666 attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1667 rowOfParams.setAttributeNode(attribute);
1668
1669 attribute = doc.createAttribute(OCCURS_ATT);
1670 attribute.setValue("4"); // we want this row to occur 4 times
1671 rowOfParams.setAttributeNode(attribute);
1672
1673 // <param name="query" type="string">
1674 // <displayItem name="name">Word or phrase </displayItem>
1675 // </param>
1676 param = doc.createElement(GSXML.PARAM_ELEM);
1677
1678 attribute = doc.createAttribute(GSXML.NAME_ATT);
1679 attribute.setValue(QUERY);
1680 param.setAttributeNode(attribute);
1681
1682 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1683 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1684 param.setAttributeNode(attribute);
1685
1686 displayItem = createNameValuePairElement(doc,
1687 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1688 "Word or phrase");
1689 param.appendChild(displayItem);
1690 rowOfParams.appendChild(param);
1691
1692 // <param default="allFields" name="fieldName" type="enum_single">
1693 // <displayItem name="name">in field</displayItem>
1694 param = doc.createElement(GSXML.PARAM_ELEM);
1695 attribute = doc.createAttribute(GSXML.NAME_ATT);
1696 attribute.setValue(FIELDNAME_ATT);
1697 param.setAttributeNode(attribute);
1698
1699 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1700 attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1701 param.setAttributeNode(attribute);
1702
1703 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1704 attribute.setValue(ALL_FIELDS);
1705 param.setAttributeNode(attribute);
1706
1707 displayItem = createNameValuePairElement(doc,
1708 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1709 "in field");
1710 param.appendChild(displayItem);
1711
1712 String[] searchFieldNames
1713 = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1714 String[] searchFieldDisplay = {"all titles and full-text",
1715 "document titles only", "document and section titles",
1716 "full-text only"};
1717
1718 // for each fieldName create an option element and insert
1719 // the option into the enum_multi drop-down param:
1720 // <option name="fieldName">
1721 // <displayItem name="name">fieldName</displayItem>
1722 // </option>
1723 for(int i = 0; i < searchFieldNames.length; i++) {
1724 Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1725 attribute = doc.createAttribute(GSXML.NAME_ATT);
1726 attribute.setValue(searchFieldNames[i]);
1727 option.setAttributeNode(attribute);
1728
1729 displayItem = createNameValuePairElement(doc,
1730 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1731 searchFieldDisplay[i]);
1732 option.appendChild(displayItem);
1733 param.appendChild(option); // add option to the drop-down box
1734 }
1735
1736 rowOfParams.appendChild(param);
1737 paramList.appendChild(rowOfParams);
1738 service.appendChild(paramList);
1739 }
1740
1741 /**
1742 * @return a GS3 describe response message for the requested service
1743 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1744 * return nothing special except their names; browse (and any query)
1745 * return more complex XML responses.
1746 * All collections in this Digital Library (Fedora Repository) share
1747 * the same services, so this method returns the same as
1748 * describeService(serviceName).
1749 * @param collectionName - the name of the collection whose service is to
1750 * be described. It will be converted to a fedora collection pid, which is of
1751 * the form "greenstone:&lt;collectionName&gt;-collection".
1752 * @param serviceName - the name of the service in the collection which is to
1753 * be described. */
1754 public String describeCollectionService(String collectionName,
1755 String serviceName) {
1756 // collectionName can be ignored, because all services are FedoraGS3
1757 // services and are not unique to any particular (greenstone) collection.
1758 return describeService(serviceName);
1759 }
1760
1761 /** This method performs the implemented browse operation: allowing the
1762 * user to browse the titles of documents in the given collection by letter
1763 * and returning the results.
1764 * @param collectionName is the name of the collection whose documents
1765 * starting with the given letter will be returned.
1766 * @param classifierIDs are the ids of the classifiers on which to browse. In
1767 * this case, the classifier indicates whether we browse titles by letter, or
1768 * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
1769 * @param structures - the requested browse substructure. Can be any combination
1770 * of ancestors, parent, siblings, children, descendants.
1771 * @param infos - the requested structural info. Can be numSiblings,
1772 * siblingPosition, numChildren.
1773 * @return a GS3 ClassifierBrowse response message which lists all
1774 * the documents that start with the letter indicated by parameter classifier.
1775 */
1776 public String browse(String collectionName, String[] classifierIDs,
1777 String[] structures, String[] infos)
1778 {
1779 // Construct one string from the structures and structural info arrays
1780 String structure = "";
1781 String info = "";
1782 for(int i = 0; i < structures.length; i++) {
1783 structure = structure + structures[i] + "|";
1784 }
1785 for(int i = 0; i < infos.length; i++) {
1786 info = info + infos[i] + "|";
1787 }
1788
1789 Document doc = builder.newDocument();
1790 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1791
1792 // <classifierNodeList>
1793 Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1794
1795 for(int i = 0; i < classifierIDs.length; i++) {
1796 if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1797 browseTitlesByLetterClassifier(doc, classifierNodeList,
1798 collectionName, classifierIDs[i],
1799 structure, info);
1800 }
1801 }
1802
1803 Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1804 GSXML.REQUEST_TYPE_PROCESS, /*collectionName+/ */"ClassifierBrowse");
1805 try {
1806 return FedoraCommons.elementToString(responseMsg);
1807 } catch(TransformerException e) {
1808 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1809 + " " + e;
1810 }
1811 }
1812
1813 /** CL1 browsing classifier: browsing titles by starting letter.
1814 * The browsing structure is retrieved.
1815 * @param doc - the document object that will contain the CL1 browsing structure.
1816 * @param classifierNodeList - the classifiers will be added to this nodeList.
1817 * @param collectionName - name of the collection through which we are browsing CL1.
1818 * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1819 * a letter.
1820 * @param structure - the requested browse substructure. Can be any combination of
1821 * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1822 * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1823 * numChildren.
1824 * @return the classifierNodeList with the CL1 classifier browse structure.
1825 */
1826 public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1827 String collectionName, String classifierID,
1828 String structure, String info)
1829 {
1830 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1831
1832 if(structure.indexOf("entire") != -1) {
1833 structure = structure + "ancestors|descendants";
1834 }
1835
1836 // Structure of ancestors and children only at this stage
1837 int firstLevel = classifierID.indexOf('.');
1838 int secondLevel = classifierID.lastIndexOf('.');
1839
1840 // <nodeStructure>
1841 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1842
1843 // requested classifier node
1844 Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1845 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1846 attribute.setValue(classifierID);
1847 classNode.setAttributeNode(attribute);
1848
1849 if(firstLevel == -1) { // CL1 - toplevel node
1850 Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1851
1852 classifierNodeList.appendChild(classNode);
1853 classNode.appendChild(nodeStructure);
1854 nodeStructure.appendChild(root);
1855 root.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1856
1857 if(structure.indexOf("descendants") != -1) {
1858 getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1859 } else if(structure.indexOf("children") != -1) {
1860 getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1861 }
1862 // nothing to be done for siblings
1863 }
1864 else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1865
1866 if(structure.indexOf("parent") != -1
1867 || structure.indexOf("ancestors") != -1
1868 || structure.indexOf("siblings") != -1) {
1869 String toplevelID = classifierID.substring(0, firstLevel);
1870 Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1871 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1872 attribute.setValue(toplevelID);
1873 toplevelNode.setAttributeNode(attribute);
1874 Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1875
1876 classifierNodeList.appendChild(toplevelNode);
1877 toplevelNode.appendChild(nodeStructure);
1878 nodeStructure.appendChild(node);
1879 node.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1880
1881 if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1882 getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1883 // pass the requested node (classNode) so that it is attached in the correct
1884 // location among its siblings, and to ensure that it is not recreated.
1885 // getTitlesByLetterStructure() will append classNode to node
1886 } else {
1887 node.appendChild(classNode);
1888 classNode.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1889 }
1890 } else {
1891 Element node = (Element)classNode.cloneNode(true);
1892 classifierNodeList.appendChild(node);
1893 node.appendChild(nodeStructure);
1894 nodeStructure.appendChild(classNode);
1895 classNode.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1896 }
1897
1898 int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1899 char ch = (char)(num - 1 + 'A');
1900 if(structure.indexOf("descendants") != -1) {
1901 getTitlesForLetter(ch, collectionName, classNode, "descendants");
1902 } else if(structure.indexOf("children") != -1) {
1903 getTitlesForLetter(ch, collectionName, classNode, "children");
1904 }
1905 }
1906 else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1907 LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1908 }
1909
1910 return classifierNodeList;
1911 }
1912
1913 /** Creates a (CL1) subclassifier element for the docs whose titles start with
1914 * the given letter.
1915 * @param ch - the starting letter of the document titles to retrieve.
1916 * @param collectionName - name of the collection through which we are browsing CL1.
1917 * @param classifierNode - the docNodes found will be appended to this node.
1918 * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1919 * gets descendants of any documents found, otherwise gets just the children.
1920 * @return the given classifierNode which will have the child (or descendant) documents
1921 * appended to it.
1922 */
1923 public Element getTitlesForLetter(char ch, String collectionName,
1924 Element classifierNode, String depthStructure)
1925 {
1926 Document doc = classifierNode.getOwnerDocument();
1927 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1928
1929
1930 // Retrieve the document structure for each subClassifierID:
1931 // all the documents that begin with its letter.
1932 String letter = String.valueOf(ch);
1933 try {
1934 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1935 if(docPIDs.length == 0) {
1936 return classifierNode; // skip letters that don't have any kids
1937 }
1938
1939 for(int i = 0; i < docPIDs.length; i++) {
1940 // work out the document's fedora PID and section ID
1941 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1942 String docPID = getDocPIDFromDocID(docPIDs[i]);
1943
1944 // get the required section, along with children or descendants
1945 Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1946
1947 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1948 Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1949
1950 // fills in the subtree of the rootNode in our nodeStructure element
1951 createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1952 classifierNode.appendChild(docRootNode);
1953 }
1954 } catch(Exception e) {
1955 ex = new FedoraGS3RunException(e);
1956 ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1957 }
1958
1959 return classifierNode;
1960 }
1961
1962
1963 /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1964 * starting letter of the alphabet. X is each letter of the alphabet for which there
1965 * are matching document titles.
1966 * @param collectionName - name of the collection through which we are browsing CL1.
1967 * @param classifierNode - the docNodes found will be appended to this node.
1968 * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1969 * the IDs for the subclassifiers (CL.x).
1970 * @param getDescendants - if true, get descendants of any documents found, otherwise
1971 * get just the children.
1972 * @param wantedSibling - the node (already created) whose siblings are requested. We
1973 * need to make sure not to recreate this node when creating its sibling nodes.
1974 * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1975 * the alphabet that are represented in the document titles.
1976 */
1977 public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1978 String classifierID, boolean getDescendants,
1979 Element wantedSibling)
1980 {
1981 String ID = "";
1982 if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1983 ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1984 }
1985
1986 Document doc = classifierNode.getOwnerDocument();
1987 FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1988
1989 // We're going to loop to the end of the alphabet
1990 int count = 1;
1991 for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1992 // Retrieve the document structure for each subClassifierID:
1993 // all the documents that begin with its letter.
1994 String letter = String.valueOf(ch);
1995 try {
1996 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1997 if(docPIDs.length == 0) {
1998 continue; // skip letters that don't have any kids
1999 }
2000 Element subClassifier = null;
2001 if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
2002 // already have the requested node, don't recreate it
2003 subClassifier = wantedSibling;
2004 } else {
2005 // <classifierNode childType="VList" nodeID="CL1.x">
2006 subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
2007 subClassifier.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
2008 subClassifier.setAttribute(GSXML.NODE_ID_ATT, classifierID+"."+count);
2009 subClassifier.setAttribute(GSXML.CLASSIFIER_STYLE_ATT, GSXML.VLIST);
2010 }
2011 classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
2012
2013 if(getDescendants) { // get the documents
2014
2015 // append the <docNodes> for the docPIDs found as children
2016 // of subclassifier
2017
2018 for(int i = 0; i < docPIDs.length; i++) {
2019 // work out the document's fedora PID and section ID
2020 String sectionID = getSectionIDFromDocID(docPIDs[i]);
2021 String docPID = getDocPIDFromDocID(docPIDs[i]);
2022
2023 // get the required section, along with children or descendants
2024 Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2025
2026 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2027 Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2028
2029 // fills in the subtree of the rootNode in our nodeStructure element
2030 createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2031 subClassifier.appendChild(rootNode);
2032 }
2033 }
2034 } catch(Exception e) {
2035 ex = new FedoraGS3RunException(e);
2036 ex.setSpecifics("requested portion of TOC file or "
2037 + "trouble with fielded search ");
2038 }
2039 }
2040 return classifierNode;
2041 }
2042
2043
2044 /** This method performs something equivalent to a greenstone3
2045 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2046 * @param classNodeIDs array of classifierNode IDs for which the metadata
2047 * needs to be returned.
2048 * @param metafields are the classifier metadata fields that are to be returned.
2049 * At present this method ignores them/pretends the requested metafields are
2050 * "all" and always returns the Title meta for the requested classifier nodes
2051 * (because that is all the metadata this Fedora classifier has at present).
2052 * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2053 * lists the metadata for all the classifierNodes passed as parameter.*/
2054 public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2055 {
2056 Document doc = this.builder.newDocument();
2057 // <classifierNodeList>
2058 Element classifierNodeList = doc.createElement(
2059 GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2060
2061 // create <classifierNode><metadataList><metadata>s
2062 // </metadataList></classifierNode> for all letters of the alphabet
2063 for(int i = 0; i < classNodeIDs.length; i++) {
2064 // strip ID of everything before the first '.' (i.e. remove "CL#.")
2065 int index = classNodeIDs[i].indexOf('.');
2066 String subClassifierNumber = classNodeIDs[i].substring(index+1);
2067 index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2068 if(index != -1) {
2069 subClassifierNumber = subClassifierNumber.substring(0, index);
2070 }
2071 int subClassifierNum = Integer.parseInt(subClassifierNumber);
2072 String classifierName = "";
2073 if(subClassifierNum == 0) { // no document titles started with a letter
2074 classifierName = "A-Z";
2075 } else {
2076 char letter = (char)('A' + subClassifierNum - 1); // A = 1
2077 classifierName = String.valueOf(letter);
2078 }
2079
2080 // <classifierNode nodeID="CL#.subNum">
2081 Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2082 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2083 attribute.setValue(classNodeIDs[i]);
2084 classifierNode.setAttributeNode(attribute);
2085
2086 // <metadataList>
2087 Element metadataList = doc.createElement(
2088 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2089
2090 // at least one metadata element: that of the title of this
2091 // classifierNode:
2092 // <metadata name="Title">letter</metadata>
2093 Element metadata = this.createNameValuePairElement(doc,
2094 GSXML.METADATA_ELEM, "Title", classifierName);
2095
2096 // now connect up everything
2097 metadataList.appendChild(metadata);
2098 classifierNode.appendChild(metadataList);
2099 classifierNodeList.appendChild(classifierNode);
2100 }
2101
2102 Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2103 GSXML.REQUEST_TYPE_PROCESS, //collName +
2104 "ClassifierBrowseMetadataRetrieve");
2105 try{
2106 return FedoraCommons.elementToString(responseMsg);
2107 }catch(TransformerException e) {
2108 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2109 + " " + e;
2110 }
2111 }
2112
2113 /** @return a newly created element of the following format:
2114 * &lt;classifier content="somecontent" name="CL+num"&gt;
2115 * &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2116 * &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2117 * &lt;/classifier&gt;
2118 * @param doc - the document used to create the element
2119 * @param content - value of the content attribute
2120 * @param classifierNum - the number suffixed to the CL, together forming
2121 * the classifier Node's ID
2122 * @param displayNameVal is the bodytext of a named displayItem element
2123 * @param displayDescrVal is the bodytext of a displayItem element with
2124 * description */
2125 protected Element createClassifierElement(Document doc, String content,
2126 int classifierNum, String displayNameVal, String displayDescrVal)
2127 {
2128 final String CL = "CL";
2129 Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2130 // content attribute
2131 Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2132 att.setValue(content);
2133 classifier.setAttributeNode(att);
2134 // name attribute
2135 att = doc.createAttribute(GSXML.NAME_ATT);
2136 att.setValue(CL + classifierNum);
2137 classifier.setAttributeNode(att);
2138
2139 // now create the displayItem children for classifier:
2140 // <displayItem name="name">#letter</displayItem>
2141 // <displayItem name="description">Browse titles starting with #letter</displayItem>
2142 Element displayItem = createNameValuePairElement(doc,
2143 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2144 classifier.appendChild(displayItem);
2145 displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2146 GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2147 classifier.appendChild(displayItem);
2148
2149 return classifier;
2150 }
2151
2152
2153 /** @return a newly created element of the following format:
2154 * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2155 * @param doc - the document used to create the element
2156 * @param elementName - the tag name
2157 * @param name - value of attribute name
2158 * @param value - the body text of the element */
2159 protected Element createNameValuePairElement(Document doc, String elementName,
2160 String name, String value) {
2161 // <elementName name="somename">"some display value"</elementName>
2162 Element element = doc.createElement(elementName);
2163 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2164 attribute.setValue(name);
2165 element.setAttributeNode(attribute);
2166
2167 element.appendChild(doc.createTextNode(value));
2168 return element;
2169 }
2170
2171 /**
2172 * @param collection is the collection to search in
2173 * @param query is the query term to search for. It won't specify the
2174 * indexed field to search in, which will mean that GSearch will
2175 * search all default indexed fields.
2176 * @param maxDocs is the maximum number of results to return (which
2177 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2178 */
2179 public String[] textQuery(String collection, String query,
2180 int maxDocs)
2181 throws Exception
2182 {
2183 // no need to search there is no query or query is empty spaces
2184 if(query.trim().equals(""))
2185 return new String[]{};
2186
2187 // QUERY value won't specify indexed field to search, Fedora
2188 // Gsearch will take that as meaning all default indexed fields.
2189 // Params to search() method below: string of fielded query terms;
2190 // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2191 query = query + " " + "PID" + COLON + GREENSTONE;
2192
2193 String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2194 // now we have the XML returned by FedoraGSearch, get the pids
2195 // of the documents returned (if any)
2196 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2197 collection, searchResult);
2198 return pids;
2199 }
2200
2201 /**
2202 * This method performs a fieldquery, searching for x number of phrases
2203 * in each of the 4 indexed fields.
2204 * @param collection is the collection to search in
2205 * @param nameValParamsMap is a Map of several(key, value) entries,
2206 * 4 of which we're concerned with here:
2207 * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2208 * - the values are a comma separated list of terms (phrases or single
2209 * words) to search that field in. There may be more than 1 or
2210 * there may be none (in which case there may be N empty values or
2211 * spaces separated by commas).
2212 * @param maxDocs is the maximum number of results to return (which
2213 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2214 * */
2215 public String[] fieldQuery(String collection, Map nameValParamsMap,
2216 int maxDocs)
2217 throws Exception
2218 {
2219 // we're going to maintain a list of UNIQUE pids that were returned
2220 // in search results. Hence we use Set:
2221 java.util.Set set = new java.util.HashSet();
2222
2223 // (1) Use Fedora's search to search document titles, if they were
2224 // specified:
2225 String[] docTitlepids = {};
2226
2227 String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2228 if(docTitleTerms != null) { // no doc titles may have been specified
2229 String[] phrases = docTitleTerms.split(COMMA);
2230
2231 // search the individual phrases first:
2232 for(int i = 0; i < phrases.length; i++) {
2233 if(phrases.equals("") || phrases.equals(" "))
2234 continue; //skip when there are no terms
2235 docTitlepids = this.searchDocumentTitles(
2236 collection, phrases[i], false);
2237 for(int j = 0; j < docTitlepids.length; j++)
2238 set.add(docTitlepids[j]);
2239 }
2240 }
2241 // (2) use FedoraGSearch to search doc AND section titles, and
2242 // fulltext (in case these were specified in nameValParamsMap):
2243 String searchResult = this.fedoraGSearch.search(
2244 nameValParamsMap, 1, maxDocs);
2245
2246 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2247 collection, searchResult);
2248
2249 for(int i = 0; i < pids.length; i++)
2250 set.add(pids[i]);
2251
2252 pids = null;
2253 pids = new String[set.size()];
2254 set.toArray(pids); // unique pids
2255 return pids;
2256 }
2257
2258 /** @return a String representing Greenstone3 XML for a query process
2259 * response returning the results for the query denoted by parameter
2260 * nameValParamsMap.
2261 * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2262 * query field data values. The names match the field names that
2263 * describeCollectionService() would have returned for the query service.
2264 * @param collection is the name of the collection
2265 * @param service is the name of the query service
2266 * This method is only ever called when any of the services in the digital
2267 * library described themselves as type=query. Therefore any digital
2268 * libraries that have no query services, can just return emtpy message
2269 * strings (or even "") since this method will never be called on them
2270 * anyway. */
2271 public String query(String collection, String service,
2272 Map nameValParamsMap)
2273 {
2274 FedoraGS3RunException ex = null;
2275 // (1) obtain the requested number of maximum result documents
2276 int maxDocs = 100;
2277 try{
2278 maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2279 } catch(NumberFormatException e) {
2280 maxDocs = 100;
2281 }
2282
2283 String pids[] = {};
2284 // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2285 if(service.endsWith("TextQuery")) {
2286 try {
2287 // get the Query field:
2288 String query = (String)nameValParamsMap.get(QUERY);
2289 pids = textQuery(collection, query, maxDocs);
2290 }
2291 catch(Exception e) {
2292 LOG.error("Error in TextQuery processing: " + e);
2293 ex = new FedoraGS3RunException(
2294 "When trying to use FedoraGenericSearch for a TextQuery", e);
2295
2296 }
2297 } else { // (3) FieldQuery
2298 // first get the comma-separated lists
2299 String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2300 String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2301 // both are comma separated lists, so split both on 'comma'
2302 String[] fieldNames = listOfFieldNames.split(COMMA);
2303 String[] searchTerms = listOfSearchTerms.split(COMMA);
2304
2305 // In the fieldNames and searchTerms lists of nameValParamsMap,
2306 // each searchTerm element was matched with its correspondingly
2307 // indexed fieldName.
2308 // A new map is going to reorganise this, by putting all terms
2309 // for a particular fieldName together in a comma separated list
2310 // and associating that with the fieldName. I.e. (key, value) ->
2311 // (fieldName, comma-separated list of all terms in that field)
2312 Map map = new HashMap();
2313 for(int i = 0; i < searchTerms.length; i++) {
2314 // there may be fewer searchTerms than fieldNames (since some
2315 // fieldNames may have been left empty), so loop on searchTerms
2316 if(map.containsKey(fieldNames[i])) { // fieldName is already
2317 // in the list, so append comma with new value
2318 String termsList = (String)map.get(fieldNames[i]);
2319 termsList = termsList + COMMA + searchTerms[i];
2320 map.put(fieldNames[i], termsList);
2321 } else { // this is the first time this fieldName occurred
2322 // just put the fieldName with searchTerm as-is
2323 map.put(fieldNames[i], searchTerms[i]);
2324 }
2325 }
2326
2327 try {
2328 // For fieldquery, we search on all the fieldNames specified
2329 // - if DOC_TITLES is specified then we use Fedora's search
2330 // - for all other fieldNames specified, we use FedoraGSearch
2331 pids = fieldQuery(collection, map, maxDocs);
2332 }
2333 catch(Exception e) {
2334 LOG.error("Error in FieldQuery processing: " + e);
2335 ex = new FedoraGS3RunException(
2336 "When trying to use FedoraGenericSearch for a FieldQuery", e);
2337 }
2338 }
2339
2340 // Build Greenstone XML Query response message from
2341 // the pids (which should be document identifiers)
2342 Document doc = builder.newDocument();
2343 // <metadataList><metadata name="numDocsMatched" value="n" />
2344 // </metadataList>
2345 Element metadataList = doc.createElement(
2346 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2347 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2348 metadata.setAttribute(GSXML.NAME_ATT, NUM_DOCS_MATCHED);
2349 metadata.setAttribute(GSXML.VALUE_ATT, Integer.toString(pids.length));
2350 metadataList.appendChild(metadata);
2351
2352 metadata = doc.createElement(GSXML.METADATA_ELEM);
2353 metadata.setAttribute(GSXML.NAME_ATT, "numDocsReturned");
2354 metadata.setAttribute(GSXML.VALUE_ATT, Integer.toString(pids.length));
2355 metadataList.appendChild(metadata);
2356
2357 // <documentNodeList>
2358 // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2359 // docType='hierarchy' nodeType="leaf" />
2360 // ...
2361 // ...
2362 // </documentNodeList>
2363 Element docNodeList = doc.createElement(
2364 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2365 // for each
2366 for(int i = 0; i < pids.length; i++) {
2367 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2368 docNode.setAttribute(GSXML.NODE_ID_ATT, pids[i]);
2369 docNode.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
2370 docNode.setAttribute(GSXML.NODE_TYPE_ATT, "root");
2371 docNode.setAttribute(GSXML.NODE_RANK_ATT, "NaN");
2372
2373 docNodeList.appendChild(docNode);
2374 }
2375
2376 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2377 GSXML.REQUEST_TYPE_PROCESS, service);
2378
2379 //docNodeList.getParentNode().appendChild(metadataList); // need to add term info
2380
2381 try{
2382 return FedoraCommons.elementToString(responseMsg);
2383 }catch(TransformerException e) {
2384 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2385 + " " + e;
2386 }
2387 }
2388
2389
2390 // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2391 /** Given a URL that represents a fedoraPID, will look up the object.
2392 * If it exists, it will return the contents of the DC:Title of its datastream.
2393 * If it doesn't exist, it will return the URL as-is.
2394 * @param URL: the URL that (after modification) represents a fedoraPID to look up.
2395 * @param collection: the name of collection in which to search for the URL
2396 * representing a fedoraPID.
2397 * @return the string (representing a fedoraPID) stored in the DC:Title of the
2398 * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2399 * then the parameter URL is returned.
2400 */
2401 public String getPIDforURL(String url, String collection) {
2402 FedoraGS3RunException ex = null; // any RemoteException
2403
2404 // (1) convert url to the fedorapid
2405 // / -> _ and : -> -
2406 String fedoraPID = url.replaceAll("/", "_");
2407 fedoraPID = fedoraPID.replaceAll(":", "-");
2408 // prefix "greenstone-http:<colname>-" to the fedoraPID
2409 fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2410 //LOG.error("### fedoraPID: " + fedoraPID);
2411
2412 // (2) Look up the datastream for the fedorapid
2413 String dcTitle = "";
2414 try {
2415 dcTitle = getDCTitle(fedoraPID);
2416 } catch(Exception e) {
2417 LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2418 ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2419 }
2420 //String dc = this.getDC(fedoraPID);
2421 //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2422
2423 // (3) if fedorapid exists, extract the dc:title content.
2424 // if it doesn't exist, return url
2425 if(dcTitle.equals("")) {
2426 return url;
2427 } else {
2428 // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2429 //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2430 return dcTitle+"-1";
2431 }
2432 }
2433
2434 public static void main(String args[]) {
2435 try{
2436 // testing default constructor
2437 //FedoraGS3Connection con = new FedoraGS3Connection();
2438
2439 // testing constructor that takes properties file to show initial
2440 // fedora server values
2441 java.io.File propertyFilename
2442 = new java.io.File("fedoraGS3.properties");
2443 FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2444
2445 // DESCRIBE: serviceList, collectionList
2446 System.out.println("serviceList:\n" + con.getServiceList());
2447
2448 System.out.println("collectionList:\n" + con.getCollectionList());
2449
2450 String[] colPIDs = con.getCollections();
2451 String[] collectionNames = con.getCollectionNames(con.getCollections());
2452
2453
2454 for(int i = 0; i < collectionNames.length; i++) {
2455 System.out.println("Describing collections:\n");
2456 System.out.println(con.describeCollection(collectionNames[i]));
2457 System.out.println("Describing collection services:\n"
2458 + con.describeCollectionServices(collectionNames[i]));
2459 }
2460
2461 String[] serviceNames = con.getServiceNames();
2462 for(int i = 0; i < serviceNames.length; i++) {
2463 System.out.println("Describing " + serviceNames[i] + ":\n"
2464 + con.describeCollectionService("demo", serviceNames[i]));
2465 }
2466
2467
2468 // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2469 // along with EX of the top-level document:
2470 System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2471 System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2472
2473
2474 String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2475 System.out.println("\nGET CONTENT:");
2476 for(int i = 0; i < docIDs.length; i++) {
2477 System.out.println(con.getContent(docIDs[i]));
2478 }
2479
2480 System.out.println("\nGET META:");
2481 for(int i = 0; i < docIDs.length; i++) {
2482 System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2483 }
2484
2485 String[] getTitlesFor = {
2486 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2487 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2488 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2489 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2490 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2491 };
2492
2493 // first let's display the regular meta for top-level docs and
2494 // their sections
2495 for(int i = 0; i < getTitlesFor.length; i++) {
2496 System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2497 }
2498
2499 System.out.println("\nTitles are:");
2500 System.out.println(con.getTitleMetadata(getTitlesFor));
2501
2502 System.out.println("\nGET STRUCTURE:");
2503 for(int i = 0; i < docIDs.length; i++) {
2504 System.out.println("Descendents and numChildren:\n"
2505 + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2506 System.out.println("Parent and numSiblings:\n"
2507 + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
2508 }
2509
2510 // TEST ERROR CASES:
2511 System.out.println("\nTESTING ERROR CASES");
2512 System.out.println(con.getContent("greenstone:demo-pinky"));
2513 String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2514 "greenstone:demo-pinky" };
2515 System.out.println(con.getContent(errorCases));
2516 System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2517 System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2518
2519 System.out.println("\nCLASSIFIER BROWSE");
2520 System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2521 new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2522
2523 System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2524 String[] classNodeIDs = new String[26];
2525 for(int i = 0; i < classNodeIDs.length; i++) {
2526 int subClassifierNum = i + 1;
2527 classNodeIDs[i] = "CL1." + subClassifierNum;
2528 }
2529 System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2530 classNodeIDs, new String[]{"all"}));
2531
2532 System.out.println("Testing query services");
2533 System.out.println("TEXT QUERY:");
2534 Map formControlValsMap = new HashMap();
2535 formControlValsMap.put(MAXDOCS, "100");
2536 formControlValsMap.put(QUERY, "snails");
2537 String searchResponse
2538 = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2539 System.out.println(searchResponse);
2540
2541 System.out.println("FIELD QUERY:");
2542 formControlValsMap.clear();
2543 formControlValsMap.put(MAXDOCS, "100");
2544 formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2545 formControlValsMap.put(FIELDNAME_ATT,
2546 "allFields,docTitles,allFields,allFields");
2547 searchResponse
2548 = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2549 System.out.println(searchResponse);
2550
2551 System.exit(0);
2552 }catch(Exception e) {
2553 JOptionPane.showMessageDialog(
2554 null, e, "Error", JOptionPane.ERROR_MESSAGE);
2555 //System.err.println("ERROR: " + e);
2556 e.printStackTrace();
2557 }
2558 }
2559}
Note: See TracBrowser for help on using the repository browser.