source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 22302

Last change on this file since 22302 was 22302, checked in by ak19, 14 years ago

Comment.

File size: 101.8 KB
Line 
1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the * University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31import org.greenstone.gsdl3.util.GSXML;
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.Attr;
35import org.w3c.dom.Text;
36import org.w3c.dom.NodeList;
37import org.w3c.dom.Node;
38import org.xml.sax.InputSource;
39
40import java.io.File;
41import java.util.HashMap;
42import java.util.Properties;
43import java.util.Map;
44
45import javax.swing.JOptionPane;
46
47import org.xml.sax.SAXException;
48import java.io.UnsupportedEncodingException;
49import java.io.IOException;
50import javax.net.ssl.SSLHandshakeException;
51import java.net.ConnectException;
52import java.net.MalformedURLException;
53import java.rmi.RemoteException;
54import javax.xml.parsers.ParserConfigurationException;
55import javax.xml.transform.TransformerException;
56
57/**
58 * Class that extends FedoraConnection in order to be able to use
59 * Fedora's web services to retrieve the specific datastreams of
60 * Greenstone documents stored in Fedora's repository. This class
61 * provides methods that convert those datastreams into Greenstone3
62 * XML response messages which are returned.
63 * @author ak19
64*/
65public class FedoraGS3Connection
66 extends FedoraConnection implements FedoraToGS3Interface,
67 FedoraToGS3Interface.Constants
68{
 /** The log4j logger for this class, named after the class. */
 private static final Logger LOG = Logger.getLogger(
 FedoraGS3Connection.class.getName());

 /** Index name used for FedoraGSearch when none is supplied:
 * it is the fallback value for the "gsearch.indexName" property
 * and the value set by the 5-argument init() path. */
 private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";

 /** Complete list of services that FedoraGS3 supports when everything
 * goes well. If a connection to FedoraGSearch cannot be established,
 * initSearchFunctionality() leaves out every entry whose name contains
 * "query" (case-insensitively), i.e. TextQuery and FieldQuery.
 * The services actually on offer are held in the member variable
 * serviceNames. */
 protected static final String[] SERVICES = {
 "DocumentContentRetrieve", "DocumentMetadataRetrieve",
 "DocumentStructureRetrieve",
 "TextQuery", "FieldQuery",
 "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
 };
87
 /** List of services actually supported by our FedoraGS3 repository.
 * Assigned by initSearchFunctionality(): either the SERVICES array
 * itself (same reference, not a copy) when FedoraGSearch could be
 * connected to, or a new array holding only the non-query services
 * when it could not. */
 protected String[] serviceNames;

 /** The object used to connect to FedoraGenericSearch, which is used
 * for doing full-text searching. initSearchFunctionality() resets this
 * to null before each connection attempt, so it stays null when the
 * attempt fails. */
 protected GSearchConnection fedoraGSearch;

 /** The url for the wsdl file of FedoraGSearch's web services.
 * By default this is the Fedora server's base URL concatenated
 * with "gsearch/services/FgsOperations?wsdl". It can be replaced
 * wholesale through setGSearchWSDLURL(url). */
 protected String gSearchWSDLURL;

 /** The last part of the gSearchWSDL URL. The first part is
 * the same as the fedora server's base url. Note that
 * setGSearchWSDLURL(url) does not update this field. */
 protected String gSearchWSDLSuffix;

 /** The name of the index that FedoraGSearch will index the GS3
 * documents into. If no name is specified in the properties file,
 * this defaults to DEFAULT_FEDORA_INDEX ("BasicIndex"). */
 protected String gSearchIndexName;
110
 /** 5 argument constructor, taking the same arguments as that of the
 * superclass FedoraConnection, to which it delegates entirely.
 * Intended to produce a FedoraGS3Connection object which connects to
 * Fedora's web services through stub classes and tries to connect to
 * FedoraGSearch's web services through the default WSDL location for it
 * ("gsearch/services/FgsOperations?wsdl" appended to the Fedora base URL).
 * If another url is to be used, call setGSearchWSDLURL(url) after the
 * constructor.
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for administrative
 * authentication required to access the fedora server.
 * @param fedoraServerPassword is the password for administrative
 * authentication required to access the fedora server. If no password was set
 * when installing Fedora, leave the field "".
 */
 public FedoraGS3Connection(String protocol, String host, int port,
 String fedoraServerUsername, String fedoraServerPassword)
 throws ParserConfigurationException, MalformedURLException,
 SSLHandshakeException, RemoteException, AuthenticationFailedException,
 NotAFedoraServerException, ConnectException, Exception
 {
 super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
 // The superclass constructor is expected to call back into this class,
 // which then tries to instantiate the GSearchConnection.
 // NOTE(review): the overridden init(...) in this class is documented as
 // the hook for this 5-argument path, bypassing
 // setInitialisationProperties(properties) - confirm against
 // FedoraConnection which of the two is actually invoked here.
 }
136
 /** No-argument constructor which delegates entirely to that of the
 * superclass FedoraConnection. According to the original documentation,
 * the superclass displays a small dialog requesting input for the
 * host, port, administrative password and username of the fedora server.
 * If no password was set on the fedora repository when installing it,
 * the user can leave the password field blank. */
 public FedoraGS3Connection()
 throws ParserConfigurationException, MalformedURLException,
 CancelledException, ConnectException, RemoteException,
 SSLHandshakeException, Exception
 {
 super();
 // super() is expected to call setInitialisationProperties(properties),
 // which this class overrides so that it also tries to instantiate
 // the GSearchConnection (behaviour of super() not visible here).
 }
151
 /** Single-argument constructor which delegates entirely to that of the
 * superclass FedoraConnection. According to the original documentation,
 * the superclass loads the given properties file, where connection
 * initialisation values may already be provided, and then displays a
 * small dialog requesting input for the host, port, administrative
 * password and username of the fedora server, showing the values from
 * the properties file as defaults. If the necessary initialisation
 * values are not present in the file, the corresponding fields
 * in the dialog will be blank.
 * If no password was set on the fedora repository when installing it,
 * the user can leave the password field blank.
 * @param propertiesFilename is the properties file to read the preset
 * connection values from */
 public FedoraGS3Connection(File propertiesFilename)
 throws ParserConfigurationException, MalformedURLException,
 CancelledException, ConnectException, RemoteException,
 SSLHandshakeException, Exception
 {
 super(propertiesFilename);
 // super() is expected to call setInitialisationProperties(properties),
 // which this class overrides so that it also tries to instantiate
 // the GSearchConnection (behaviour of super() not visible here).
 }
171
172 /** The superclass constructor calls this method passing any preset
173 * properties loaded from a propertiesFile. This method is overridden
174 * here in order to instantiate the gSearchConnection based on the
175 * - gSearchWSDLSuffix that will be appended to the fedora base url.
176 * (If one was not provided in the properties file, gSearchWSDLURL defaults
177 * to something of the form
178 * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
179 * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
180 * "gsearch/services/FgsOperations?wsdl".
181 * - name of the index into which the GS3 documents have been indexed
182 * and which FedoraGenericSearch should use to perform searches. If none is
183 * given in the properties file, then the index name defaults to "FedoraIndex".
184 * @param properties is the Properties Map loaded from a properties file
185 * (if there was any) which specifies such things as host and port of the
186 * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
187 * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
188 * to whatever the final value of this.gSearchWSDLURL' suffix is, and
189 * "gsearch.indexName" will be set to to whatever the final value of
190 * this.gSearchIndexName is.
191 */
192 protected void setInitialisationProperties(Properties properties)
193 throws ParserConfigurationException, MalformedURLException,
194 CancelledException, ConnectException, RemoteException,
195 SSLHandshakeException, Exception
196 {
197 super.setInitialisationProperties(properties);
198 // gsearchWSDL URL suffix, if not specified, defaults to
199 // "fedoragsearch/services/FgsOperations?wsdl" which is
200 // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
201 this.gSearchWSDLSuffix = properties.getProperty(
202 "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
203 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
204 // Set the property to whatever this.gSearchWSDLURL is now,
205 // so that it will be written out to the properties file again
206 properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
207
208 // Similarly for the name of the index FedoraGenericSearch should use
209 // when performing searches for GS3 docs stored in Fedora's repository.
210 this.gSearchIndexName = properties.getProperty(
211 "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
212 properties.setProperty("gsearch.indexName", this.gSearchIndexName);
213 // Create a connection to FedoraGSearch's web services:
214 initSearchFunctionality();
215 }
216
217 /** Overridden init method to work with the 5 argument constructor, so that we can
218 * bypass using setInitialisationProperties() which works with a Properties map.
219 */
220 protected void init(String protocol, String host, String port,
221 String fedoraServerUsername, String fedoraServerPassword)
222 throws ParserConfigurationException, MalformedURLException,
223 AuthenticationFailedException, RemoteException, Exception
224 {
225 super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
226 this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
227 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
228 this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
229 initSearchFunctionality();
230 }
231
232
233 /** Init method that instantiates a GSearchConnection object used
234 * to work with the separate FedoraGSearch web services.
235 * The url of the WSDL for FedoraGSearch's web services is worked out
236 * from the baseURL of the Fedora server.
237 */
238 protected void initSearchFunctionality()
239 {
240 try {
241 this.fedoraGSearch = null;
242 this.fedoraGSearch = new GSearchConnection(
243 gSearchWSDLURL, gSearchIndexName);
244 this.serviceNames = SERVICES;
245 } catch(Exception e){
246 LOG.error("Cannot connect to FedoraGSearch's web services at "
247 + gSearchWSDLURL + "\nQuery services will not be available.");
248 // If an exception occurs, something has gone wrong when
249 // trying to connect to FedoraGSearch's web services. This
250 // means, we can't offer query services, as that's provided
251 // by FedoraGSearch
252 serviceNames = null;
253 int countOfNonQueryServices = 0;
254 for(int i = 0; i < SERVICES.length; i++) {
255 // do not count query services
256 if(!SERVICES[i].toLowerCase().contains("query")) {
257 countOfNonQueryServices++;
258 }
259 }
260 // Services now supported are everything except Query services
261 serviceNames = new String[countOfNonQueryServices];
262 int j = 0;
263 for(int i = 0; i < SERVICES.length; i++) {
264 if(!SERVICES[i].toLowerCase().contains("query")) {
265 serviceNames[j] = SERVICES[i];
266 j++; // valid serviceName, so increment serviceName counter
267 }
268
269 }
270 }
271 }
272
273 /** @return the gSearchWSDLURL, the url of the WSDL for the
274 * FedoraGSearch web services */
275 public String getGSearchWSDLURL() { return gSearchWSDLURL; }
276
277 /** Sets the member variable gSearchWSDLURL that specify the location of
278 * the WSDL file of FedoraGSearch's web services. Then it attempts
279 * to instantiate a connection to those web services.
280 * @param url is the new url of the GSearch web services WSDL file */
281 public void setGSearchWSDLURL(String url) {
282 this.gSearchWSDLURL = url;
283 initSearchFunctionality();
284 }
285
286 /** @return the gSearchIndexName, the name of the index Fedora Generic
287 * Search will search in (where GS3 docs have been indexed into). */
288 public String getGSearchIndexName() { return gSearchIndexName; }
289
290 /** Sets the member variable gSearchIndexName that specifies the name
291 * of the index containing indexed GS3 documents. Then it attempts
292 * to instantiate a connection to the Fedora GSearch web services using
293 * this changed value for indexName.
294 * @param indexName is the new name of the index containing indexed GS3
295 * docs that GSearch should search in. */
296 public void setGSearchIndexName(String indexName) {
297 this.gSearchIndexName = indexName;
298 initSearchFunctionality();
299 }
300
301 /** @return the array of the services actually supported by FedoraGS3 */
302 protected String[] getServiceNames() { return this.serviceNames;}
303
304 /**
305 * For finding out if the sectionNumber is given as part of the docID.
306 * @param docID is the String that contains the docPID and may also
307 * contain the section number.
308 * @return true if the document identifier docID contains a section-
309 * number, and false if it consists solely of the docPID.
310 * That is, true is returned if
311 * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
312 * and false is returned if
313 * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
314 * */
315 protected boolean containsSectionNumber(String docID) {
316 // if there are two hyphens in the docID, then there are sections
317 // (and the section number is appended at end of docID)
318 // docID = "greenstone:colName-<docPID>-<sectionNum>"
319 return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
320 }
321
322 /** This method will extract the docPID from docID and return it.
323 * (If a sectionNumber is suffixed to the docID, the docPID which is
324 * the prefix is returned; otherwise the docID is the docPID and is
325 * returned)
326 * @param docID is the String that contains the docPID and may also
327 * contain the section number.
328 * @return only the docPID portion of the docID.
329 */
330 protected String getDocPIDFromDocID(String docID) {
331 if(containsSectionNumber(docID))
332 return docID.substring(0, docID.lastIndexOf(HYPHEN));
333 // else (if there's no sectionNumber), docID is the docPID
334 return docID;
335 }
336
337 /** This method will return the section Number, if there's any
338 * suffixed to the docID. Otherwise it will return the empty string
339 * @param docID is the String that contains the docPID and may also
340 * contain the section number.
341 * @return only the sectionID portion of the docID - if any, else "".
342 */
343 protected String getSectionIDFromDocID(String docID) {
344 if(containsSectionNumber(docID))
345 return docID.substring(
346 docID.lastIndexOf(HYPHEN)+1, docID.length());
347 return "";
348 }
349
350 /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
351 * response message that gives the metadata for each collection identified
352 * @param collIDs is an array of fedora pids identifying collections in the
353 * fedora repository
354 * @return a GS3 DocumentMetadataRetrieve response message containing the
355 * EX metadata for all the requested collections */
356 public String getCollectionMetadata(String[] collIDs) {
357 return getMetadata(collIDs, new String[] {"all"});
358 }
359
360 /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
361 * response message is returned containing the metadata for each document.
362 * @param docIDs is an array of document identifiers (docID can either be
363 * &lt;pid&gt;s items (documents) in the fedora repository, or
364 * "&lt;pid&gt;-sectionNumber".
365 * @return a GS3 DocumentMetadataRetrieve response message containing the
366 * EX, DC, DLS metadata for all the requested documents
367 * @param metadata is the list of metadata elements to be retrieved for each doc */
368 public String getDocumentMetadata(String[] docIDs, String[] metadata) {
369 return getMetadata(docIDs, metadata);
370 }
371
372 /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
373 * response message that gives the metadata for the collection identified
374 * @param collID is a fedora pid identifying a collection in its repository
375 * @return a GS3 DocumentMetadataRetrieve response message containing the
376 * EX metadata for the requested collection
377 * @param metadata is the list of metadata elements to be retrieved for each doc */
378 public String getCollectionMetadata(String collID) {
379 return getMetadata(new String[] {collID}, new String[] {"all"});
380 }
381
382 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
383 * response message containing the metadata for the document.
384 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
385 * of an item (document) in the fedora repository, or it can be
386 * "&lt;pid&gt;-sectionNumber".
387 * @return a GS3 DocumentMetadataRetrieve response message containing the
388 * EX, DC, DLS metadata for the requested document */
389 public String getDocumentMetadata(String docID, String[] metadata) {
390 return getMetadata(new String[] {docID}, metadata);
391 }
392
393 /** @return a greenstone DocumentMetadataRetrieve response for the
394 * documents or collections indicated by the docIDsOrCollIDs.
395 * @param docIDsOrCollIDs is an array of identifiers which may be either the
396 * fedora pids for collections, or otherwise may be a document identifier.
397 * In the last case, the document ID may consist of either
398 * "documentPID-sectionNumber" or may just be just fedora documentPID
399 * @param metadata is the list of metadata elements to be retrieved for each doc */
400 public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
401 {
402 Document doc = builder.newDocument();
403 FedoraGS3RunException ex = null;
404
405 Element docNodeList = doc.createElement(
406 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
407
408 try{
409 for(int i = 0; i < docIDsOrCollIDs.length; i++) {
410 // create the <documentNode> containing the metadata
411 // for each document docID
412 Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
413 docNodeList.appendChild(docNode);
414 }
415 } catch(Exception e) {
416 ex = new FedoraGS3RunException(e);
417 ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
418 }
419
420 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
421 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
422 try{
423 return FedoraCommons.elementToString(responseMsg);
424 } catch(TransformerException e) {
425 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
426 + " " + e;
427 }
428 }
429
430 /** Method that takes a new DOM document, as well as an identifier of either
431 * a collection or document (which may be a fedora pid for the collection
432 * or document, or may be the documentPid-sectionNumber for a document) and
433 * returns a documentNode element for it:
434 * &lt;documentNode&gt;&lt;metadataList&gt;
435 * &lt;metadata name=""&gt;value&lt;/metadata&gt;
436 * ...
437 * &lt;/metadataList&gt;&lt;/documentNode&gt;
438 * @return documentNode containing the metadata for the collection or
439 * document given by parameter ID
440 * @param id denotes a collection pid, a document pid or a docID of the
441 * form "documentpid-sectionNumber"
442 * @param metadata is the list of metadata elements to be retrieved for each doc */
443 protected Element getMetadata(Document doc, String id, String[] metadata)
444 throws RemoteException, UnsupportedEncodingException,
445 SAXException, IOException
446 {
447 // We're going to create the documentNode nested inside the following
448 // documentNodeList:
449 // <documentNodeList>
450 // <documentNode nodeID=""><metadataList>
451 // <metadata name="">value</metadata>
452 // </metadataList></documentNode>
453 // <documentNode>...</documentNode>
454 // </documentNodeList>
455 // <documentNodeList>
456
457 // <documentNode nodeID="docID"> - the docNode on which a structure
458 // retrieve is being performed
459 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
460 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
461 attribute.setValue(id);
462 docNode.setAttributeNode(attribute);
463
464 // <metadataList>
465 Element metadataList = doc.createElement(
466 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
467
468 String ex = "";
469 String dc = "";
470 String dls = "";
471 if(id.endsWith(_COLLECTION)) { // docID refers to a collection
472 // Obtain the "EX" datastream (extracted metadata) for the collection
473 ex = this.getEX(id);
474 }
475 else { // docID refers to a document
476 // work out the document's fedora PID and section ID, and then
477 // obtain the EX (extracted metadata) and DC datastreams for the doc
478
479 // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
480 // is the same as for pid="greenstone:<colname>-docPID"
481 // That is, <Section id="1"> refers to the toplevel document docPID
482 // If requested for top-level document, there may also be DLS meta
483 String sectionID = getSectionIDFromDocID(id);
484 String docPID = getDocPIDFromDocID(id);
485 if(sectionID.equals("") || sectionID.equals("1")) {
486 // metadata of toplevel document is requested
487 ex = this.getEX(docPID); // slightly faster than doing
488 //getSectionEXMetadata(docID, "1")
489 dc = this.getDC(docPID);
490 dls = this.getDLS(docPID);
491 }
492 else {
493 ex = getSectionEXMetadata(docPID, sectionID);
494 dc = getSectionDCMetadata(docPID, sectionID);
495 }
496 }
497
498 String metafields = "";
499 for(int i = 0; i < metadata.length; i++) {
500 metafields = metafields + metadata[i] + "|";
501 }
502
503 // Adding in metadata sets in alphabetical order
504 // DC metadata for a top-level document is different from EX, DLS:
505 // only the element's namespace prefix is "dc", the rest of a tagname
506 // is unknown.
507 if(!dc.equals("")) {
508 addMetadataWithNamespacedTagNames(doc, metadataList,
509 dc, DC, metafields);
510 }
511
512 // Check if we were supposed to process dls and dc metadata
513 // as well. We only ever do this for top-level documents,
514 // in which case, dls and dc will be non-empty strings
515 if(!dls.equals("")) {
516 addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
517 }
518
519 // we definitely have an EX metadatastream for each
520 // collection object, top-level document object,
521 // and document section item
522 addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
523
524 // now the metadataList has been built up
525 docNode.appendChild(metadataList);
526
527 return docNode; // return <documentNode> containing the metadata
528 }
529
530 /** This method retrieves all the metadata elements in the metaDataStream
531 * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
532 * metadataSetNS is the namespace of each tag, and creates a new element of
533 * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
534 * each. Each of these are then appended to the metadataList parameter.
535 * @param doc is the Document object using which the new metadata Elements
536 * are to be constructed
537 * @param metadataList is the &lt;metadataList&gt; Element to which the new
538 * metadata Elements are to be appended as children.
539 * @param metaDatastream the metadata datastream in string form (e.g. the
540 * Dublin Core metadata stored in the Fedora repository).
541 * @param metadataSet is the constant datastream identifier, e.g. "DC".
542 * At present this method applies to the DC metadata and any others like it
543 * where each tagname is different except for the constant dc: namespace.
544 * @param metafields is a | separated string containing the metadatafields to
545 * extract or "all" if all fields are requested
546 */
547 protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
548 String metaDatastream, String metadataSet, String metafields)
549 throws SAXException, IOException
550 {
551 Document src = builder.parse(
552 new InputSource(new StringReader(metaDatastream)));
553
554 // The following doesn't work for some reason: to retrieve all elements
555 // whose namespace prefix starts with "dc", we pass "*" for localName
556 //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
557
558 // Longer way: get the children of the root document
559 NodeList children = src.getDocumentElement().getChildNodes();
560
561 for(int i = 0; i < children.getLength(); i++) {
562 String nodeName = children.item(i).getNodeName();
563 // check that the nodename starts with the metadataSet ("dc") namespace,
564 // which simultaneously ensures that the node's an element:
565 if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
566 // need to have a period for Greenstone instead of Fedora's colon
567 nodeName = nodeName.replace(COLON, PERIOD);
568 if(metadataSet.equals(DC)) { // dc:title -> dc.Title
569 nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
570 + nodeName.substring(4);
571 }
572
573 // get the requested metadata fields
574 if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
575 Element metatag = (Element)children.item(i);
576 String value = FedoraCommons.getValue(metatag);
577 // <dc:tagname>value</dc:tagname>
578 // we're going to put this in our metadata element as
579 // <metadata name="dc.Tagname">value</metadata>
580
581 // create metadata of (name, value) pairs in target DOM (doc)
582 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
583 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
584
585 attribute.setValue(nodeName);
586 metadata.setAttributeNode(attribute);
587 Text content = doc.createTextNode(value);
588 metadata.appendChild(content);
589 metadataList.appendChild(metadata);
590 }
591 }
592 }
593 }
594
595 /** This method retrieves all the metadata elements in the metaDataStream
596 * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt;
597 * where "namespace" is the namespace prefix of each tag, and metadataName
598 * is the name of the metadata (like author, title). For each element
599 * it creates a corresponding new element of the form
600 * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
601 * Each of these are then appended to the metadataList parameter.
602 * @param doc is the Document object using which the new metadata Elements
603 * are to be constructed
604 * @param metadataList is the &lt;metadataList&gt; Element to which the new
605 * metadata Elements are to be appended as children.
606 * @param metaDatastream the metadata datastream in string form (e.g. the
607 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
608 * repository).
609 * @param metadataSet is the constant datastream identifier,
610 * e.g. "DLS" or "EX".
611 * At present this method applies to the DLS and EX metadata as they have
612 * constant tagnames throughout.
613 * @param metafields is a | separated string containing the metadatafields to
614 * extract or "all" if all fields are requested.
615 */
616 protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
617 String metaDatastream, String metadataSet, String metafields)
618 throws SAXException, IOException
619 {
620 // Namespace prefix can be "ex:" or "dls:"
621 String namespacePrefix = "";
622 if(!metadataSet.equals(EX)) {
623 // need to have a period for Greenstone instead of Fedora's colon
624 namespacePrefix = metadataSet.toLowerCase() + PERIOD;
625 }
626
627 Document src = builder.parse(
628 new InputSource(new StringReader(metaDatastream)));
629 NodeList metaTags = src.getElementsByTagName(
630 metadataSet.toLowerCase()+COLON+METADATA);
631 // Looking for tagnames: <ex:metadata> or <dls:metadata>
632
633 for(int i = 0; i < metaTags.getLength(); i++) {
634 Element metatag = (Element)metaTags.item(i);
635
636 // extract the metadata of (name, value) pairs from src DOM
637 // look for <metadata name="name">value</metadata>
638 String name = metatag.hasAttribute(NAME) ?
639 metatag.getAttribute(NAME) : "";
640 // sometimes, there are several metadata for the same name, in this
641 // case, look for a qualifier and append its value to the name to
642 // distinguish it uniquely:
643 if(metatag.hasAttribute(QUALIFIER)) {
644 name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
645 }
646 name = namespacePrefix + name; // prefix with namespace, if any
647 if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
648 String value = FedoraCommons.getValue(metatag);
649
650 // create metadata of (name, value) pairs in target DOM (doc)
651 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
652 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
653 attribute.setValue(name);
654 metadata.setAttributeNode(attribute);
655 Text content = doc.createTextNode(value);
656 metadata.appendChild(content);
657
658 metadataList.appendChild(metadata);
659 }
660 }
661 }
662
663 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
664 * response message containing ONLY the Title metadata for the document.
665 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
666 * of an item (document) in the fedora repository, or it can be
667 * "&lt;pid&gt;-sectionNumber".
668 * @return a GS3 DocumentMetadataRetrieve response message containing the
669 * Title metadata for the requested document */
670 public String getTitleMetadata(String docID) {
671 return getTitleMetadata(new String[] { docID });
672 }
673
674 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
675 * response message containing ONLY the Title metadata for the documents.
676 * @param docIDs is a list of document identifiers (where docID can either be
677 * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
678 * "&lt;pid&gt;-sectionNumber".
679 * @return a GS3 DocumentMetadataRetrieve response message containing the
680 * Title metadata for all the requested documents */
681 public String getTitleMetadata(String[] docIDs) {
682 // Must create message of the following form:
683 // <documentNodeList><documentNode nodeID="docID">
684 // <metadataList><metadata name="Title">sometitle</metadata>
685 // </metadataList></documentNode>
686
687 Document doc = builder.newDocument();
688 FedoraGS3RunException ex = null;
689
690 Element docNodeList = doc.createElement(
691 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
692 try{
693 for(int i = 0; i < docIDs.length; i++) {
694 Element docNode = getTitleMetadata(doc, docIDs[i]);
695 docNodeList.appendChild(docNode);
696 }
697 }catch(Exception e) {
698 ex = new FedoraGS3RunException(e);
699 //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
700 ex.setSpecifics("EX metadata datastream");
701 }
702
703 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
704 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
705 try{
706 return FedoraCommons.elementToString(responseMsg);
707 } catch(TransformerException e) {
708 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
709 + " " + e;
710 }
711 }
712
713 /** Method that takes a new DOM document, as well as an identifier of either
714 * a document or document section and returns a documentNode element containing
715 * the title metadata for it:
716 * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
717 * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
718 * &lt;/metadataList&gt;&lt;/documentNode&gt;
719 * @return documentNode containing the metadata for the collection or
720 * document given by parameter ID
721 * @param docID denotes the id of a document or a document section, so id
722 * is either a document-pid or it's of the form documentpid-sectionNumber */
723 protected Element getTitleMetadata(Document doc, String docID)
724 throws RemoteException, UnsupportedEncodingException,
725 SAXException, IOException
726 {
727 // Returns a docNode element of the following form:
728 // <documentNode nodeID="docID">
729 // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
730 // </documentNode>
731
732 // <documentNode nodeID="docID">
733 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
734 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
735 attribute.setValue(docID);
736 docNode.setAttributeNode(attribute);
737
738 // <metadataList>
739 Element metaList = doc.createElement(
740 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
741 // <metadata name="Title">
742 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
743 // if we connect it all up (append children), we can immediately add
744 // the name attribute into the metadata element:
745 metaList.appendChild(metadata);
746 docNode.appendChild(metaList);
747 metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
748
749 String title = "";
750 String sectionID = getSectionIDFromDocID(docID);
751 String docPID = getDocPIDFromDocID(docID);
752
753 // check if title of toplevel document is requested
754 if(sectionID.equals(""))
755 title = this.getDocTitle(docPID);
756 else { // title of document section
757 title = this.getSectionTitle(docPID, sectionID);
758 }
759
760 metadata.appendChild(doc.createTextNode(title));
761
762 return docNode;
763 }
764
765 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
766 * containing the requested portion of the document structure of the documents
767 * indicated by docIDs:
768 * @param docID is the document identifier of the document whose hierarchical
769 * structure is requested. The name of the collection is already included in the
770 * docID for a Fedora DL.
771 * @param structure - strings specifying the required structure of the document.
772 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
773 * @param info - strings specifying the required structural info of the document.
774 * It can be any combination of: siblingPosition, numSiblings, numChildren.
775 */
776 public String getDocumentStructure(String docID, String[] structure, String[] info) {
777 return getStructure(new String[]{docID}, structure, info);
778 }
779
780
781 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
782 * containing the requested portion of the document structure of the documents
783 * indicated by docIDs:
784 * @param docIDs is an array of document identifiers of documents whose
785 * hierarchical structures are requested. The name of the collection is already
786 * included in the docID for a Fedora DL.
787 * @param structure - strings specifying the required structure of each document.
788 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
789 * @param info - strings specifying the required structural info of each document.
790 * It can be any combination of: siblingPosition, numSiblings, numChildren.
791 */
792 public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
793 return getStructure(docIDs, structure, info);
794 }
795
796 /**
797 * Returns a greenstone3 DocumentStructureRetrieve XML response message
798 * containing the document structures for the given docIDs.
799 * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
800 * greenstone formatted XML is returned. The requested section of the table
801 * of contents (TOC) for a document is converted into the greenstone3 xml
802 * format that is returned upon DocumentStructureRetrieve requests.
803 * @param docIDs the documentIDs for which the section's structure is returned;
804 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
805 * @param structure - the structure of the sections to return. Can be any combination of:
806 * ancestors, parent, siblings, children, descendants, entire.
807 * @param infos - strings containing any combination of the values: numChildren, numSiblings,
808 * siblingPosition. The requested info gets added as attributes to the returned root element.
809 * @return a greenstone3 DocumentStructureRetrieve XML response message in
810 * String format with the structure of the docIDs requested.
811 */
812 protected String getStructure(String[] docIDs, String[] structure, String[] infos)
813 {
814 Document doc = builder.newDocument();
815 FedoraGS3RunException ex = null;
816 // <documentNodeList>
817 Element docNodeList = doc.createElement(
818 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
819
820 try{
821 // append the <documentNodes> for the docIDs
822 // to the docNodeList
823 //getStructureElement(docNodeList, docIDs, levels);
824 getStructureElement(docNodeList, docIDs, structure, infos);
825 } catch(Exception e) {
826 ex = new FedoraGS3RunException(e);
827 ex.setSpecifics("(requested portion of) TOC datastream");
828 }
829 // insert our <documentNodeList> into a GS3 response message
830 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
831 GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
832 try{
833 return FedoraCommons.elementToString(responseMsg);
834 } catch(TransformerException e) {
835 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
836 + " " + e;
837 }
838 }
839
840
841 /** Given a &lt;documentNodeList&gt; portion of a greenstone3
842 * DocumentStructureRetrieve XML response message, this method will populate
843 * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
844 * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
845 * the doc structures are appended.
846 * @param docIDs the documentIDs for which the section's structure is returned;
847 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
848 * @param structures - the structure of the sections to return. Can be any combination of:
849 * ancestors, parent, siblings, children, descendants, entire.
850 * @param infos - a string containing any combination of the values: numChildren, numSiblings,
851 * siblingPosition. The requested info gets added as attributes to the returned root element.
852 */
853 protected void getStructureElement(Element docNodeList, String[] docIDs,
854 String[] structures, String[] infos)
855 throws RemoteException, UnsupportedEncodingException, SAXException,
856 IOException
857 {
858 // Make one string out of requested structure components, and one string from info components
859 String structure = "";
860 String info = "";
861 for(int i = 0; i < structures.length; i++) {
862 structure = structure + structures[i] + "|";
863 }
864 for(int i = 0; i < infos.length; i++) {
865 info = info + infos[i] + "|";
866 }
867
868 // process each docID
869 for(int i = 0; i < docIDs.length; i++) {
870 // work out the document's fedora PID and section ID
871 String sectionID = getSectionIDFromDocID(docIDs[i]);
872 String docPID = getDocPIDFromDocID(docIDs[i]);
873 if(sectionID.equals("")) {
874 sectionID = "1";
875 }
876
877 // get the required section, along with children or descendants
878 Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
879 Document doc = docNodeList.getOwnerDocument();
880
881 // copy-and-convert that structure into a structure format for GS3
882 Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
883
884 if(!info.equals("")) {
885 // <nodeStructureInfo>
886 // <info name="" value="" />
887 // <info name="" value="" />
888 // ...
889 // </nodeStructureInfo>
890 Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+GSXML.INFO_ATT);
891 Element root = srcDocElement.getOwnerDocument().getDocumentElement();
892
893 if(root.hasAttribute("numSiblings")) {
894 String numSiblings = root.getAttribute("numSiblings");
895 Element infoEl = doc.createElement(GSXML.INFO_ATT);
896 infoEl.setAttribute(GSXML.NAME_ATT, "numSiblings");
897 infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
898 nodeStructureInfo.appendChild(infoEl);
899 }
900
901 if(root.hasAttribute("siblingPosition")) {
902 String siblingPosition = root.getAttribute("siblingPosition");
903 Element infoEl = doc.createElement(GSXML.INFO_ATT);
904 infoEl.setAttribute(GSXML.NAME_ATT, "siblingPosition");
905 infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
906 nodeStructureInfo.appendChild(infoEl);
907 }
908
909 if(root.hasAttribute("numChildren")) {
910 String numChildren = root.getAttribute("numChildren");
911 Element infoEl = doc.createElement(GSXML.INFO_ATT);
912 infoEl.setAttribute(GSXML.NAME_ATT, "numChildren");
913 infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
914 nodeStructureInfo.appendChild(infoEl);
915 }
916 docNode.appendChild(nodeStructureInfo);
917 }
918
919 // add it to our list of documentNodes
920 docNodeList.appendChild(docNode);
921 }
922 }
923
924
925 /**
926 * Takes the portion of the XML document outlining the structure of the
927 * document (section)--in the format this is stored in Fedora--and returns
928 * Greenstone 3 DOM XML format for outlining document structure.
929 * @return a &lt;documentNode&gt; element that contains a greenstone3
930 * DocumentStructureRetrieve XML corresponding to the parameter Element section
931 * (which is in fedora XML), for the document indicated by docID.
932 * @param requestingDocID is the identifier of the document for which the
933 * structure was requested. It's this document's children or descendants that
934 * will be returned. Note that this is not always the same as (clear from)
935 * parameter docID.
936 * @param docID is the documentID for which the section's structure is
937 * returned where docID = "docPID-sectionNumber".
938 * @param section - the fedora section XML that is being mirrored in
939 * greenstone3 format.
940 */
941 protected Element getStructure(Document doc, String requestingDocID,
942 String docID, Element section)
943 {
944 // we want to mirror the section's DOM (given in fedora XML) in
945 // greenstone3's XML for a DocumentStructureRetrieve response.
946
947 // <documentNode nodeID="docID"> - the docNode on which a structure retrieve
948 // is being performed
949 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
950 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
951 attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
952 docNode.setAttributeNode(attribute);
953
954 // <nodeStructure>
955 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
956
957 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
958 Element rootNode = createDocNodeFromSubsection(doc, section, docID);
959
960 // fills in the subtree of the rootNode in our nodeStructure element
961 createDocStructure(doc, section, rootNode, docID);
962 //where section represents the root section
963
964 nodeStructure.appendChild(rootNode);
965 docNode.appendChild(nodeStructure);
966 return docNode;
967 }
968
969
970 /** Recursive method that creates a documentStructure mirroring parameter
971 * section, starting from parameter parent down to all descendants
972 * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
973 * for the docPID whose substructure is to be mirrored
974 * @param parent is the XML documentNode in the greenstone repository whose
975 * descendants created by this method will correspond to the descendants of
976 * parameter section.
977 * @param doc is the document containing the parent;
978 * @param docPID is the prefix of all nodeIDs in the parent's structure
979 */
980 protected void createDocStructure(
981 Document doc, Element section, Element parent, String docPID)
982 {
983 // get the section's children (if any)
984 NodeList children = section.getChildNodes();
985 for(int i = 0; i < children.getLength(); i++) {
986 Node n = children.item(i);
987
988 if(n.getNodeName().equals(SECTION_ELEMENT)) {
989 //then we know it's an element AND that its tagname is "Section"
990 Element subsection = (Element)n;
991 Element child = createDocNodeFromSubsection(doc, subsection, docPID);
992 parent.appendChild(child);
993
994 // recursion call on newly found child-element and subsection
995 createDocStructure(doc, subsection, child, docPID);
996 }
997 }
998 }
999
1000 /** Given a particular subsection element, this method creates a
1001 * Greenstone3 DocumentNode element that mirrors it.
1002 * @param doc is the document that will contain the created DocumentNode
1003 * @param docID is the prefix of all nodeIDs in the parent's structure
1004 * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1005 * TOC for the docPID which will be mirrored in the greenstone XML
1006 * documentNode that will be returned.
1007 * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1008 * &lt;Section&gt; element passed as parameter subSection. */
1009 protected Element createDocNodeFromSubsection(
1010 Document doc, Element subSection, String docID)
1011 {
1012 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1013 Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1014 docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1015 docNode.setAttributeNode(docType);
1016
1017 Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1018 String sectionID = subSection.hasAttribute(ID) ?
1019 subSection.getAttribute(ID) : "";
1020 if(sectionID.equals("1")
1021 && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1022 // reset the attribute without the section number (just "docID" may be important for democlient?)
1023 nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1024 } else {
1025 nodeID.setValue(docID + HYPHEN + sectionID);
1026 }
1027 //nodeID.setValue(docID + HYPHEN + sectionID);
1028 docNode.setAttributeNode(nodeID);
1029
1030 Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1031 if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1032 nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1033 }
1034 docNode.setAttributeNode(nodeType);
1035 return docNode;
1036 }
1037
1038
1039 /** Given an identifier that is either a docPID or a concatenation of
1040 * docPID+sectionID, this method works out the fedora assigned docPID and
1041 * sectionID and then calls getContentBody(docPID, sectionID) with those.
1042 * @param docID is expected to be of the form
1043 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1044 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1045 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1046 * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1047 * is returned! */
1048 public String getContent(String docID) {
1049 return this.getContent(new String[]{docID});
1050 }
1051
1052 /** Given an identifier that is a concatenation of docID+sectionID, this
1053 * method works out the fedora assigned docPID and sectionID and then calls
1054 * getContentBody(docPID, sectionID) with those.
1055 * @param docIDs is an array of document identifiers of the form
1056 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1057 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1058 * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1059 public String getContent(String[] docIDs) {
1060 Document doc = builder.newDocument();
1061 FedoraGS3RunException ex = null;
1062
1063 //<documentNodeList>
1064 Element docNodeList = doc.createElement(
1065 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1066
1067 try{
1068 for(int i = 0; i < docIDs.length; i++) {
1069 // get the sectionID and docPID from the docID
1070 String sectionID = this.removePrefix(
1071 getSectionIDFromDocID(docIDs[i]), SECTION);
1072 String docPID = getDocPIDFromDocID(docIDs[i]);
1073 if(sectionID.equals("")) // if no section is specified, get
1074 sectionID = "1"; // get the content for Section id="1"
1075
1076 // Get the contents for the requested section of document docPID
1077 String sectionContent = this.getContentBody(docPID, sectionID);
1078
1079 // set the nodeID attribute
1080 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1081 Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1082
1083 nodeId.setValue(docIDs[i]); // just set the docID which will contain
1084 // the docPID (and sectionID if already present)
1085
1086 docNode.setAttributeNode(nodeId);
1087 // set the text content to what was retrieved
1088 Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1089 Text textNode = doc.createTextNode(sectionContent.trim());
1090
1091 nodeContent.appendChild(textNode);
1092 docNode.appendChild(nodeContent);
1093 //add the documentNode to the docNodeList
1094 docNodeList.appendChild(docNode);
1095 }
1096 } catch(Exception e) {
1097 ex = new FedoraGS3RunException(e);
1098 ex.setSpecifics("requested doc Section datastream");
1099 }
1100 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1101 GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1102 try{
1103 return FedoraCommons.elementToString(responseMsg);
1104 } catch(TransformerException e) {
1105 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1106 + " " + e;
1107 }
1108 }
1109
1110 /** Gets the contents of a textNode from a section.
1111 * @return the text content of a section.
1112 * @param docPID the pid of the document from which a section's text is to
1113 * be retrieved.
1114 * @param sectionID is the section identifier of the document denoted by
1115 * docPID whose text is to be returned.
1116 */
1117 protected String getContentBody(String docPID, String sectionID)
1118 throws RemoteException, UnsupportedEncodingException,
1119 SAXException, IOException
1120 {
1121 String section = this.getSection(docPID, sectionID);
1122
1123 // the content is nested inside a <Section> element,
1124 // we extract it from there:
1125 InputSource source = new InputSource(new StringReader(section));
1126 Document doc = builder.parse(source);
1127
1128 // The document Element is the <Section> we want.
1129 // Get its text contents:
1130 section = FedoraCommons.getValue(doc.getDocumentElement());
1131
1132 // we are going to remove all occurrences of "_httpdocimg_/"
1133 // that precede associated filenames, because that's a GS3
1134 // defined macro for resolving relative urls. It won't help
1135 // with documents stored in fedora.
1136 section = section.replaceAll(GS3FilePathMacro+"/", "");
1137 return section;
1138 }
1139
1140 /** Here we create the greenstone's response message element:
1141 * &lt;message&lg;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1142 * @return a greenstone response-message element.
1143 * @param doc - the Document object which should me used to create the
1144 * &lt;message&gt; and &lt;response&gt; elements
1145 * @param content - the element that is to be nested inside &lt;response&gt;
1146 * @param ex - any exception that occurred when trying to create
1147 * the content parameter
1148 * @param responseType - the value for the type attribute of &lt;response&gt;,
1149 * such as "describe", "retrieve", "browse", "query"...
1150 * @param originator - indiates the collectionName or service (like
1151 * DocumentContentRetrieve) from where this response message originates
1152 */
1153 protected Element createResponseMessage(Document doc, Element content,
1154 Exception ex, String responseType, String originator)
1155 {
1156 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1157 // from = "FedoraGS3"
1158 Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1159 attribute.setValue(originator);
1160 response.setAttributeNode(attribute);
1161
1162 // type = "describe" or "process" - whatever's given in requestType:
1163 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1164 attribute.setValue(responseType);
1165 response.setAttributeNode(attribute);
1166
1167 if(content != null)
1168 response.appendChild(content);
1169
1170 // we'll create an error element for RemoteExceptions (web service problems)
1171 // and UnsupportedEncodingExceptions and
1172 if(ex != null) {
1173 Element error = doc.createElement(GSXML.ERROR_ELEM);
1174 error.appendChild(doc.createTextNode(ex.getMessage()));
1175 // now append the error to the <response> element (after
1176 // the content element whatever that was)
1177 response.appendChild(error);
1178 }
1179
1180 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1181 message.appendChild(response);
1182 doc.appendChild(message);
1183 return message;
1184 }
1185
1186 /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1187 * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1188 * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1189 * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1190 * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1191 * @param doc - the Document object which should me used to create the
1192 * &lt;serviceList&gt; element */
1193 protected Element createServiceList(Document doc)
1194 {
1195 Element serviceList = doc.createElement(
1196 GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1197
1198 for(int i = 0; i < serviceNames.length; i++) {
1199 // create the <service name="serviceName[i]" type="servicetype" />
1200 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1201
1202 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1203 attribute.setValue(serviceNames[i]);
1204 service.setAttributeNode(attribute);
1205
1206 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1207 if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1208 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1209 else if(serviceNames[i].contains("Query")) // search services
1210 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1211 else
1212 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1213 service.setAttributeNode(attribute);
1214
1215 // add the service element to the serviceList element
1216 // <serviceList><service /></serviceList>
1217 serviceList.appendChild(service);
1218 }
1219 return serviceList;
1220 }
1221
1222 /** @return a GS3 response message for a describe services request:
1223 * indicating the list of services supported by the Fedora-Greenstone
1224 * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1225 * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1226 * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1227 * serviceNames. */
1228 public String getServiceList()
1229 {
1230 Document doc = builder.newDocument();
1231 Element serviceList = createServiceList(doc);
1232 // make <serviceList> the body of the responseMessage:
1233 // <message><response><serviceList></response></message>
1234 Element responseMsg = createResponseMessage(doc, serviceList, null,
1235 GSXML.REQUEST_TYPE_DESCRIBE, "");
1236 try {
1237 return FedoraCommons.elementToString(responseMsg);
1238 }catch(TransformerException e) {
1239 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1240 + " " + e;
1241 }
1242 }
1243
1244 /** @return a GS3 describe response message listing the collections and
1245 * collection-specific metadata stored in the Fedora-Greenstone repository. */
1246 public String getCollectionList()
1247 {
1248 Document doc = builder.newDocument();
1249 FedoraGS3RunException ex = null; // any RemoteException
1250
1251 // create the <collectionList /> element
1252 Element collectionList = doc.createElement(
1253 GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1254 try{
1255 String[] collectionNames = this.getCollectionNames(
1256 this.getCollections()); // this line could throw RemoteException
1257 for(int i = 0; i < collectionNames.length; i++) {
1258 // create the <collection name="somename" /> element
1259 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1260 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1261 attribute.setValue(collectionNames[i]);
1262 collection.setAttributeNode(attribute);
1263
1264 // append the <collection> element as child of <collectionList>
1265 collectionList.appendChild(collection);
1266
1267 //if(collection.hasAttribute(GSXML.NAME_ATT))
1268 //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1269 }
1270 } catch(RemoteException e) { // if this happens, perhaps it's because it
1271 // can't find Greenstone collections in fedora repository?
1272 ex = new FedoraGS3RunException(e);
1273 ex.setSpecifics(
1274 "greenstone collections in fedora repository");
1275 }
1276
1277 // make <collectionList> the body of the responseMessage:
1278 // <message><response><collectionList></response></message>
1279 Element responseMsg = createResponseMessage(doc, collectionList, ex,
1280 GSXML.REQUEST_TYPE_DESCRIBE, "");
1281 try{
1282 return FedoraCommons.elementToString(responseMsg);
1283 }catch(TransformerException e) {
1284 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1285 + " " + e;
1286 }
1287 }
1288
1289 /** @return a GS3 describe response message for a collection in the
1290 * Fedora-Greenstone repository.
1291 * @param collectionName - the name of the collection that is to be described.
1292 * It will be converted to a fedora collection pid, which is of the form
1293 * "greenstone:&lt;collectionName&gt;-collection". */
1294 public String describeCollection(String collectionName)
1295 {
1296 Document doc = builder.newDocument();
1297 FedoraGS3RunException ex = null;
1298
1299 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1300 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1301 attribute.setValue(collectionName);
1302 collection.setAttributeNode(attribute);
1303
1304 //<displayItem assigned="true" lang="en" name="name">
1305 //"some display name"</displayItem>
1306 Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1307
1308 attribute = doc.createAttribute(GSXML.LANG_ATT);
1309 attribute.setValue(this.lang);
1310 displayItem.setAttributeNode(attribute);
1311
1312 attribute = doc.createAttribute(GSXML.NAME_ATT);
1313 attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1314 displayItem.setAttributeNode(attribute);
1315
1316 try{
1317 Text textNode = doc.createTextNode(
1318 this.getCollectionTitle(getCollectionPID(collectionName)));
1319 displayItem.appendChild(textNode);
1320 } catch(Exception e) {
1321 // can't find Greenstone collections in fedora repository or problem
1322 // getting their titles from their metadata datastream?
1323 ex = new FedoraGS3RunException(e);
1324 ex.setSpecifics("greenstone collections or their metadata"
1325 + "in the fedora repository");
1326 }
1327 // now append the displayItem element as child of the collection element
1328 collection.appendChild(displayItem);
1329 // get the <serviceList> and add it into the collection description.
1330 // Services for all collections in the FedoraGS3 repository are the
1331 // same, offering a ClassifierBrowse to browse titles by starting letter
1332 // and DocRetrieve services: Content, Metadata and Structure.
1333
1334 Element serviceList = createServiceList(doc);
1335 collection.appendChild(serviceList);
1336
1337 Element responseMsg = createResponseMessage(doc, collection, ex,
1338 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1339 try{
1340 return FedoraCommons.elementToString(responseMsg);
1341 }catch(TransformerException e) {
1342 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1343 + " " + e;
1344 }
1345 }
1346
1347 /** @return a GS3 describe response message for the services of a collection
1348 * in the Fedora-Greenstone repository. So far, these services are the same for
1349 * all fedora collections: they are the services given in member variable
1350 * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1351 * ClassifierBrowseMetadataRetrieve.
1352 * All collections in this Digital Library (Fedora Repository) share the
1353 * same services, so this method returns the same services as getServiceList();
1354 * @param collectionName - the name of the collection whose services are to
1355 * be described. It will be converted to a fedora collection pid, which is of
1356 * the form "greenstone:&lt;collectionName&gt;-collection". */
1357 public String describeCollectionServices(String collectionName)
1358 {
1359 Document doc = builder.newDocument();
1360
1361 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1362 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1363 attribute.setValue(collectionName);
1364 collection.setAttributeNode(attribute);
1365
1366 Element serviceList = createServiceList(doc);
1367 collection.appendChild(serviceList);
1368
1369 Element responseMsg = createResponseMessage(doc, collection, null,
1370 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1371 try{
1372 return FedoraCommons.elementToString(responseMsg);
1373 }catch(TransformerException e) {
1374 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1375 + " " + e;
1376 }
1377 }
1378
1379 /** All collections in this Digital Library (Fedora Repository) share
1380 * the same services, so this method returns the same as
1381 * describeCollectionService(collName, serviceName).
1382 * @return a GS3 describe response message for the requested service
1383 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1384 * return nothing special except their names; browse (and any query)
1385 * return more complex XML responses.
1386 * @param serviceName - the name of the service in the collection which is to
1387 * be described.*/
1388 public String describeService(String serviceName)
1389 {
1390 // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1391 // we return:
1392 // <message><response from="<name>Retrieve" type="describe">
1393 // <service name="<name>Retrieve" type="retrieve" /></response></message>
1394 // But for browse (and any query) service, we return the data necessary
1395 // for displaying it
1396
1397 Document doc = this.builder.newDocument();
1398 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1399 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1400 attribute.setValue(serviceName);
1401 service.setAttributeNode(attribute);
1402
1403 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1404
1405 if(serviceName.toLowerCase().endsWith("retrieve")) {
1406 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1407 }
1408 else if(serviceName.toLowerCase().contains("browse")) {
1409 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1410
1411 // we need name and description <displayItem> elements
1412 Element displayItem
1413 = createNameValuePairElement(doc,
1414 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1415 service.appendChild(displayItem);
1416
1417 displayItem = createNameValuePairElement(doc,
1418 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1419 "Browse pre-defined classification hierarchies");
1420 service.appendChild(displayItem);
1421
1422 // now need a classifierList
1423 Element classifierList = doc.createElement(
1424 GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1425
1426 int classifierNum = 1;
1427 // append a <classifier content="some letter" name="CL#">
1428 // for each letter of the alphabet:
1429 Element classifier = createClassifierElement(doc, "TitleByLetter",
1430 classifierNum++, "titles by letter", "Browse titles by letter");
1431 // now add this <classifier> to the <classifierList>
1432 classifierList.appendChild(classifier);
1433
1434 // ANY MORE CLASSIFIERS? ADD THEM HERE
1435
1436 service.appendChild(classifierList);
1437 } // ELSE check for whether it is a query service
1438 else if(serviceName.toLowerCase().contains("query")) {
1439 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1440 if(serviceName.equals("TextQuery")) {
1441 describeTextQueryService(service);
1442 } else if(serviceName.equals("FieldQuery")) {
1443 describeFieldQueryService(service);
1444 }
1445 }
1446
1447 // don't forget to add the type attribute to the service!
1448 service.setAttributeNode(attribute);
1449
1450 String from = serviceName;
1451
1452 Element responseMsg = createResponseMessage(doc, service, null,
1453 GSXML.REQUEST_TYPE_DESCRIBE, from);
1454 try{
1455 return FedoraCommons.elementToString(responseMsg);
1456 }catch(TransformerException e) {
1457 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1458 + " " + e;
1459 }
1460 }
1461
1462 /** Appends children to the parameter service Element that make the
1463 * final service Element into a describe response XML for FedoraGS3's
1464 * TextQuery service.
1465 * @param service is the service Element that is being filled out. */
1466 protected void describeTextQueryService(Element service) {
1467 Document doc = service.getOwnerDocument();
1468 // we need name, submit (button) and description <displayItem> elements
1469 Element displayItem = createNameValuePairElement(doc,
1470 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1471 "Text Search");
1472 service.appendChild(displayItem);
1473
1474 displayItem = createNameValuePairElement(doc,
1475 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1476 service.appendChild(displayItem);
1477
1478 displayItem = createNameValuePairElement(doc,
1479 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1480 "Title and full-text search service");
1481 service.appendChild(displayItem);
1482
1483 //create the <paramList>
1484 Element paramList = doc.createElement(
1485 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1486
1487 // we ignore granularity to search at: it will always be
1488 // document and section level
1489 // we ignore casefolding: always on (that is, case is irrelevant)
1490 // we ignore document display order: always ranked
1491
1492 // Constructing the following:
1493 // <param default="100" name="maxDocs" type="integer">
1494 // <displayItem name="name">Maximum hits to return</displayItem>
1495 // </param>
1496 Element param = doc.createElement(GSXML.PARAM_ELEM);
1497
1498 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1499 attribute.setValue(MAXDOCS);
1500 param.setAttributeNode(attribute);
1501
1502 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1503 attribute.setValue("100");
1504 param.setAttributeNode(attribute);
1505
1506 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1507 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1508 param.setAttributeNode(attribute);
1509
1510 displayItem = createNameValuePairElement(doc,
1511 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1512 "Maximum hits to return");
1513 param.appendChild(displayItem);
1514
1515 paramList.appendChild(param);
1516
1517 // Constructing the following:
1518 // <param name="query" type="string">
1519 // <displayItem name="name">Query string</displayItem>
1520 // </param>
1521 param = doc.createElement(GSXML.PARAM_ELEM);
1522
1523 attribute = doc.createAttribute(GSXML.NAME_ATT);
1524 attribute.setValue(QUERY);
1525 param.setAttributeNode(attribute);
1526
1527 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1528 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1529 param.setAttributeNode(attribute);
1530
1531 displayItem = createNameValuePairElement(doc,
1532 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1533 "Query string");
1534 param.appendChild(displayItem);
1535
1536 paramList.appendChild(param);
1537
1538 service.appendChild(paramList);
1539 }
1540
1541 /** Appends children to the parameter service Element that make the
1542 * final service Element into a describe response XML for FedoraGS3's
1543 * FieldQuery service.
1544 * @param service is the service Element that is being filled out. */
1545 protected void describeFieldQueryService(Element service) {
1546 Document doc = service.getOwnerDocument();
1547 // we need name, submit (button) and description <displayItem> elements
1548 Element displayItem = createNameValuePairElement(doc,
1549 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1550 "Form Search");
1551 service.appendChild(displayItem);
1552
1553 displayItem = createNameValuePairElement(doc,
1554 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1555 service.appendChild(displayItem);
1556
1557 displayItem = createNameValuePairElement(doc,
1558 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1559 "Simple fielded search");
1560 service.appendChild(displayItem);
1561
1562 //create the <paramList>
1563 Element paramList = doc.createElement(
1564 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1565
1566 // we ignore granularity to search at: it will always be
1567 // document and section level
1568 // we ignore casefolding: always on (that is, case is irrelevant)
1569 // we ignore document display order: always ranked
1570
1571 // Constructing the following:
1572 // <param default="100" name="maxDocs" type="integer">
1573 // <displayItem name="name">Maximum hits to return</displayItem>
1574 // </param>
1575 Element param = doc.createElement(GSXML.PARAM_ELEM);
1576
1577 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1578 attribute.setValue(MAXDOCS);
1579 param.setAttributeNode(attribute);
1580
1581 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1582 attribute.setValue("100");
1583 param.setAttributeNode(attribute);
1584
1585 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1586 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1587 param.setAttributeNode(attribute);
1588
1589 displayItem = createNameValuePairElement(doc,
1590 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1591 "Maximum hits to return");
1592 param.appendChild(displayItem);
1593
1594 paramList.appendChild(param);
1595
1596 // Constructing the following:
1597 // <param name="simpleField" occurs="4" type="multi">
1598 // <displayItem name="name"></displayItem>
1599 //
1600 // <param name="query" type="string">
1601 // <displayItem name="name">Word or phrase </displayItem>
1602 // </param>
1603 //
1604 // <param default="allFields" name="fieldname" type="enum_single">
1605 // <displayItem name="name">in field</displayItem>
1606 //
1607 // <option name="docTitles">
1608 // <displayItem name="name">document titles</displayItem>
1609 // </option>
1610 // <option name="allTitles">
1611 // <displayItem name="name">document and section titles</displayItem>
1612 // </option>
1613 // <option name="fullText">
1614 // <displayItem name="name">full text</displayItem>
1615 // </option>
1616 // <option name="all">
1617 // <displayItem name="name">titles and full text</displayItem>
1618 // </option>
1619 // <option name="">
1620 // <displayItem name="name"></displayItem>
1621 // </option>
1622 // </param>
1623 // </param>
1624 Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1625 attribute = doc.createAttribute(GSXML.NAME_ATT);
1626 attribute.setValue(SIMPLEFIELD_ATT);
1627 rowOfParams.setAttributeNode(attribute);
1628
1629 // we want the row of controls to occur multiple times
1630 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1631 attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1632 rowOfParams.setAttributeNode(attribute);
1633
1634 attribute = doc.createAttribute(OCCURS_ATT);
1635 attribute.setValue("4"); // we want this row to occur 4 times
1636 rowOfParams.setAttributeNode(attribute);
1637
1638 // <param name="query" type="string">
1639 // <displayItem name="name">Word or phrase </displayItem>
1640 // </param>
1641 param = doc.createElement(GSXML.PARAM_ELEM);
1642
1643 attribute = doc.createAttribute(GSXML.NAME_ATT);
1644 attribute.setValue(QUERY);
1645 param.setAttributeNode(attribute);
1646
1647 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1648 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1649 param.setAttributeNode(attribute);
1650
1651 displayItem = createNameValuePairElement(doc,
1652 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1653 "Word or phrase");
1654 param.appendChild(displayItem);
1655 rowOfParams.appendChild(param);
1656
1657 // <param default="allFields" name="fieldName" type="enum_single">
1658 // <displayItem name="name">in field</displayItem>
1659 param = doc.createElement(GSXML.PARAM_ELEM);
1660 attribute = doc.createAttribute(GSXML.NAME_ATT);
1661 attribute.setValue(FIELDNAME_ATT);
1662 param.setAttributeNode(attribute);
1663
1664 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1665 attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1666 param.setAttributeNode(attribute);
1667
1668 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1669 attribute.setValue(ALL_FIELDS);
1670 param.setAttributeNode(attribute);
1671
1672 displayItem = createNameValuePairElement(doc,
1673 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1674 "in field");
1675 param.appendChild(displayItem);
1676
1677 String[] searchFieldNames
1678 = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1679 String[] searchFieldDisplay = {"all titles and full-text",
1680 "document titles only", "document and section titles",
1681 "full-text only"};
1682
1683 // for each fieldName create an option element and insert
1684 // the option into the enum_multi drop-down param:
1685 // <option name="fieldName">
1686 // <displayItem name="name">fieldName</displayItem>
1687 // </option>
1688 for(int i = 0; i < searchFieldNames.length; i++) {
1689 Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1690 attribute = doc.createAttribute(GSXML.NAME_ATT);
1691 attribute.setValue(searchFieldNames[i]);
1692 option.setAttributeNode(attribute);
1693
1694 displayItem = createNameValuePairElement(doc,
1695 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1696 searchFieldDisplay[i]);
1697 option.appendChild(displayItem);
1698 param.appendChild(option); // add option to the drop-down box
1699 }
1700
1701 rowOfParams.appendChild(param);
1702 paramList.appendChild(rowOfParams);
1703 service.appendChild(paramList);
1704 }
1705
1706 /**
1707 * @return a GS3 describe response message for the requested service
1708 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1709 * return nothing special except their names; browse (and any query)
1710 * return more complex XML responses.
1711 * All collections in this Digital Library (Fedora Repository) share
1712 * the same services, so this method returns the same as
1713 * describeService(serviceName).
1714 * @param collectionName - the name of the collection whose service is to
1715 * be described. It will be converted to a fedora collection pid, which is of
1716 * the form "greenstone:&lt;collectionName&gt;-collection".
1717 * @param serviceName - the name of the service in the collection which is to
1718 * be described. */
1719 public String describeCollectionService(String collectionName,
1720 String serviceName) {
1721 // collectionName can be ignored, because all services are FedoraGS3
1722 // services and are not unique to any particular (greenstone) collection.
1723 return describeService(serviceName);
1724 }
1725
1726 /** This method performs the implemented browse operation: allowing the
1727 * user to browse the titles of documents in the given collection by letter
1728 * and returning the results.
1729 * @param collectionName is the name of the collection whose documents
1730 * starting with the given letter will be returned.
1731 * @param classifierIDs are the ids of the classifiers on which to browse. In
1732 * this case, the classifier indicates whether we browse titles by letter, or
1733 * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
1734 * @param structures - the requested browse substructure. Can be any combination
1735 * of ancestors, parent, siblings, children, descendants.
1736 * @param infos - the requested structural info. Can be numSiblings,
1737 * siblingPosition, numChildren.
1738 * @return a GS3 ClassifierBrowse response message which lists all
1739 * the documents that start with the letter indicated by parameter classifier.
1740 */
1741 public String browse(String collectionName, String[] classifierIDs,
1742 String[] structures, String[] infos)
1743 {
1744 // Construct one string from the structures and structural info arrays
1745 String structure = "";
1746 String info = "";
1747 for(int i = 0; i < structures.length; i++) {
1748 structure = structure + structures[i] + "|";
1749 }
1750 for(int i = 0; i < infos.length; i++) {
1751 info = info + infos[i] + "|";
1752 }
1753
1754 Document doc = builder.newDocument();
1755 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1756
1757 // <classifierNodeList>
1758 Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1759
1760 for(int i = 0; i < classifierIDs.length; i++) {
1761 if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1762 browseTitlesByLetterClassifier(doc, classifierNodeList,
1763 collectionName, classifierIDs[i],
1764 structure, info);
1765 }
1766 }
1767
1768 Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1769 GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
1770 try {
1771 return FedoraCommons.elementToString(responseMsg);
1772 } catch(TransformerException e) {
1773 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1774 + " " + e;
1775 }
1776 }
1777
1778 /** CL1 browsing classifier: browsing titles by starting letter.
1779 * The browsing structure is retrieved.
1780 * @param doc - the document object that will contain the CL1 browsing structure.
1781 * @param classifierNodeList - the classifiers will be added to this nodeList.
1782 * @param collectionName - name of the collection through which we are browsing CL1.
1783 * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1784 * a letter.
1785 * @param structure - the requested browse substructure. Can be any combination
1786 * of ancestors, parent, siblings, children, descendants. siblings not yet implemented.
1787 * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1788 * numChildren.
1789 * @return the classifierNodeList with the CL1 classifier browse structure.
1790 */
1791 public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1792 String collectionName, String classifierID,
1793 String structure, String info)
1794 {
1795 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1796
1797 // TODO
1798 if(structure.indexOf("siblings") != -1) {
1799 LOG.error("Structure: siblings. Not yet implemented\n");
1800 }
1801
1802 if(structure.indexOf("entire") != -1) {
1803 structure = structure + "ancestors|descendants";
1804 }
1805
1806 // Structure of ancestors and children only at this stage
1807 int firstLevel = classifierID.indexOf('.');
1808 int secondLevel = classifierID.lastIndexOf('.');
1809
1810 // <nodeStructure>
1811 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1812
1813 // requested classifier node
1814 Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1815 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1816 attribute.setValue(classifierID);
1817 classNode.setAttributeNode(attribute);
1818
1819 if(firstLevel == -1) { // CL1 - toplevel node
1820 Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1821
1822 classifierNodeList.appendChild(classNode);
1823 classNode.appendChild(nodeStructure);
1824
1825 nodeStructure.appendChild(root);
1826 if(structure.indexOf("descendants") != -1) {
1827 getTitlesByLetterStructure(collectionName, root, classifierID, true);
1828 } else if(structure.indexOf("children") != -1) {
1829 getTitlesByLetterStructure(collectionName, root, classifierID, false);
1830 }
1831 }
1832 else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1833
1834 if(structure.indexOf("parent") != -1 || structure.indexOf("ancestors") != -1) {
1835 String toplevelID = classifierID.substring(0, firstLevel);
1836 Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1837 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1838 attribute.setValue(toplevelID);
1839 toplevelNode.setAttributeNode(attribute);
1840 Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1841
1842 classifierNodeList.appendChild(toplevelNode);
1843 toplevelNode.appendChild(nodeStructure);
1844
1845 nodeStructure.appendChild(node);
1846 node.appendChild(classNode);
1847 } else {
1848 Element node = (Element)classNode.cloneNode(true);
1849 classifierNodeList.appendChild(node);
1850 node.appendChild(nodeStructure);
1851 nodeStructure.appendChild(classNode);
1852 }
1853
1854 int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1855 char ch = (char)(num - 1 + 'A');
1856 if(structure.indexOf("descendants") != -1) {
1857 getTitlesForLetter(ch, collectionName, classNode, "descendants");
1858 } else if(structure.indexOf("children") != -1) {
1859 getTitlesForLetter(ch, collectionName, classNode, "children");
1860 }
1861 }
1862 else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1863 LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1864 }
1865
1866 return classifierNodeList;
1867 }
1868
1869 /** Creates a (CL1) subclassifier element for the docs whose titles start with
1870 * the given letter.
1871 * @param ch - the starting letter of the document titles to retrieve.
1872 * @param collectionName - name of the collection through which we are browsing CL1.
1873 * @param classifierNode - the docNodes found will be appended to this node.
1874 * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1875 * gets descendants of any documents found, otherwise gets just the children.
1876 * @return the given classifierNode which will have the child (or descendant) documents
1877 * appended to it.
1878 */
1879 public Element getTitlesForLetter(char ch, String collectionName,
1880 Element classifierNode, String depthStructure)
1881 {
1882 Document doc = classifierNode.getOwnerDocument();
1883 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1884
1885
1886 // Retrieve the document structure for each subClassifierID:
1887 // all the documents that begin with its letter.
1888 String letter = String.valueOf(ch);
1889 try {
1890 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1891 if(docPIDs.length == 0) {
1892 return classifierNode; // skip letters that don't have any kids
1893 }
1894
1895 for(int i = 0; i < docPIDs.length; i++) {
1896 // work out the document's fedora PID and section ID
1897 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1898 String docPID = getDocPIDFromDocID(docPIDs[i]);
1899
1900 // get the required section, along with children or descendants
1901 Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1902
1903 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1904 Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1905
1906 // fills in the subtree of the rootNode in our nodeStructure element
1907 createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1908 classifierNode.appendChild(docRootNode);
1909 }
1910 } catch(Exception e) {
1911 ex = new FedoraGS3RunException(e);
1912 ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1913 }
1914
1915 return classifierNode;
1916 }
1917
1918
1919 /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1920 * starting letter of the alphabet. X is each letter of the alphabet for which there
1921 * are matching document titles.
1922 * @param collectionName - name of the collection through which we are browsing CL1.
1923 * @param classifierNode - the docNodes found will be appended to this node.
1924 * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1925 * the IDs for the subclassifiers (CL.x).
1926 * @param getDescendants - if true, get descendants of any documents found, otherwise
1927 * get just the children.
1928 * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1929 * the alphabet that are represented in the document titles.
1930 */
1931 public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1932 String classifierID, boolean getDescendants)
1933 {
1934 Document doc = classifierNode.getOwnerDocument();
1935 FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1936
1937 // We're going to loop to the end of the alphabet
1938 int count = 1;
1939 for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1940 // Retrieve the document structure for each subClassifierID:
1941 // all the documents that begin with its letter.
1942 String letter = String.valueOf(ch);
1943 try {
1944 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1945 if(docPIDs.length == 0) {
1946 continue; // skip letters that don't have any kids
1947 }
1948
1949 // <classifierNode nodeID="CL1.x">
1950 Element subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
1951 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1952 attribute.setValue(classifierID+"."+count);
1953 subClassifier.setAttributeNode(attribute);
1954 classifierNode.appendChild(subClassifier);
1955
1956 if(getDescendants) { // get the documents
1957
1958 // append the <docNodes> for the docPIDs found as children
1959 // of subclassifier
1960
1961 for(int i = 0; i < docPIDs.length; i++) {
1962 // work out the document's fedora PID and section ID
1963 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1964 String docPID = getDocPIDFromDocID(docPIDs[i]);
1965
1966 // get the required section, along with children or descendants
1967 Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
1968
1969 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1970 Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
1971
1972 // fills in the subtree of the rootNode in our nodeStructure element
1973 createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
1974 subClassifier.appendChild(rootNode);
1975 }
1976 }
1977 } catch(Exception e) {
1978 ex = new FedoraGS3RunException(e);
1979 ex.setSpecifics("requested portion of TOC file or "
1980 + "trouble with fielded search ");
1981 }
1982 }
1983 return classifierNode;
1984 }
1985
1986
1987 /** This method performs something equivalent to a greenstone3
1988 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
1989 * @param classNodeIDs array of classifierNode IDs for which the metadata
1990 * needs to be returned.
1991 * @param metafields are the classifier metadata fields that are to be returned.
1992 * At present this method ignores them/pretends the requested metafields are
1993 * "all" and always returns the Title meta for the requested classifier nodes
1994 * (because that is all the metadata this Fedora classifier has at present).
1995 * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
1996 * lists the metadata for all the classifierNodes passed as parameter.*/
1997 public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
1998 {
1999 Document doc = this.builder.newDocument();
2000 // <classifierNodeList>
2001 Element classifierNodeList = doc.createElement(
2002 GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2003
2004 // create <classifierNode><metadataList><metadata>s
2005 // </metadataList></classifierNode> for all letters of the alphabet
2006 for(int i = 0; i < classNodeIDs.length; i++) {
2007 // strip ID of everything before the first '.' (i.e. remove "CL#.")
2008 int index = classNodeIDs[i].indexOf('.');
2009 String subClassifierNumber = classNodeIDs[i].substring(index+1);
2010 index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2011 if(index != -1) {
2012 subClassifierNumber = subClassifierNumber.substring(0, index);
2013 }
2014 int subClassifierNum = Integer.parseInt(subClassifierNumber);
2015 String classifierName = "";
2016 if(subClassifierNum == 0) { // no document titles started with a letter
2017 classifierName = "A-Z";
2018 } else {
2019 char letter = (char)('A' + subClassifierNum - 1); // A = 1
2020 classifierName = String.valueOf(letter);
2021 }
2022
2023 // <classifierNode nodeID="CL#.subNum">
2024 Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2025 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2026 attribute.setValue(classNodeIDs[i]);
2027 classifierNode.setAttributeNode(attribute);
2028
2029 // <metadataList>
2030 Element metadataList = doc.createElement(
2031 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2032
2033 // at least one metadata element: that of the title of this
2034 // classifierNode:
2035 // <metadata name="Title">letter</metadata>
2036 Element metadata = this.createNameValuePairElement(doc,
2037 GSXML.METADATA_ELEM, "Title", classifierName);
2038
2039 // now connect up everything
2040 metadataList.appendChild(metadata);
2041 classifierNode.appendChild(metadataList);
2042 classifierNodeList.appendChild(classifierNode);
2043 }
2044
2045 Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2046 GSXML.REQUEST_TYPE_PROCESS, //collName +
2047 "ClassifierBrowseMetadataRetrieve");
2048 try{
2049 return FedoraCommons.elementToString(responseMsg);
2050 }catch(TransformerException e) {
2051 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2052 + " " + e;
2053 }
2054 }
2055
2056 /** @return a newly created element of the following format:
2057 * &lt;classifier content="somecontent" name="CL+num"&gt;
2058 * &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2059 * &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2060 * &lt;/classifier&gt;
2061 * @param doc - the document used to create the element
2062 * @param content - value of the content attribute
2063 * @param classifierNum - the number suffixed to the CL, together forming
2064 * the classifier Node's ID
2065 * @param displayNameVal is the bodytext of a named displayItem element
2066 * @param displayDescrVal is the bodytext of a displayItem element with
2067 * description */
2068 protected Element createClassifierElement(Document doc, String content,
2069 int classifierNum, String displayNameVal, String displayDescrVal)
2070 {
2071 final String CL = "CL";
2072 Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2073 // content attribute
2074 Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2075 att.setValue(content);
2076 classifier.setAttributeNode(att);
2077 // name attribute
2078 att = doc.createAttribute(GSXML.NAME_ATT);
2079 att.setValue(CL + classifierNum);
2080 classifier.setAttributeNode(att);
2081
2082 // now create the displayItem children for classifier:
2083 // <displayItem name="name">#letter</displayItem>
2084 // <displayItem name="description">Browse titles starting with #letter</displayItem>
2085 Element displayItem = createNameValuePairElement(doc,
2086 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2087 classifier.appendChild(displayItem);
2088 displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2089 GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2090 classifier.appendChild(displayItem);
2091
2092 return classifier;
2093 }
2094
2095
2096 /** @return a newly created element of the following format:
2097 * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2098 * @param doc - the document used to create the element
2099 * @param elementName - the tag name
2100 * @param name - value of attribute name
2101 * @param value - the body text of the element */
2102 protected Element createNameValuePairElement(Document doc, String elementName,
2103 String name, String value) {
2104 // <elementName name="somename">"some display value"</elementName>
2105 Element element = doc.createElement(elementName);
2106 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2107 attribute.setValue(name);
2108 element.setAttributeNode(attribute);
2109
2110 element.appendChild(doc.createTextNode(value));
2111 return element;
2112 }
2113
2114 /**
2115 * @param collection is the collection to search in
2116 * @param query is the query term to search for. It won't specify the
2117 * indexed field to search in, which will mean that GSearch will
2118 * search all default indexed fields.
2119 * @param maxDocs is the maximum number of results to return (which
2120 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2121 */
2122 public String[] textQuery(String collection, String query,
2123 int maxDocs)
2124 throws Exception
2125 {
2126 // no need to search there is no query or query is empty spaces
2127 if(query.trim().equals(""))
2128 return new String[]{};
2129
2130 // QUERY value won't specify indexed field to search, Fedora
2131 // Gsearch will take that as meaning all default indexed fields.
2132 // Params to search() method below: string of fielded query terms;
2133 // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2134 query = query + " " + "PID" + COLON + GREENSTONE;
2135
2136 String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2137 // now we have the XML returned by FedoraGSearch, get the pids
2138 // of the documents returned (if any)
2139 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2140 collection, searchResult);
2141 return pids;
2142 }
2143
2144 /**
2145 * This method performs a fieldquery, searching for x number of phrases
2146 * in each of the 4 indexed fields.
2147 * @param collection is the collection to search in
2148 * @param nameValParamsMap is a Map of several(key, value) entries,
2149 * 4 of which we're concerned with here:
2150 * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2151 * - the values are a comma separated list of terms (phrases or single
2152 * words) to search that field in. There may be more than 1 or
2153 * there may be none (in which case there may be N empty values or
2154 * spaces separated by commas).
2155 * @param maxDocs is the maximum number of results to return (which
2156 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2157 * */
2158 public String[] fieldQuery(String collection, Map nameValParamsMap,
2159 int maxDocs)
2160 throws Exception
2161 {
2162 // we're going to maintain a list of UNIQUE pids that were returned
2163 // in search results. Hence we use Set:
2164 java.util.Set set = new java.util.HashSet();
2165
2166 // (1) Use Fedora's search to search document titles, if they were
2167 // specified:
2168 String[] docTitlepids = {};
2169
2170 String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2171 if(docTitleTerms != null) { // no doc titles may have been specified
2172 String[] phrases = docTitleTerms.split(COMMA);
2173
2174 // search the individual phrases first:
2175 for(int i = 0; i < phrases.length; i++) {
2176 if(phrases.equals("") || phrases.equals(" "))
2177 continue; //skip when there are no terms
2178 docTitlepids = this.searchDocumentTitles(
2179 collection, phrases[i], false);
2180 for(int j = 0; j < docTitlepids.length; j++)
2181 set.add(docTitlepids[j]);
2182 }
2183 }
2184 // (2) use FedoraGSearch to search doc AND section titles, and
2185 // fulltext (in case these were specified in nameValParamsMap):
2186 String searchResult = this.fedoraGSearch.search(
2187 nameValParamsMap, 1, maxDocs);
2188
2189 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2190 collection, searchResult);
2191
2192 for(int i = 0; i < pids.length; i++)
2193 set.add(pids[i]);
2194
2195 pids = null;
2196 pids = new String[set.size()];
2197 set.toArray(pids); // unique pids
2198 return pids;
2199 }
2200
2201 /** @return a String representing Greenstone3 XML for a query process
2202 * response returning the results for the query denoted by parameter
2203 * nameValParamsMap.
2204 * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2205 * query field data values. The names match the field names that
2206 * describeCollectionService() would have returned for the query service.
2207 * @param collection is the name of the collection
2208 * @param service is the name of the query service
2209 * This method is only ever called when any of the services in the digital
2210 * library described themselves as type=query. Therefore any digital
2211 * libraries that have no query services, can just return emtpy message
2212 * strings (or even "") since this method will never be called on them
2213 * anyway. */
2214 public String query(String collection, String service,
2215 Map nameValParamsMap)
2216 {
2217 FedoraGS3RunException ex = null;
2218 // (1) obtain the requested number of maximum result documents
2219 int maxDocs = 100;
2220 try{
2221 maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2222 } catch(NumberFormatException e) {
2223 maxDocs = 100;
2224 }
2225
2226 String pids[] = {};
2227 // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2228 if(service.endsWith("TextQuery")) {
2229 try {
2230 // get the Query field:
2231 String query = (String)nameValParamsMap.get(QUERY);
2232 pids = textQuery(collection, query, maxDocs);
2233 }
2234 catch(Exception e) {
2235 LOG.error("Error in TextQuery processing: " + e);
2236 ex = new FedoraGS3RunException(
2237 "When trying to use FedoraGenericSearch for a TextQuery", e);
2238
2239 }
2240 } else { // (3) FieldQuery
2241 // first get the comma-separated lists
2242 String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2243 String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2244 // both are comma separated lists, so split both on 'comma'
2245 String[] fieldNames = listOfFieldNames.split(COMMA);
2246 String[] searchTerms = listOfSearchTerms.split(COMMA);
2247
2248 // In the fieldNames and searchTerms lists of nameValParamsMap,
2249 // each searchTerm element was matched with its correspondingly
2250 // indexed fieldName.
2251 // A new map is going to reorganise this, by putting all terms
2252 // for a particular fieldName together in a comma separated list
2253 // and associating that with the fieldName. I.e. (key, value) ->
2254 // (fieldName, comma-separated list of all terms in that field)
2255 Map map = new HashMap();
2256 for(int i = 0; i < searchTerms.length; i++) {
2257 // there may be fewer searchTerms than fieldNames (since some
2258 // fieldNames may have been left empty), so loop on searchTerms
2259 if(map.containsKey(fieldNames[i])) { // fieldName is already
2260 // in the list, so append comma with new value
2261 String termsList = (String)map.get(fieldNames[i]);
2262 termsList = termsList + COMMA + searchTerms[i];
2263 map.put(fieldNames[i], termsList);
2264 } else { // this is the first time this fieldName occurred
2265 // just put the fieldName with searchTerm as-is
2266 map.put(fieldNames[i], searchTerms[i]);
2267 }
2268 }
2269
2270 try {
2271 // For fieldquery, we search on all the fieldNames specified
2272 // - if DOC_TITLES is specified then we use Fedora's search
2273 // - for all other fieldNames specified, we use FedoraGSearch
2274 pids = fieldQuery(collection, map, maxDocs);
2275 }
2276 catch(Exception e) {
2277 LOG.error("Error in FieldQuery processing: " + e);
2278 ex = new FedoraGS3RunException(
2279 "When trying to use FedoraGenericSearch for a FieldQuery", e);
2280 }
2281 }
2282
2283 // Build Greenstone XML Query response message for from
2284 // the pids (which should be document identifiers)
2285 Document doc = builder.newDocument();
2286 // <metadataList><metadata name="numDocsMatched" value="n" />
2287 // </metadataList>
2288 Element metadataList = doc.createElement(
2289 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2290 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2291
2292 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2293 attribute.setValue(NUM_DOCS_MATCHED);
2294 metadata.setAttributeNode(attribute);
2295
2296 attribute = doc.createAttribute(GSXML.VALUE_ATT);
2297 attribute.setValue(Integer.toString(pids.length));
2298 metadata.setAttributeNode(attribute);
2299
2300 metadataList.appendChild(metadata);
2301
2302 // <documentNodeList>
2303 // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2304 // docType='hierarchy' nodeType="leaf" />
2305 // ...
2306 // ...
2307 // </documentNodeList>
2308 Element docNodeList = doc.createElement(
2309 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2310 // for each
2311 for(int i = 0; i < pids.length; i++) {
2312 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2313 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2314 attribute.setValue(pids[i]);
2315 docNode.setAttributeNode(attribute);
2316
2317 attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2318 attribute.setValue("hierarchy");
2319 docNode.setAttributeNode(attribute);
2320
2321 attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2322 attribute.setValue("root");
2323 docNode.setAttributeNode(attribute);
2324 docNodeList.appendChild(docNode);
2325 }
2326
2327 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2328 GSXML.REQUEST_TYPE_PROCESS, service);
2329 try{
2330 return FedoraCommons.elementToString(responseMsg);
2331 }catch(TransformerException e) {
2332 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2333 + " " + e;
2334 }
2335 }
2336
2337 public static void main(String args[]) {
2338 try{
2339 // testing default constructor
2340 //FedoraGS3Connection con = new FedoraGS3Connection();
2341
2342 // testing constructor that takes properties file to show initial
2343 // fedora server values
2344 java.io.File propertyFilename
2345 = new java.io.File("fedoraGS3.properties");
2346 FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2347
2348 // DESCRIBE: serviceList, collectionList
2349 System.out.println("serviceList:\n" + con.getServiceList());
2350
2351 System.out.println("collectionList:\n" + con.getCollectionList());
2352
2353 String[] colPIDs = con.getCollections();
2354 String[] collectionNames = con.getCollectionNames(con.getCollections());
2355
2356
2357 for(int i = 0; i < collectionNames.length; i++) {
2358 System.out.println("Describing collections:\n");
2359 System.out.println(con.describeCollection(collectionNames[i]));
2360 System.out.println("Describing collection services:\n"
2361 + con.describeCollectionServices(collectionNames[i]));
2362 }
2363
2364 String[] serviceNames = con.getServiceNames();
2365 for(int i = 0; i < serviceNames.length; i++) {
2366 System.out.println("Describing " + serviceNames[i] + ":\n"
2367 + con.describeCollectionService("demo", serviceNames[i]));
2368 }
2369
2370
2371 // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2372 // along with EX of the top-level document:
2373 System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2374 System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2375
2376
2377 String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2378 System.out.println("\nGET CONTENT:");
2379 for(int i = 0; i < docIDs.length; i++) {
2380 System.out.println(con.getContent(docIDs[i]));
2381 }
2382
2383 System.out.println("\nGET META:");
2384 for(int i = 0; i < docIDs.length; i++) {
2385 System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2386 }
2387
2388 String[] getTitlesFor = {
2389 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2390 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2391 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2392 "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2393 "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2394 };
2395
2396 // first let's display the regular meta for top-level docs and
2397 // their sections
2398 for(int i = 0; i < getTitlesFor.length; i++) {
2399 System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2400 }
2401
2402 System.out.println("\nTitles are:");
2403 System.out.println(con.getTitleMetadata(getTitlesFor));
2404
2405 System.out.println("\nGET STRUCTURE:");
2406 for(int i = 0; i < docIDs.length; i++) {
2407 System.out.println("Descendents and numChildren:\n"
2408 + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {"numChildren"}));
2409 System.out.println("Parent and numSiblings:\n"
2410 + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {"numSiblings"}));
2411 }
2412
2413 // TEST ERROR CASES:
2414 System.out.println("\nTESTING ERROR CASES");
2415 System.out.println(con.getContent("greenstone:demo-pinky"));
2416 String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2417 "greenstone:demo-pinky" };
2418 System.out.println(con.getContent(errorCases));
2419 System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2420 System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {"numChildren"}));
2421
2422 System.out.println("\nCLASSIFIER BROWSE");
2423 System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2424 new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2425
2426 System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2427 String[] classNodeIDs = new String[26];
2428 for(int i = 0; i < classNodeIDs.length; i++) {
2429 int subClassifierNum = i + 1;
2430 classNodeIDs[i] = "CL1." + subClassifierNum;
2431 }
2432 System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2433 classNodeIDs, new String[]{"all"}));
2434
2435 System.out.println("Testing query services");
2436 System.out.println("TEXT QUERY:");
2437 Map formControlValsMap = new HashMap();
2438 formControlValsMap.put(MAXDOCS, "100");
2439 formControlValsMap.put(QUERY, "snails");
2440 String searchResponse
2441 = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2442 System.out.println(searchResponse);
2443
2444 System.out.println("FIELD QUERY:");
2445 formControlValsMap.clear();
2446 formControlValsMap.put(MAXDOCS, "100");
2447 formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2448 formControlValsMap.put(FIELDNAME_ATT,
2449 "allFields,docTitles,allFields,allFields");
2450 searchResponse
2451 = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2452 System.out.println(searchResponse);
2453
2454 System.exit(0);
2455 }catch(Exception e) {
2456 JOptionPane.showMessageDialog(
2457 null, e, "Error", JOptionPane.ERROR_MESSAGE);
2458 //System.err.println("ERROR: " + e);
2459 e.printStackTrace();
2460 }
2461 }
2462}
Note: See TracBrowser for help on using the repository browser.