source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26171

Last change on this file since 26171 was 26171, checked in by ak19, 12 years ago

Uncommitted changes from ages back to fedoraGS3 classes to get greenstone to work as an interface to fedora repository backend.

File size: 104.7 KB
Line 
1/**
2 *#########################################################################
3 * FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4 * of the Greenstone digital library suite from the New Zealand Digital
5 * Library Project at the University of Waikato, New Zealand.
6 * <BR><BR>
7 * Copyright (C) 2008 New Zealand Digital Library Project
8 * <BR><BR>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 * <BR><BR>
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *########################################################################
19 */
20
21package org.greenstone.fedora.services;
22
23
24import java.io.StringReader;
25
26import org.apache.log4j.Logger;
27import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31import org.greenstone.gsdl3.util.GSXML;
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.Attr;
35import org.w3c.dom.Text;
36import org.w3c.dom.NodeList;
37import org.w3c.dom.Node;
38import org.xml.sax.InputSource;
39
40import java.io.File;
41import java.util.HashMap;
42import java.util.Properties;
43import java.util.Map;
44
45import javax.swing.JOptionPane;
46
47import org.xml.sax.SAXException;
48import java.io.UnsupportedEncodingException;
49import java.io.IOException;
50import javax.net.ssl.SSLHandshakeException;
51import java.net.ConnectException;
52import java.net.MalformedURLException;
53import java.rmi.RemoteException;
54import javax.xml.parsers.ParserConfigurationException;
55import javax.xml.transform.TransformerException;
56
57/**
58 * Class that extends FedoraConnection in order to be able to use
59 * Fedora's web services to retrieve the specific datastreams of
60 * Greenstone documents stored in Fedora's repository. This class
61 * provides methods that convert those datastreams into Greenstone3
62 * XML response messages which are returned.
63 * @author ak19
64*/
65public class FedoraGS3Connection
66 extends FedoraConnection implements FedoraToGS3Interface,
67 FedoraToGS3Interface.Constants
68{
69 /** The logging instance for this class */
70 private static final Logger LOG = Logger.getLogger(
71 FedoraGS3Connection.class.getName());
72
73 /** Default name of Fedora index */
74 private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
75
76 /** Complete list of services that are supported our FedoraGS3 would
77 * support if everything goes well. If a connection to FedoraGSearch
78 * cannot be established, the query services will no longer be
79 * available. The actual services supported are given by member
80 * variable serviceNames. */
81 protected static final String[] SERVICES = {
82 "DocumentContentRetrieve", "DocumentMetadataRetrieve",
83 "DocumentStructureRetrieve",
84 "TextQuery", "FieldQuery",
85 "ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
86 };
87
88 /** List of services actually supported by our FedoraGS3 repository
89 * after construction. If FedoraGenericSearch can't be connected to,
90 * then query services will not be offered */
91 protected String[] serviceNames;
92
93 /** The object used to connect to FedoraGenericSearch, which is used
94 * for doing full-text searching */
95 protected GSearchConnection fedoraGSearch;
96
97 /** The url for the wsdl file of FedoraGSearch's web services
98 * by default this will be the Fedora server's base URL
99 * concatenated to "gsearch/services/FgsOperations?wsdl" */
100 protected String gSearchWSDLURL;
101
102 /** The last part of the gSearchWSDL URL. The first part is
103 * the same as the fedora server's base url. */
104 protected String gSearchWSDLSuffix;
105
106 /** The name of the index that FedoraGSearch will index the GS3
107 * documents into. If no name is specified in the properties file,
108 * this will default to FedoraIndex. */
109 protected String gSearchIndexName;
110
/** Five-argument constructor, taking the same arguments as the matching
 * constructor of superclass FedoraConnection. All work is delegated to
 * that superclass constructor.
 * <BR><BR>
 * The connection to FedoraGSearch's web services is attempted at the
 * default WSDL location ("gsearch/services/FgsOperations?wsdl" appended
 * to the Fedora base URL). To use a different location, call
 * setGSearchWSDLURL(url) after construction.
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for administrative
 * authentication required to access the fedora server.
 * @param fedoraServerPassword is the password for administrative
 * authentication required to access the fedora server. If no password
 * was set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
        String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    // The superclass constructor is expected to call back into one of this
    // class's initialisation hooks, which is where the GSearchConnection
    // gets created.
    // NOTE(review): the original note named setInitialisationProperties(),
    // but the init(...) override is documented as serving this constructor;
    // confirm against FedoraConnection.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
136
/** No-argument constructor, equivalent to that of superclass
 * FedoraConnection, to which all work is delegated. As described for the
 * superclass, a small dialog requests the host, port, administrative
 * username and password of the fedora server; the password field may be
 * left blank if no password was set when the repository was installed. */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // super() calls setInitialisationProperties(properties), and that
    // override tries to instantiate the GSearchConnection.
    super();
}
151
/** Single-argument constructor, equivalent to that of superclass
 * FedoraConnection, to which all work is delegated. As described for the
 * superclass, the values found in the properties file are offered as
 * defaults in a small dialog requesting the host, port, administrative
 * username and password of the fedora server; fields whose values are
 * missing from the file are shown blank. The password field may be left
 * blank if no password was set when the repository was installed.
 * @param propertiesFilename is the properties file from which connection
 * initialisation values may be read. */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // super() calls setInitialisationProperties(properties), and that
    // override tries to instantiate the GSearchConnection.
    super(propertiesFilename);
}
171
172 /** The superclass constructor calls this method passing any preset
173 * properties loaded from a propertiesFile. This method is overridden
174 * here in order to instantiate the gSearchConnection based on the
175 * - gSearchWSDLSuffix that will be appended to the fedora base url.
176 * (If one was not provided in the properties file, gSearchWSDLURL defaults
177 * to something of the form
178 * "http://&lt;fedorahost:port&gt;/fedoragsearch/services/FgsOperations?wsdl"
179 * which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
180 * "gsearch/services/FgsOperations?wsdl".
181 * - name of the index into which the GS3 documents have been indexed
182 * and which FedoraGenericSearch should use to perform searches. If none is
183 * given in the properties file, then the index name defaults to "FedoraIndex".
184 * @param properties is the Properties Map loaded from a properties file
185 * (if there was any) which specifies such things as host and port of the
186 * FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
187 * At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
188 * to whatever the final value of this.gSearchWSDLURL' suffix is, and
189 * "gsearch.indexName" will be set to to whatever the final value of
190 * this.gSearchIndexName is.
191 */
192 protected void setInitialisationProperties(Properties properties)
193 throws ParserConfigurationException, MalformedURLException,
194 CancelledException, ConnectException, RemoteException,
195 SSLHandshakeException, Exception
196 {
197 super.setInitialisationProperties(properties);
198 // gsearchWSDL URL suffix, if not specified, defaults to
199 // "fedoragsearch/services/FgsOperations?wsdl" which is
200 // concatenated to the baseURL of fedora to give the gsearchWSDLURL.
201 this.gSearchWSDLSuffix = properties.getProperty(
202 "gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
203 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
204 // Set the property to whatever this.gSearchWSDLURL is now,
205 // so that it will be written out to the properties file again
206 properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
207
208 // Similarly for the name of the index FedoraGenericSearch should use
209 // when performing searches for GS3 docs stored in Fedora's repository.
210 this.gSearchIndexName = properties.getProperty(
211 "gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
212 properties.setProperty("gsearch.indexName", this.gSearchIndexName);
213 // Create a connection to FedoraGSearch's web services:
214 initSearchFunctionality();
215 }
216
217 /** Overridden init method to work with the 5 argument constructor, so that we can
218 * bypass using setInitialisationProperties() which works with a Properties map.
219 */
220 protected void init(String protocol, String host, String port,
221 String fedoraServerUsername, String fedoraServerPassword)
222 throws ParserConfigurationException, MalformedURLException,
223 AuthenticationFailedException, RemoteException, Exception
224 {
225 super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
226 this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
227 this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
228 this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
229 initSearchFunctionality();
230 }
231
232
233 /** Init method that instantiates a GSearchConnection object used
234 * to work with the separate FedoraGSearch web services.
235 * The url of the WSDL for FedoraGSearch's web services is worked out
236 * from the baseURL of the Fedora server.
237 */
238 protected void initSearchFunctionality()
239 {
240 try {
241 this.fedoraGSearch = null;
242 this.fedoraGSearch = new GSearchConnection(
243 gSearchWSDLURL, gSearchIndexName);
244 this.serviceNames = SERVICES;
245 } catch(Exception e){
246 LOG.error("Cannot connect to FedoraGSearch's web services at "
247 + gSearchWSDLURL + "\nQuery services will not be available.");
248 // If an exception occurs, something has gone wrong when
249 // trying to connect to FedoraGSearch's web services. This
250 // means, we can't offer query services, as that's provided
251 // by FedoraGSearch
252 serviceNames = null;
253 int countOfNonQueryServices = 0;
254 for(int i = 0; i < SERVICES.length; i++) {
255 // do not count query services
256 if(!SERVICES[i].toLowerCase().contains("query")) {
257 countOfNonQueryServices++;
258 }
259 }
260 // Services now supported are everything except Query services
261 serviceNames = new String[countOfNonQueryServices];
262 int j = 0;
263 for(int i = 0; i < SERVICES.length; i++) {
264 if(!SERVICES[i].toLowerCase().contains("query")) {
265 serviceNames[j] = SERVICES[i];
266 j++; // valid serviceName, so increment serviceName counter
267 }
268
269 }
270 }
271 }
272
273 /** @return the gSearchWSDLURL, the url of the WSDL for the
274 * FedoraGSearch web services */
275 public String getGSearchWSDLURL() { return gSearchWSDLURL; }
276
277 /** Sets the member variable gSearchWSDLURL that specify the location of
278 * the WSDL file of FedoraGSearch's web services. Then it attempts
279 * to instantiate a connection to those web services.
280 * @param url is the new url of the GSearch web services WSDL file */
281 public void setGSearchWSDLURL(String url) {
282 this.gSearchWSDLURL = url;
283 initSearchFunctionality();
284 }
285
286 /** @return the gSearchIndexName, the name of the index Fedora Generic
287 * Search will search in (where GS3 docs have been indexed into). */
288 public String getGSearchIndexName() { return gSearchIndexName; }
289
290 /** Sets the member variable gSearchIndexName that specifies the name
291 * of the index containing indexed GS3 documents. Then it attempts
292 * to instantiate a connection to the Fedora GSearch web services using
293 * this changed value for indexName.
294 * @param indexName is the new name of the index containing indexed GS3
295 * docs that GSearch should search in. */
296 public void setGSearchIndexName(String indexName) {
297 this.gSearchIndexName = indexName;
298 initSearchFunctionality();
299 }
300
301 /** @return the array of the services actually supported by FedoraGS3 */
302 protected String[] getServiceNames() { return this.serviceNames;}
303
304 /**
305 * For finding out if the sectionNumber is given as part of the docID.
306 * @param docID is the String that contains the docPID and may also
307 * contain the section number.
308 * @return true if the document identifier docID contains a section-
309 * number, and false if it consists solely of the docPID.
310 * That is, true is returned if
311 * <pre>docID = "greenstone:colName-&lt;docPID&gt;-&lt;sectionNum&gt;"</pre>
312 * and false is returned if
313 * <pre>docID = "greenstone:colName-&lt;docPID&gt;"</pre>
314 * */
315 protected boolean containsSectionNumber(String docID) {
316 // if there are two hyphens in the docID, then there are sections
317 // (and the section number is appended at end of docID)
318 // docID = "greenstone:colName-<docPID>-<sectionNum>"
319 return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
320 }
321
322 /** This method will extract the docPID from docID and return it.
323 * (If a sectionNumber is suffixed to the docID, the docPID which is
324 * the prefix is returned; otherwise the docID is the docPID and is
325 * returned)
326 * @param docID is the String that contains the docPID and may also
327 * contain the section number.
328 * @return only the docPID portion of the docID.
329 */
330 protected String getDocPIDFromDocID(String docID) {
331 if(containsSectionNumber(docID))
332 return docID.substring(0, docID.lastIndexOf(HYPHEN));
333 // else (if there's no sectionNumber), docID is the docPID
334 return docID;
335 }
336
337 /** This method will return the section Number, if there's any
338 * suffixed to the docID. Otherwise it will return the empty string
339 * @param docID is the String that contains the docPID and may also
340 * contain the section number.
341 * @return only the sectionID portion of the docID - if any, else "".
342 */
343 protected String getSectionIDFromDocID(String docID) {
344 if(containsSectionNumber(docID))
345 return docID.substring(
346 docID.lastIndexOf(HYPHEN)+1, docID.length());
347 return "";
348 }
349
350 /** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
351 * response message that gives the metadata for each collection identified
352 * @param collIDs is an array of fedora pids identifying collections in the
353 * fedora repository
354 * @return a GS3 DocumentMetadataRetrieve response message containing the
355 * EX metadata for all the requested collections */
356 public String getCollectionMetadata(String[] collIDs) {
357 return getMetadata(collIDs, new String[] {"all"});
358 }
359
360 /** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
361 * response message is returned containing the metadata for each document.
362 * @param docIDs is an array of document identifiers (docID can either be
363 * &lt;pid&gt;s items (documents) in the fedora repository, or
364 * "&lt;pid&gt;-sectionNumber".
365 * @return a GS3 DocumentMetadataRetrieve response message containing the
366 * EX, DC, DLS metadata for all the requested documents
367 * @param metadata is the list of metadata elements to be retrieved for each doc */
368 public String getDocumentMetadata(String[] docIDs, String[] metadata) {
369 return getMetadata(docIDs, metadata);
370 }
371
372 /** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
373 * response message that gives the metadata for the collection identified
374 * @param collID is a fedora pid identifying a collection in its repository
375 * @return a GS3 DocumentMetadataRetrieve response message containing the
376 * EX metadata for the requested collection
377 * @param metadata is the list of metadata elements to be retrieved for each doc */
378 public String getCollectionMetadata(String collID) {
379 return getMetadata(new String[] {collID}, new String[] {"all"});
380 }
381
382 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
383 * response message containing the metadata for the document.
384 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
385 * of an item (document) in the fedora repository, or it can be
386 * "&lt;pid&gt;-sectionNumber".
387 * @return a GS3 DocumentMetadataRetrieve response message containing the
388 * EX, DC, DLS metadata for the requested document */
389 public String getDocumentMetadata(String docID, String[] metadata) {
390 return getMetadata(new String[] {docID}, metadata);
391 }
392
393 /** @return a greenstone DocumentMetadataRetrieve response for the
394 * documents or collections indicated by the docIDsOrCollIDs.
395 * @param docIDsOrCollIDs is an array of identifiers which may be either the
396 * fedora pids for collections, or otherwise may be a document identifier.
397 * In the last case, the document ID may consist of either
398 * "documentPID-sectionNumber" or may just be just fedora documentPID
399 * @param metadata is the list of metadata elements to be retrieved for each doc */
400 public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
401 {
402 Document doc = builder.newDocument();
403 FedoraGS3RunException ex = null;
404
405 Element docNodeList = doc.createElement(
406 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
407
408 try{
409 for(int i = 0; i < docIDsOrCollIDs.length; i++) {
410 // create the <documentNode> containing the metadata
411 // for each document docID
412 Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
413 docNodeList.appendChild(docNode);
414 }
415 } catch(Exception e) {
416 ex = new FedoraGS3RunException(e);
417 ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
418 }
419
420 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
421 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
422 try{
423 return FedoraCommons.elementToString(responseMsg);
424 } catch(TransformerException e) {
425 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
426 + " " + e;
427 }
428 }
429
430 /** Method that takes a new DOM document, as well as an identifier of either
431 * a collection or document (which may be a fedora pid for the collection
432 * or document, or may be the documentPid-sectionNumber for a document) and
433 * returns a documentNode element for it:
434 * &lt;documentNode&gt;&lt;metadataList&gt;
435 * &lt;metadata name=""&gt;value&lt;/metadata&gt;
436 * ...
437 * &lt;/metadataList&gt;&lt;/documentNode&gt;
438 * @return documentNode containing the metadata for the collection or
439 * document given by parameter ID
440 * @param id denotes a collection pid, a document pid or a docID of the
441 * form "documentpid-sectionNumber"
442 * @param metadata is the list of metadata elements to be retrieved for each doc */
443 protected Element getMetadata(Document doc, String id, String[] metadata)
444 throws RemoteException, UnsupportedEncodingException,
445 SAXException, IOException
446 {
447 // We're going to create the documentNode nested inside the following
448 // documentNodeList:
449 // <documentNodeList>
450 // <documentNode nodeID=""><metadataList>
451 // <metadata name="">value</metadata>
452 // </metadataList></documentNode>
453 // <documentNode>...</documentNode>
454 // </documentNodeList>
455 // <documentNodeList>
456
457 // <documentNode nodeID="docID"> - the docNode on which a metadata
458 // retrieve is being performed
459 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
460 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
461 attribute.setValue(id);
462 docNode.setAttributeNode(attribute);
463
464 // <metadataList>
465 Element metadataList = doc.createElement(
466 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
467
468 String ex = "";
469 String dc = "";
470 String dls = "";
471 if(id.endsWith(_COLLECTION)) { // docID refers to a collection
472 // Obtain the "EX" datastream (extracted metadata) for the collection
473 ex = this.getEX(id);
474 }
475 else { // docID refers to a document
476 // work out the document's fedora PID and section ID, and then
477 // obtain the EX (extracted metadata) and DC datastreams for the doc
478
479 // Note that EX/DC for pid="greenstone:<colname>-docPID-1"
480 // is the same as for pid="greenstone:<colname>-docPID"
481 // That is, <Section id="1"> refers to the toplevel document docPID
482 // If requested for top-level document, there may also be DLS meta
483 String sectionID = getSectionIDFromDocID(id);
484 String docPID = getDocPIDFromDocID(id);
485 if(sectionID.equals("") || sectionID.equals("1")) {
486 // metadata of toplevel document is requested
487 ex = this.getEX(docPID); // slightly faster than doing
488 //getSectionEXMetadata(docID, "1")
489 dc = this.getDC(docPID);
490 dls = this.getDLS(docPID);
491 }
492 else {
493 ex = getSectionEXMetadata(docPID, sectionID);
494 dc = getSectionDCMetadata(docPID, sectionID);
495 }
496 }
497
498 String metafields = "";
499 for(int i = 0; i < metadata.length; i++) {
500 metafields = metafields + metadata[i] + "|";
501 }
502
503 // Adding in metadata sets in alphabetical order
504 // DC metadata for a top-level document is different from EX, DLS:
505 // only the element's namespace prefix is "dc", the rest of a tagname
506 // is unknown.
507 if(!dc.equals("")) {
508 addMetadataWithNamespacedTagNames(doc, metadataList,
509 dc, DC, metafields);
510 }
511
512 // Check if we were supposed to process dls and dc metadata
513 // as well. We only ever do this for top-level documents,
514 // in which case, dls and dc will be non-empty strings
515 if(!dls.equals("")) {
516 addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
517 }
518
519 // we definitely have an EX metadatastream for each
520 // collection object, top-level document object,
521 // and document section item
522 addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
523
524 // now the metadataList has been built up
525 docNode.appendChild(metadataList);
526
527 return docNode; // return <documentNode> containing the metadata
528 }
529
530 /** This method retrieves all the metadata elements in the metaDataStream
531 * parameter of the form &lt;"metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; where
532 * metadataSetNS is the namespace of each tag, and creates a new element of
533 * the form &lt;metadata name="metadataSetNS:metadata"&gt;"value"&lt;/metadata&gt; for
534 * each. Each of these are then appended to the metadataList parameter.
535 * @param doc is the Document object using which the new metadata Elements
536 * are to be constructed
537 * @param metadataList is the &lt;metadataList&gt; Element to which the new
538 * metadata Elements are to be appended as children.
539 * @param metaDatastream the metadata datastream in string form (e.g. the
540 * Dublin Core metadata stored in the Fedora repository).
541 * @param metadataSet is the constant datastream identifier, e.g. "DC".
542 * At present this method applies to the DC metadata and any others like it
543 * where each tagname is different except for the constant dc: namespace.
544 * @param metafields is a | separated string containing the metadatafields to
545 * extract or "all" if all fields are requested
546 */
547 protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
548 String metaDatastream, String metadataSet, String metafields)
549 throws SAXException, IOException
550 {
551 Document src = builder.parse(
552 new InputSource(new StringReader(metaDatastream)));
553
554 // The following doesn't work for some reason: to retrieve all elements
555 // whose namespace prefix starts with "dc", we pass "*" for localName
556 //NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
557
558 // Longer way: get the children of the root document
559 NodeList children = src.getDocumentElement().getChildNodes();
560
561 for(int i = 0; i < children.getLength(); i++) {
562 String nodeName = children.item(i).getNodeName();
563 // check that the nodename starts with the metadataSet ("dc") namespace,
564 // which simultaneously ensures that the node's an element:
565 if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
566 // need to have a period for Greenstone instead of Fedora's colon
567 nodeName = nodeName.replace(COLON, PERIOD);
568 if(metadataSet.equals(DC)) { // dc:title -> dc.Title
569 nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
570 + nodeName.substring(4);
571 }
572
573 // get the requested metadata fields
574 if(metafields.indexOf("all") != -1 || metafields.indexOf(nodeName) != -1) {
575 Element metatag = (Element)children.item(i);
576 String value = FedoraCommons.getValue(metatag);
577 // <dc:tagname>value</dc:tagname>
578 // we're going to put this in our metadata element as
579 // <metadata name="dc.Tagname">value</metadata>
580
581 // create metadata of (name, value) pairs in target DOM (doc)
582 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
583 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
584
585 attribute.setValue(nodeName);
586 metadata.setAttributeNode(attribute);
587 Text content = doc.createTextNode(value);
588 metadata.appendChild(content);
589 metadataList.appendChild(metadata);
590 }
591 }
592 }
593 }
594
595 /** This method retrieves all the metadata elements in the metaDataStream
596 * of the form &lt;"namespace:"metadata name="metadataName"&gt;value&lt;/metadata&gt;
597 * where "namespace" is the namespace prefix of each tag, and metadataName
598 * is the name of the metadata (like author, title). For each element
599 * it creates a corresponding new element of the form
600 * &lt;metadata name="namespace:metadataName"&gt;value&lt;/metadata&gt;.
601 * Each of these are then appended to the metadataList parameter.
602 * @param doc is the Document object using which the new metadata Elements
603 * are to be constructed
604 * @param metadataList is the &lt;metadataList&gt; Element to which the new
605 * metadata Elements are to be appended as children.
606 * @param metaDatastream the metadata datastream in string form (e.g. the
607 * EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
608 * repository).
609 * @param metadataSet is the constant datastream identifier,
610 * e.g. "DLS" or "EX".
611 * At present this method applies to the DLS and EX metadata as they have
612 * constant tagnames throughout.
613 * @param metafields is a | separated string containing the metadatafields to
614 * extract or "all" if all fields are requested.
615 */
616 protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
617 String metaDatastream, String metadataSet, String metafields)
618 throws SAXException, IOException
619 {
620 // Namespace prefix can be "ex:" or "dls:"
621 String namespacePrefix = "";
622 if(!metadataSet.equals(EX)) {
623 // need to have a period for Greenstone instead of Fedora's colon
624 namespacePrefix = metadataSet.toLowerCase() + PERIOD;
625 }
626
627 Document src = builder.parse(
628 new InputSource(new StringReader(metaDatastream)));
629 NodeList metaTags = src.getElementsByTagName(
630 metadataSet.toLowerCase()+COLON+METADATA);
631 // Looking for tagnames: <ex:metadata> or <dls:metadata>
632
633 for(int i = 0; i < metaTags.getLength(); i++) {
634 Element metatag = (Element)metaTags.item(i);
635
636 // extract the metadata of (name, value) pairs from src DOM
637 // look for <metadata name="name">value</metadata>
638 String name = metatag.hasAttribute(NAME) ?
639 metatag.getAttribute(NAME) : "";
640 // sometimes, there are several metadata for the same name, in this
641 // case, look for a qualifier and append its value to the name to
642 // distinguish it uniquely:
643 if(metatag.hasAttribute(QUALIFIER)) {
644 name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
645 }
646 name = namespacePrefix + name; // prefix with namespace, if any
647 if(metafields.indexOf("all") != -1 || metafields.indexOf(name) != -1) {
648 String value = FedoraCommons.getValue(metatag);
649
650 // create metadata of (name, value) pairs in target DOM (doc)
651 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
652 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
653 attribute.setValue(name);
654 metadata.setAttributeNode(attribute);
655 Text content = doc.createTextNode(value);
656 metadata.appendChild(content);
657
658 metadataList.appendChild(metadata);
659 }
660 }
661 }
662
663 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
664 * response message containing ONLY the Title metadata for the document.
665 * @param docID is a document identifier (docID can either be a &lt;pid&gt;
666 * of an item (document) in the fedora repository, or it can be
667 * "&lt;pid&gt;-sectionNumber".
668 * @return a GS3 DocumentMetadataRetrieve response message containing the
669 * Title metadata for the requested document */
670 public String getTitleMetadata(String docID) {
671 return getTitleMetadata(new String[] { docID });
672 }
673
674 /** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
675 * response message containing ONLY the Title metadata for the documents.
676 * @param docIDs is a list of document identifiers (where docID can either be
677 * a &lt;pid&gt; of an item (document) in the fedora repository, or it can be
678 * "&lt;pid&gt;-sectionNumber".
679 * @return a GS3 DocumentMetadataRetrieve response message containing the
680 * Title metadata for all the requested documents */
681 public String getTitleMetadata(String[] docIDs) {
682 // Must create message of the following form:
683 // <documentNodeList><documentNode nodeID="docID">
684 // <metadataList><metadata name="Title">sometitle</metadata>
685 // </metadataList></documentNode>
686
687 Document doc = builder.newDocument();
688 FedoraGS3RunException ex = null;
689
690 Element docNodeList = doc.createElement(
691 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
692 try{
693 for(int i = 0; i < docIDs.length; i++) {
694 Element docNode = getTitleMetadata(doc, docIDs[i]);
695 docNodeList.appendChild(docNode);
696 }
697 }catch(Exception e) {
698 ex = new FedoraGS3RunException(e);
699 //ex.setSpecifics("EX metadata datastream PID: |" + docIDs[i] + "|"); // for debugging PID
700 ex.setSpecifics("EX metadata datastream");
701 }
702
703 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
704 GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
705 try{
706 return FedoraCommons.elementToString(responseMsg);
707 } catch(TransformerException e) {
708 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
709 + " " + e;
710 }
711 }
712
713 /** Method that takes a new DOM document, as well as an identifier of either
714 * a document or document section and returns a documentNode element containing
715 * the title metadata for it:
716 * &lt;documentNode nodeID="docID"&gt;&lt;metadataList&gt;
717 * &lt;metadata name="Title"&gt;sometitle&lt;/metadata&gt;
718 * &lt;/metadataList&gt;&lt;/documentNode&gt;
719 * @return documentNode containing the metadata for the collection or
720 * document given by parameter ID
721 * @param docID denotes the id of a document or a document section, so id
722 * is either a document-pid or it's of the form documentpid-sectionNumber */
723 protected Element getTitleMetadata(Document doc, String docID)
724 throws RemoteException, UnsupportedEncodingException,
725 SAXException, IOException
726 {
727 // Returns a docNode element of the following form:
728 // <documentNode nodeID="docID">
729 // <metadataList><metadata name="Title">sometitle</metadata></metadataList>
730 // </documentNode>
731
732 // <documentNode nodeID="docID">
733 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
734 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
735 attribute.setValue(docID);
736 docNode.setAttributeNode(attribute);
737
738 // <metadataList>
739 Element metaList = doc.createElement(
740 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
741 // <metadata name="Title">
742 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
743 // if we connect it all up (append children), we can immediately add
744 // the name attribute into the metadata element:
745 metaList.appendChild(metadata);
746 docNode.appendChild(metaList);
747 metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
748
749 String title = "";
750 String sectionID = getSectionIDFromDocID(docID);
751 String docPID = getDocPIDFromDocID(docID);
752
753 // check if title of toplevel document is requested
754 if(sectionID.equals(""))
755 title = this.getDocTitle(docPID);
756 else { // title of document section
757 title = this.getSectionTitle(docPID, sectionID);
758 }
759
760 metadata.appendChild(doc.createTextNode(title));
761
762 return docNode;
763 }
764
765 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
766 * containing the requested portion of the document structure of the documents
767 * indicated by docIDs:
768 * @param docID is the document identifier of the document whose hierarchical
769 * structure is requested. The name of the collection is already included in the
770 * docID for a Fedora DL.
771 * @param structure - strings specifying the required structure of the document.
772 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
773 * @param info - strings specifying the required structural info of the document.
774 * It can be any combination of: siblingPosition, numSiblings, numChildren.
775 */
776 public String getDocumentStructure(String docID, String[] structure, String[] info) {
777 return getStructure(new String[]{docID}, structure, info);
778 }
779
780
781 /** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
782 * containing the requested portion of the document structure of the documents
783 * indicated by docIDs:
784 * @param docIDs is an array of document identifiers of documents whose
785 * hierarchical structures are requested. The name of the collection is already
786 * included in the docID for a Fedora DL.
787 * @param structure - strings specifying the required structure of each document.
788 * It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
789 * @param info - strings specifying the required structural info of each document.
790 * It can be any combination of: siblingPosition, numSiblings, numChildren.
791 */
792 public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
793 return getStructure(docIDs, structure, info);
794 }
795
796 /**
797 * Returns a greenstone3 DocumentStructureRetrieve XML response message
798 * containing the document structures for the given docIDs.
799 * Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
800 * greenstone formatted XML is returned. The requested section of the table
801 * of contents (TOC) for a document is converted into the greenstone3 xml
802 * format that is returned upon DocumentStructureRetrieve requests.
803 * @param docIDs the documentIDs for which the section's structure is returned;
804 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
805 * @param structure - the structure of the sections to return. Can be any combination of:
806 * ancestors, parent, siblings, children, descendants, entire.
807 * @param infos - strings containing any combination of the values: numChildren, numSiblings,
808 * siblingPosition. The requested info gets added as attributes to the returned root element.
809 * @return a greenstone3 DocumentStructureRetrieve XML response message in
810 * String format with the structure of the docIDs requested.
811 */
812 protected String getStructure(String[] docIDs, String[] structure, String[] infos)
813 {
814 Document doc = builder.newDocument();
815 FedoraGS3RunException ex = null;
816 // <documentNodeList>
817 Element docNodeList = doc.createElement(
818 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
819
820 try{
821 // append the <documentNodes> for the docIDs
822 // to the docNodeList
823 //getStructureElement(docNodeList, docIDs, levels);
824 getStructureElement(docNodeList, docIDs, structure, infos);
825 } catch(Exception e) {
826 ex = new FedoraGS3RunException(e);
827 ex.setSpecifics("(requested portion of) TOC datastream");
828 }
829 // insert our <documentNodeList> into a GS3 response message
830 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
831 GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
832 try{
833 return FedoraCommons.elementToString(responseMsg);
834 } catch(TransformerException e) {
835 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
836 + " " + e;
837 }
838 }
839
840
841 /** Given a &lt;documentNodeList&gt; portion of a greenstone3
842 * DocumentStructureRetrieve XML response message, this method will populate
843 * it with the &lt;documentNodes&gt; that represent the structure of the given docIDs.
844 * @param docNodeList is a &lt;documentNodeList&gt; to which &lt;documentNodes&gt; of
845 * the doc structures are appended.
846 * @param docIDs the documentIDs for which the section's structure is returned;
847 * where a docID is either a fedora pid &lt;docPID&gt; or &lt;docPID&gt;-&lt;sectionNumber&gt;.
848 * @param structures - the structure of the sections to return. Can be any combination of:
849 * ancestors, parent, siblings, children, descendants, entire.
850 * @param infos - a string containing any combination of the values: numChildren, numSiblings,
851 * siblingPosition. The requested info gets added as attributes to the returned root element.
852 */
853 protected void getStructureElement(Element docNodeList, String[] docIDs,
854 String[] structures, String[] infos)
855 throws RemoteException, UnsupportedEncodingException, SAXException,
856 IOException
857 {
858 // Make one string out of requested structure components, and one string from info components
859 String structure = "";
860 String info = "";
861 for(int i = 0; i < structures.length; i++) {
862 structure = structure + structures[i] + "|";
863 }
864 for(int i = 0; i < infos.length; i++) {
865 info = info + infos[i] + "|";
866 }
867
868 // process each docID
869 for(int i = 0; i < docIDs.length; i++) {
870 // work out the document's fedora PID and section ID
871 String sectionID = getSectionIDFromDocID(docIDs[i]);
872 String docPID = getDocPIDFromDocID(docIDs[i]);
873 if(sectionID.equals("")) {
874 sectionID = "1";
875 }
876
877 // get the required section, along with children or descendants
878 Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
879 Document doc = docNodeList.getOwnerDocument();
880
881 // copy-and-convert that structure into a structure format for GS3
882 Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
883
884 if(!info.equals("")) {
885 // <nodeStructureInfo>
886 // <info name="" value="" />
887 // <info name="" value="" />
888 // ...
889 // </nodeStructureInfo>
890 Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+GSXML.INFO_ATT);
891 Element root = srcDocElement.getOwnerDocument().getDocumentElement();
892
893 if(root.hasAttribute("numSiblings")) {
894 String numSiblings = root.getAttribute("numSiblings");
895 Element infoEl = doc.createElement(GSXML.INFO_ATT);
896 infoEl.setAttribute(GSXML.NAME_ATT, "numSiblings");
897 infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
898 nodeStructureInfo.appendChild(infoEl);
899 }
900
901 if(root.hasAttribute("siblingPosition")) {
902 String siblingPosition = root.getAttribute("siblingPosition");
903 Element infoEl = doc.createElement(GSXML.INFO_ATT);
904 infoEl.setAttribute(GSXML.NAME_ATT, "siblingPosition");
905 infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
906 nodeStructureInfo.appendChild(infoEl);
907 }
908
909 if(root.hasAttribute("numChildren")) {
910 String numChildren = root.getAttribute("numChildren");
911 Element infoEl = doc.createElement(GSXML.INFO_ATT);
912 infoEl.setAttribute(GSXML.NAME_ATT, "numChildren");
913 infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
914 nodeStructureInfo.appendChild(infoEl);
915 }
916 docNode.appendChild(nodeStructureInfo);
917 }
918
919 // add it to our list of documentNodes
920 docNodeList.appendChild(docNode);
921 }
922 }
923
924
925 /**
926 * Takes the portion of the XML document outlining the structure of the
927 * document (section)--in the format this is stored in Fedora--and returns
928 * Greenstone 3 DOM XML format for outlining document structure.
929 * @return a &lt;documentNode&gt; element that contains a greenstone3
930 * DocumentStructureRetrieve XML corresponding to the parameter Element section
931 * (which is in fedora XML), for the document indicated by docID.
932 * @param requestingDocID is the identifier of the document for which the
933 * structure was requested. It's this document's children or descendants that
934 * will be returned. Note that this is not always the same as (clear from)
935 * parameter docID.
936 * @param docID is the documentID for which the section's structure is
937 * returned where docID = "docPID-sectionNumber".
938 * @param section - the fedora section XML that is being mirrored in
939 * greenstone3 format.
940 */
941 protected Element getStructure(Document doc, String requestingDocID,
942 String docID, Element section)
943 {
944 // we want to mirror the section's DOM (given in fedora XML) in
945 // greenstone3's XML for a DocumentStructureRetrieve response.
946
947 // <documentNode nodeID="docID"> - the docNode on which a structure retrieve
948 // is being performed
949 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
950 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
951 attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
952 docNode.setAttributeNode(attribute);
953
954 // <nodeStructure>
955 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
956
957 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
958 Element rootNode = createDocNodeFromSubsection(doc, section, docID);
959
960 // fills in the subtree of the rootNode in our nodeStructure element
961 createDocStructure(doc, section, rootNode, docID);
962 //where section represents the root section
963
964 nodeStructure.appendChild(rootNode);
965 docNode.appendChild(nodeStructure);
966 return docNode;
967 }
968
969
970 /** Recursive method that creates a documentStructure mirroring parameter
971 * section, starting from parameter parent down to all descendants
972 * @param section is the XML &lt;Section&gt; in the fedora repository's TOC
973 * for the docPID whose substructure is to be mirrored
974 * @param parent is the XML documentNode in the greenstone repository whose
975 * descendants created by this method will correspond to the descendants of
976 * parameter section.
977 * @param doc is the document containing the parent;
978 * @param docPID is the prefix of all nodeIDs in the parent's structure
979 */
980 protected void createDocStructure(
981 Document doc, Element section, Element parent, String docPID)
982 {
983 // get the section's children (if any)
984 NodeList children = section.getChildNodes();
985 for(int i = 0; i < children.getLength(); i++) {
986 Node n = children.item(i);
987
988 if(n.getNodeName().equals(SECTION_ELEMENT)) {
989 //then we know it's an element AND that its tagname is "Section"
990 Element subsection = (Element)n;
991 Element child = createDocNodeFromSubsection(doc, subsection, docPID);
992 parent.appendChild(child);
993
994 // recursion call on newly found child-element and subsection
995 createDocStructure(doc, subsection, child, docPID);
996 }
997 }
998 }
999
1000 /** Given a particular subsection element, this method creates a
1001 * Greenstone3 DocumentNode element that mirrors it.
1002 * @param doc is the document that will contain the created DocumentNode
1003 * @param docID is the prefix of all nodeIDs in the parent's structure
1004 * @param subSection is the XML &lt;Section&gt; in the fedora repository's
1005 * TOC for the docPID which will be mirrored in the greenstone XML
1006 * documentNode that will be returned.
1007 * @return a greenstone &lt;documentNode&gt; that represents the fedora TOC's
1008 * &lt;Section&gt; element passed as parameter subSection. */
1009 protected Element createDocNodeFromSubsection(
1010 Document doc, Element subSection, String docID)
1011 {
1012 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1013 Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1014 docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1015 docNode.setAttributeNode(docType);
1016
1017 Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1018 String sectionID = subSection.hasAttribute(ID) ?
1019 subSection.getAttribute(ID) : "";
1020 if(sectionID.equals("1")
1021 && subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1022 // reset the attribute without the section number (just "docID" may be important for democlient?)
1023 nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1024 } else {
1025 nodeID.setValue(docID + HYPHEN + sectionID);
1026 }
1027 //nodeID.setValue(docID + HYPHEN + sectionID);
1028 docNode.setAttributeNode(nodeID);
1029
1030 Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1031 if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1032 nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1033 }
1034 docNode.setAttributeNode(nodeType);
1035 return docNode;
1036 }
1037
1038
1039 /** Given an identifier that is either a docPID or a concatenation of
1040 * docPID+sectionID, this method works out the fedora assigned docPID and
1041 * sectionID and then calls getContentBody(docPID, sectionID) with those.
1042 * @param docID is expected to be of the form
1043 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;" or
1044 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;"
1045 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1046 * "greenstone:&lt;collectionName&gt;-1" ("greenstone:&lt;collectionName&gt;-Section1")
1047 * is returned! */
1048 public String getContent(String docID) {
1049 return this.getContent(new String[]{docID});
1050 }
1051
1052 /** Given an identifier that is a concatenation of docID+sectionID, this
1053 * method works out the fedora assigned docPID and sectionID and then calls
1054 * getContentBody(docPID, sectionID) with those.
1055 * @param docIDs is an array of document identifiers of the form
1056 * "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;-&lt;sectionNumber&gt;"
1057 * If it is "greenstone:&lt;collectionName&gt;-&lt;docPID&gt;", then the content for
1058 * "greenstone:&lt;collectionName&gt;-Section1" is returned! */
1059 public String getContent(String[] docIDs) {
1060 Document doc = builder.newDocument();
1061 FedoraGS3RunException ex = null;
1062
1063 //<documentNodeList>
1064 Element docNodeList = doc.createElement(
1065 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1066
1067 try{
1068 for(int i = 0; i < docIDs.length; i++) {
1069 // get the sectionID and docPID from the docID
1070 String sectionID = this.removePrefix(
1071 getSectionIDFromDocID(docIDs[i]), SECTION);
1072 String docPID = getDocPIDFromDocID(docIDs[i]);
1073 if(sectionID.equals("")) // if no section is specified, get
1074 sectionID = "1"; // get the content for Section id="1"
1075
1076 // Get the contents for the requested section of document docPID
1077 String sectionContent = this.getContentBody(docPID, sectionID);
1078
1079 // set the nodeID attribute
1080 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1081 Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1082
1083 nodeId.setValue(docIDs[i]); // just set the docID which will contain
1084 // the docPID (and sectionID if already present)
1085
1086 docNode.setAttributeNode(nodeId);
1087 // set the text content to what was retrieved
1088 Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1089 Text textNode = doc.createTextNode(sectionContent.trim());
1090
1091 nodeContent.appendChild(textNode);
1092 docNode.appendChild(nodeContent);
1093 //add the documentNode to the docNodeList
1094 docNodeList.appendChild(docNode);
1095 }
1096 } catch(Exception e) {
1097 ex = new FedoraGS3RunException(e);
1098 ex.setSpecifics("requested doc Section datastream");
1099 }
1100 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1101 GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1102 try{
1103 return FedoraCommons.elementToString(responseMsg);
1104 } catch(TransformerException e) {
1105 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1106 + " " + e;
1107 }
1108 }
1109
1110 /** Gets the contents of a textNode from a section.
1111 * @return the text content of a section.
1112 * @param docPID the pid of the document from which a section's text is to
1113 * be retrieved.
1114 * @param sectionID is the section identifier of the document denoted by
1115 * docPID whose text is to be returned.
1116 */
1117 protected String getContentBody(String docPID, String sectionID)
1118 throws RemoteException, UnsupportedEncodingException,
1119 SAXException, IOException
1120 {
1121 String section = this.getSection(docPID, sectionID);
1122
1123 // the content is nested inside a <Section> element,
1124 // we extract it from there:
1125 InputSource source = new InputSource(new StringReader(section));
1126 Document doc = builder.parse(source);
1127
1128 // The document Element is the <Section> we want.
1129 // Get its text contents:
1130 section = FedoraCommons.getValue(doc.getDocumentElement());
1131
1132 // we are going to remove all occurrences of "_httpdocimg_/"
1133 // that precede associated filenames, because that's a GS3
1134 // defined macro for resolving relative urls. It won't help
1135 // with documents stored in fedora.
1136 section = section.replaceAll(GS3FilePathMacro+"/", "");
1137 return section;
1138 }
1139
1140 /** Here we create the greenstone's response message element:
1141 * &lt;message&lg;&lt;response&gt;&lt;content&gt;&lt;/response&gt;&lt;/message&gt;
1142 * @return a greenstone response-message element.
1143 * @param doc - the Document object which should me used to create the
1144 * &lt;message&gt; and &lt;response&gt; elements
1145 * @param content - the element that is to be nested inside &lt;response&gt;
1146 * @param ex - any exception that occurred when trying to create
1147 * the content parameter
1148 * @param responseType - the value for the type attribute of &lt;response&gt;,
1149 * such as "describe", "retrieve", "browse", "query"...
1150 * @param originator - indiates the collectionName or service (like
1151 * DocumentContentRetrieve) from where this response message originates
1152 */
1153 protected Element createResponseMessage(Document doc, Element content,
1154 Exception ex, String responseType, String originator)
1155 {
1156 Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1157 // from = "FedoraGS3"
1158 Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1159 attribute.setValue(originator);
1160 response.setAttributeNode(attribute);
1161
1162 // type = "describe" or "process" - whatever's given in requestType:
1163 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1164 attribute.setValue(responseType);
1165 response.setAttributeNode(attribute);
1166
1167 if(content != null)
1168 response.appendChild(content);
1169
1170 // we'll create an error element for RemoteExceptions (web service problems)
1171 // and UnsupportedEncodingExceptions and
1172 if(ex != null) {
1173 Element error = doc.createElement(GSXML.ERROR_ELEM);
1174 error.appendChild(doc.createTextNode(ex.getMessage()));
1175 // now append the error to the <response> element (after
1176 // the content element whatever that was)
1177 response.appendChild(error);
1178 }
1179
1180 Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1181 message.appendChild(response);
1182 doc.appendChild(message);
1183 return message;
1184 }
1185
1186 /** @return a &lt;serviceList&gt; Element as defined by GS3: containing all the
1187 * services (denoted by &lt;service&gt; elements) that are supported by FedoraGS3.
1188 * At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1189 * DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1190 * ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1191 * @param doc - the Document object which should me used to create the
1192 * &lt;serviceList&gt; element */
1193 protected Element createServiceList(Document doc)
1194 {
1195 Element serviceList = doc.createElement(
1196 GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1197
1198 for(int i = 0; i < serviceNames.length; i++) {
1199 // create the <service name="serviceName[i]" type="servicetype" />
1200 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1201
1202 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1203 attribute.setValue(serviceNames[i]);
1204 service.setAttributeNode(attribute);
1205
1206 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1207 if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1208 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1209 else if(serviceNames[i].contains("Query")) // search services
1210 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1211 else
1212 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1213 service.setAttributeNode(attribute);
1214
1215 // add the service element to the serviceList element
1216 // <serviceList><service /></serviceList>
1217 serviceList.appendChild(service);
1218 }
1219 return serviceList;
1220 }
1221
1222 /** @return a GS3 response message for a describe services request:
1223 * indicating the list of services supported by the Fedora-Greenstone
1224 * interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1225 * DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1226 * ClassifierBrowseMetadataRetrieve - as indicated by member variable
1227 * serviceNames. */
1228 public String getServiceList()
1229 {
1230 Document doc = builder.newDocument();
1231 Element serviceList = createServiceList(doc);
1232 // make <serviceList> the body of the responseMessage:
1233 // <message><response><serviceList></response></message>
1234 Element responseMsg = createResponseMessage(doc, serviceList, null,
1235 GSXML.REQUEST_TYPE_DESCRIBE, "");
1236 try {
1237 return FedoraCommons.elementToString(responseMsg);
1238 }catch(TransformerException e) {
1239 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1240 + " " + e;
1241 }
1242 }
1243
1244 /** @return a GS3 describe response message listing the collections and
1245 * collection-specific metadata stored in the Fedora-Greenstone repository. */
1246 public String getCollectionList()
1247 {
1248 Document doc = builder.newDocument();
1249 FedoraGS3RunException ex = null; // any RemoteException
1250
1251 // create the <collectionList /> element
1252 Element collectionList = doc.createElement(
1253 GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1254 try{
1255 String[] collectionNames = this.getCollectionNames(
1256 this.getCollections()); // this line could throw RemoteException
1257 for(int i = 0; i < collectionNames.length; i++) {
1258 // create the <collection name="somename" /> element
1259 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1260 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1261 attribute.setValue(collectionNames[i]);
1262 collection.setAttributeNode(attribute);
1263
1264 // append the <collection> element as child of <collectionList>
1265 collectionList.appendChild(collection);
1266
1267 //if(collection.hasAttribute(GSXML.NAME_ATT))
1268 //LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1269 }
1270 } catch(RemoteException e) { // if this happens, perhaps it's because it
1271 // can't find Greenstone collections in fedora repository?
1272 ex = new FedoraGS3RunException(e);
1273 ex.setSpecifics(
1274 "greenstone collections in fedora repository");
1275 }
1276
1277 // make <collectionList> the body of the responseMessage:
1278 // <message><response><collectionList></response></message>
1279 Element responseMsg = createResponseMessage(doc, collectionList, ex,
1280 GSXML.REQUEST_TYPE_DESCRIBE, "");
1281 try{
1282 return FedoraCommons.elementToString(responseMsg);
1283 }catch(TransformerException e) {
1284 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1285 + " " + e;
1286 }
1287 }
1288
1289 /** @return a GS3 describe response message for a collection in the
1290 * Fedora-Greenstone repository.
1291 * @param collectionName - the name of the collection that is to be described.
1292 * It will be converted to a fedora collection pid, which is of the form
1293 * "greenstone:&lt;collectionName&gt;-collection". */
1294 public String describeCollection(String collectionName)
1295 {
1296 Document doc = builder.newDocument();
1297 FedoraGS3RunException ex = null;
1298
1299 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1300 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1301 attribute.setValue(collectionName);
1302 collection.setAttributeNode(attribute);
1303
1304 //<displayItem assigned="true" lang="en" name="name">
1305 //"some display name"</displayItem>
1306 Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1307
1308 attribute = doc.createAttribute(GSXML.LANG_ATT);
1309 attribute.setValue(this.lang);
1310 displayItem.setAttributeNode(attribute);
1311
1312 attribute = doc.createAttribute(GSXML.NAME_ATT);
1313 attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1314 displayItem.setAttributeNode(attribute);
1315
1316 try{
1317 Text textNode = doc.createTextNode(
1318 this.getCollectionTitle(getCollectionPID(collectionName)));
1319 displayItem.appendChild(textNode);
1320 } catch(Exception e) {
1321 // can't find Greenstone collections in fedora repository or problem
1322 // getting their titles from their metadata datastream?
1323 ex = new FedoraGS3RunException(e);
1324 ex.setSpecifics("greenstone collections or their metadata"
1325 + "in the fedora repository");
1326 }
1327 // now append the displayItem element as child of the collection element
1328 collection.appendChild(displayItem);
1329 // get the <serviceList> and add it into the collection description.
1330 // Services for all collections in the FedoraGS3 repository are the
1331 // same, offering a ClassifierBrowse to browse titles by starting letter
1332 // and DocRetrieve services: Content, Metadata and Structure.
1333
1334 Element serviceList = createServiceList(doc);
1335 collection.appendChild(serviceList);
1336
1337 Element responseMsg = createResponseMessage(doc, collection, ex,
1338 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1339 try{
1340 return FedoraCommons.elementToString(responseMsg);
1341 }catch(TransformerException e) {
1342 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1343 + " " + e;
1344 }
1345 }
1346
1347 /** @return a GS3 describe response message for the services of a collection
1348 * in the Fedora-Greenstone repository. So far, these services are the same for
1349 * all fedora collections: they are the services given in member variable
1350 * serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1351 * ClassifierBrowseMetadataRetrieve.
1352 * All collections in this Digital Library (Fedora Repository) share the
1353 * same services, so this method returns the same services as getServiceList();
1354 * @param collectionName - the name of the collection whose services are to
1355 * be described. It will be converted to a fedora collection pid, which is of
1356 * the form "greenstone:&lt;collectionName&gt;-collection". */
1357 public String describeCollectionServices(String collectionName)
1358 {
1359 Document doc = builder.newDocument();
1360
1361 Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1362 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1363 attribute.setValue(collectionName);
1364 collection.setAttributeNode(attribute);
1365
1366 Element serviceList = createServiceList(doc);
1367 collection.appendChild(serviceList);
1368
1369 Element responseMsg = createResponseMessage(doc, collection, null,
1370 GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1371 try{
1372 return FedoraCommons.elementToString(responseMsg);
1373 }catch(TransformerException e) {
1374 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1375 + " " + e;
1376 }
1377 }
1378
1379 /** All collections in this Digital Library (Fedora Repository) share
1380 * the same services, so this method returns the same as
1381 * describeCollectionService(collName, serviceName).
1382 * @return a GS3 describe response message for the requested service
1383 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1384 * return nothing special except their names; browse (and any query)
1385 * return more complex XML responses.
1386 * @param serviceName - the name of the service in the collection which is to
1387 * be described.*/
1388 public String describeService(String serviceName)
1389 {
1390 // For all the *retrieve* services (incl ClassifierBrowseMetadataRetrieve)
1391 // we return:
1392 // <message><response from="<name>Retrieve" type="describe">
1393 // <service name="<name>Retrieve" type="retrieve" /></response></message>
1394 // But for browse (and any query) service, we return the data necessary
1395 // for displaying it
1396
1397 Document doc = this.builder.newDocument();
1398 Element service = doc.createElement(GSXML.SERVICE_ELEM);
1399 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1400 attribute.setValue(serviceName);
1401 service.setAttributeNode(attribute);
1402
1403 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1404
1405 if(serviceName.toLowerCase().endsWith("retrieve")) {
1406 attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1407 }
1408 else if(serviceName.toLowerCase().contains("browse")) {
1409 attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1410
1411 // we need name and description <displayItem> elements
1412 Element displayItem
1413 = createNameValuePairElement(doc,
1414 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1415 service.appendChild(displayItem);
1416
1417 displayItem = createNameValuePairElement(doc,
1418 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1419 "Browse pre-defined classification hierarchies");
1420 service.appendChild(displayItem);
1421
1422 // now need a classifierList
1423 Element classifierList = doc.createElement(
1424 GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1425
1426 int classifierNum = 1;
1427 // append a <classifier content="some letter" name="CL#">
1428 // for each letter of the alphabet:
1429 Element classifier = createClassifierElement(doc, "TitleByLetter",
1430 classifierNum++, "titles by letter", "Browse titles by letter");
1431 // now add this <classifier> to the <classifierList>
1432 classifierList.appendChild(classifier);
1433
1434 // ANY MORE CLASSIFIERS? ADD THEM HERE
1435
1436 service.appendChild(classifierList);
1437 } // ELSE check for whether it is a query service
1438 else if(serviceName.toLowerCase().contains("query")) {
1439 attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1440 if(serviceName.equals("TextQuery")) {
1441 describeTextQueryService(service);
1442 } else if(serviceName.equals("FieldQuery")) {
1443 describeFieldQueryService(service);
1444 }
1445 }
1446
1447 // don't forget to add the type attribute to the service!
1448 service.setAttributeNode(attribute);
1449
1450 String from = serviceName;
1451
1452 Element responseMsg = createResponseMessage(doc, service, null,
1453 GSXML.REQUEST_TYPE_DESCRIBE, from);
1454 try{
1455 return FedoraCommons.elementToString(responseMsg);
1456 }catch(TransformerException e) {
1457 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1458 + " " + e;
1459 }
1460 }
1461
1462 /** Appends children to the parameter service Element that make the
1463 * final service Element into a describe response XML for FedoraGS3's
1464 * TextQuery service.
1465 * @param service is the service Element that is being filled out. */
1466 protected void describeTextQueryService(Element service) {
1467 Document doc = service.getOwnerDocument();
1468 // we need name, submit (button) and description <displayItem> elements
1469 Element displayItem = createNameValuePairElement(doc,
1470 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1471 "Text Search");
1472 service.appendChild(displayItem);
1473
1474 displayItem = createNameValuePairElement(doc,
1475 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1476 service.appendChild(displayItem);
1477
1478 displayItem = createNameValuePairElement(doc,
1479 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1480 "Title and full-text search service");
1481 service.appendChild(displayItem);
1482
1483 //create the <paramList>
1484 Element paramList = doc.createElement(
1485 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1486
1487 // we ignore granularity to search at: it will always be
1488 // document and section level
1489 // we ignore casefolding: always on (that is, case is irrelevant)
1490 // we ignore document display order: always ranked
1491
1492 // Constructing the following:
1493 // <param default="100" name="maxDocs" type="integer">
1494 // <displayItem name="name">Maximum hits to return</displayItem>
1495 // </param>
1496 Element param = doc.createElement(GSXML.PARAM_ELEM);
1497
1498 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1499 attribute.setValue(MAXDOCS);
1500 param.setAttributeNode(attribute);
1501
1502 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1503 attribute.setValue("100");
1504 param.setAttributeNode(attribute);
1505
1506 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1507 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1508 param.setAttributeNode(attribute);
1509
1510 displayItem = createNameValuePairElement(doc,
1511 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1512 "Maximum hits to return");
1513 param.appendChild(displayItem);
1514
1515 paramList.appendChild(param);
1516
1517 // Constructing the following:
1518 // <param name="query" type="string">
1519 // <displayItem name="name">Query string</displayItem>
1520 // </param>
1521 param = doc.createElement(GSXML.PARAM_ELEM);
1522
1523 attribute = doc.createAttribute(GSXML.NAME_ATT);
1524 attribute.setValue(QUERY);
1525 param.setAttributeNode(attribute);
1526
1527 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1528 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1529 param.setAttributeNode(attribute);
1530
1531 displayItem = createNameValuePairElement(doc,
1532 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1533 "Query string");
1534 param.appendChild(displayItem);
1535
1536 paramList.appendChild(param);
1537
1538 service.appendChild(paramList);
1539 }
1540
1541 /** Appends children to the parameter service Element that make the
1542 * final service Element into a describe response XML for FedoraGS3's
1543 * FieldQuery service.
1544 * @param service is the service Element that is being filled out. */
1545 protected void describeFieldQueryService(Element service) {
1546 Document doc = service.getOwnerDocument();
1547 // we need name, submit (button) and description <displayItem> elements
1548 Element displayItem = createNameValuePairElement(doc,
1549 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1550 "Form Search");
1551 service.appendChild(displayItem);
1552
1553 displayItem = createNameValuePairElement(doc,
1554 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1555 service.appendChild(displayItem);
1556
1557 displayItem = createNameValuePairElement(doc,
1558 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1559 "Simple fielded search");
1560 service.appendChild(displayItem);
1561
1562 //create the <paramList>
1563 Element paramList = doc.createElement(
1564 GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1565
1566 // we ignore granularity to search at: it will always be
1567 // document and section level
1568 // we ignore casefolding: always on (that is, case is irrelevant)
1569 // we ignore document display order: always ranked
1570
1571 // Constructing the following:
1572 // <param default="100" name="maxDocs" type="integer">
1573 // <displayItem name="name">Maximum hits to return</displayItem>
1574 // </param>
1575 Element param = doc.createElement(GSXML.PARAM_ELEM);
1576
1577 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1578 attribute.setValue(MAXDOCS);
1579 param.setAttributeNode(attribute);
1580
1581 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1582 attribute.setValue("100");
1583 param.setAttributeNode(attribute);
1584
1585 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1586 attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1587 param.setAttributeNode(attribute);
1588
1589 displayItem = createNameValuePairElement(doc,
1590 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1591 "Maximum hits to return");
1592 param.appendChild(displayItem);
1593
1594 paramList.appendChild(param);
1595
1596 // Constructing the following:
1597 // <param name="simpleField" occurs="4" type="multi">
1598 // <displayItem name="name"></displayItem>
1599 //
1600 // <param name="query" type="string">
1601 // <displayItem name="name">Word or phrase </displayItem>
1602 // </param>
1603 //
1604 // <param default="allFields" name="fieldname" type="enum_single">
1605 // <displayItem name="name">in field</displayItem>
1606 //
1607 // <option name="docTitles">
1608 // <displayItem name="name">document titles</displayItem>
1609 // </option>
1610 // <option name="allTitles">
1611 // <displayItem name="name">document and section titles</displayItem>
1612 // </option>
1613 // <option name="fullText">
1614 // <displayItem name="name">full text</displayItem>
1615 // </option>
1616 // <option name="all">
1617 // <displayItem name="name">titles and full text</displayItem>
1618 // </option>
1619 // <option name="">
1620 // <displayItem name="name"></displayItem>
1621 // </option>
1622 // </param>
1623 // </param>
1624 Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1625 attribute = doc.createAttribute(GSXML.NAME_ATT);
1626 attribute.setValue(SIMPLEFIELD_ATT);
1627 rowOfParams.setAttributeNode(attribute);
1628
1629 // we want the row of controls to occur multiple times
1630 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1631 attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1632 rowOfParams.setAttributeNode(attribute);
1633
1634 attribute = doc.createAttribute(OCCURS_ATT);
1635 attribute.setValue("4"); // we want this row to occur 4 times
1636 rowOfParams.setAttributeNode(attribute);
1637
1638 // <param name="query" type="string">
1639 // <displayItem name="name">Word or phrase </displayItem>
1640 // </param>
1641 param = doc.createElement(GSXML.PARAM_ELEM);
1642
1643 attribute = doc.createAttribute(GSXML.NAME_ATT);
1644 attribute.setValue(QUERY);
1645 param.setAttributeNode(attribute);
1646
1647 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1648 attribute.setValue(GSXML.PARAM_TYPE_STRING);
1649 param.setAttributeNode(attribute);
1650
1651 displayItem = createNameValuePairElement(doc,
1652 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1653 "Word or phrase");
1654 param.appendChild(displayItem);
1655 rowOfParams.appendChild(param);
1656
1657 // <param default="allFields" name="fieldName" type="enum_single">
1658 // <displayItem name="name">in field</displayItem>
1659 param = doc.createElement(GSXML.PARAM_ELEM);
1660 attribute = doc.createAttribute(GSXML.NAME_ATT);
1661 attribute.setValue(FIELDNAME_ATT);
1662 param.setAttributeNode(attribute);
1663
1664 attribute = doc.createAttribute(GSXML.TYPE_ATT);
1665 attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1666 param.setAttributeNode(attribute);
1667
1668 attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1669 attribute.setValue(ALL_FIELDS);
1670 param.setAttributeNode(attribute);
1671
1672 displayItem = createNameValuePairElement(doc,
1673 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1674 "in field");
1675 param.appendChild(displayItem);
1676
1677 String[] searchFieldNames
1678 = {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1679 String[] searchFieldDisplay = {"all titles and full-text",
1680 "document titles only", "document and section titles",
1681 "full-text only"};
1682
1683 // for each fieldName create an option element and insert
1684 // the option into the enum_multi drop-down param:
1685 // <option name="fieldName">
1686 // <displayItem name="name">fieldName</displayItem>
1687 // </option>
1688 for(int i = 0; i < searchFieldNames.length; i++) {
1689 Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1690 attribute = doc.createAttribute(GSXML.NAME_ATT);
1691 attribute.setValue(searchFieldNames[i]);
1692 option.setAttributeNode(attribute);
1693
1694 displayItem = createNameValuePairElement(doc,
1695 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1696 searchFieldDisplay[i]);
1697 option.appendChild(displayItem);
1698 param.appendChild(option); // add option to the drop-down box
1699 }
1700
1701 rowOfParams.appendChild(param);
1702 paramList.appendChild(rowOfParams);
1703 service.appendChild(paramList);
1704 }
1705
1706 /**
1707 * @return a GS3 describe response message for the requested service
1708 * of the given collection. DocumentContent/Metadata/StructureRetrieve
1709 * return nothing special except their names; browse (and any query)
1710 * return more complex XML responses.
1711 * All collections in this Digital Library (Fedora Repository) share
1712 * the same services, so this method returns the same as
1713 * describeService(serviceName).
1714 * @param collectionName - the name of the collection whose service is to
1715 * be described. It will be converted to a fedora collection pid, which is of
1716 * the form "greenstone:&lt;collectionName&gt;-collection".
1717 * @param serviceName - the name of the service in the collection which is to
1718 * be described. */
1719 public String describeCollectionService(String collectionName,
1720 String serviceName) {
1721 // collectionName can be ignored, because all services are FedoraGS3
1722 // services and are not unique to any particular (greenstone) collection.
1723 return describeService(serviceName);
1724 }
1725
1726 /** This method performs the implemented browse operation: allowing the
1727 * user to browse the titles of documents in the given collection by letter
1728 * and returning the results.
1729 * @param collectionName is the name of the collection whose documents
1730 * starting with the given letter will be returned.
1731 * @param classifierIDs are the ids of the classifiers on which to browse. In
1732 * this case, the classifier indicates whether we browse titles by letter, or
1733 * browse (documents) by collection; and it is of the form &lt;CL(letter)&gt;.
1734 * @param structures - the requested browse substructure. Can be any combination
1735 * of ancestors, parent, siblings, children, descendants.
1736 * @param infos - the requested structural info. Can be numSiblings,
1737 * siblingPosition, numChildren.
1738 * @return a GS3 ClassifierBrowse response message which lists all
1739 * the documents that start with the letter indicated by parameter classifier.
1740 */
1741 public String browse(String collectionName, String[] classifierIDs,
1742 String[] structures, String[] infos)
1743 {
1744 // Construct one string from the structures and structural info arrays
1745 String structure = "";
1746 String info = "";
1747 for(int i = 0; i < structures.length; i++) {
1748 structure = structure + structures[i] + "|";
1749 }
1750 for(int i = 0; i < infos.length; i++) {
1751 info = info + infos[i] + "|";
1752 }
1753
1754 Document doc = builder.newDocument();
1755 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1756
1757 // <classifierNodeList>
1758 Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1759
1760 for(int i = 0; i < classifierIDs.length; i++) {
1761 if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1762 browseTitlesByLetterClassifier(doc, classifierNodeList,
1763 collectionName, classifierIDs[i],
1764 structure, info);
1765 }
1766 }
1767
1768 Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1769 GSXML.REQUEST_TYPE_DESCRIBE, /*collectionName+/ */"ClassifierBrowse");
1770 try {
1771 return FedoraCommons.elementToString(responseMsg);
1772 } catch(TransformerException e) {
1773 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1774 + " " + e;
1775 }
1776 }
1777
1778 /** CL1 browsing classifier: browsing titles by starting letter.
1779 * The browsing structure is retrieved.
1780 * @param doc - the document object that will contain the CL1 browsing structure.
1781 * @param classifierNodeList - the classifiers will be added to this nodeList.
1782 * @param collectionName - name of the collection through which we are browsing CL1.
1783 * @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1784 * a letter.
1785 * @param structure - the requested browse substructure. Can be any combination of
1786 * ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1787 * @param info - the requested structural info. Can be numSiblings, siblingPosition,
1788 * numChildren.
1789 * @return the classifierNodeList with the CL1 classifier browse structure.
1790 */
1791 public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1792 String collectionName, String classifierID,
1793 String structure, String info)
1794 {
1795 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1796
1797 if(structure.indexOf("entire") != -1) {
1798 structure = structure + "ancestors|descendants";
1799 }
1800
1801 // Structure of ancestors and children only at this stage
1802 int firstLevel = classifierID.indexOf('.');
1803 int secondLevel = classifierID.lastIndexOf('.');
1804
1805 // <nodeStructure>
1806 Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1807
1808 // requested classifier node
1809 Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1810 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1811 attribute.setValue(classifierID);
1812 classNode.setAttributeNode(attribute);
1813
1814 if(firstLevel == -1) { // CL1 - toplevel node
1815 Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1816
1817 classifierNodeList.appendChild(classNode);
1818 classNode.appendChild(nodeStructure);
1819
1820 nodeStructure.appendChild(root);
1821 if(structure.indexOf("descendants") != -1) {
1822 getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1823 } else if(structure.indexOf("children") != -1) {
1824 getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1825 }
1826 // nothing to be done for siblings
1827 }
1828 else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1829
1830 if(structure.indexOf("parent") != -1
1831 || structure.indexOf("ancestors") != -1
1832 || structure.indexOf("siblings") != -1) {
1833 String toplevelID = classifierID.substring(0, firstLevel);
1834 Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1835 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1836 attribute.setValue(toplevelID);
1837 toplevelNode.setAttributeNode(attribute);
1838 Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1839
1840 classifierNodeList.appendChild(toplevelNode);
1841 toplevelNode.appendChild(nodeStructure);
1842 nodeStructure.appendChild(node);
1843
1844 if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1845 getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1846 // pass the requested node (classNode) so that it is attached in the correct
1847 // location among its siblings, and to ensure that it is not recreated.
1848 // getTitlesByLetterStructure() will append classNode to node
1849 } else {
1850 node.appendChild(classNode);
1851 }
1852 } else {
1853 Element node = (Element)classNode.cloneNode(true);
1854 classifierNodeList.appendChild(node);
1855 node.appendChild(nodeStructure);
1856 nodeStructure.appendChild(classNode);
1857 }
1858
1859 int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1860 char ch = (char)(num - 1 + 'A');
1861 if(structure.indexOf("descendants") != -1) {
1862 getTitlesForLetter(ch, collectionName, classNode, "descendants");
1863 } else if(structure.indexOf("children") != -1) {
1864 getTitlesForLetter(ch, collectionName, classNode, "children");
1865 }
1866 }
1867 else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1868 LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1869 }
1870
1871 return classifierNodeList;
1872 }
1873
1874 /** Creates a (CL1) subclassifier element for the docs whose titles start with
1875 * the given letter.
1876 * @param ch - the starting letter of the document titles to retrieve.
1877 * @param collectionName - name of the collection through which we are browsing CL1.
1878 * @param classifierNode - the docNodes found will be appended to this node.
1879 * @param depthStructure - can be descendants or children. Specifies what to retrieve:
1880 * gets descendants of any documents found, otherwise gets just the children.
1881 * @return the given classifierNode which will have the child (or descendant) documents
1882 * appended to it.
1883 */
1884 public Element getTitlesForLetter(char ch, String collectionName,
1885 Element classifierNode, String depthStructure)
1886 {
1887 Document doc = classifierNode.getOwnerDocument();
1888 FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1889
1890
1891 // Retrieve the document structure for each subClassifierID:
1892 // all the documents that begin with its letter.
1893 String letter = String.valueOf(ch);
1894 try {
1895 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1896 if(docPIDs.length == 0) {
1897 return classifierNode; // skip letters that don't have any kids
1898 }
1899
1900 for(int i = 0; i < docPIDs.length; i++) {
1901 // work out the document's fedora PID and section ID
1902 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1903 String docPID = getDocPIDFromDocID(docPIDs[i]);
1904
1905 // get the required section, along with children or descendants
1906 Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1907
1908 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1909 Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1910
1911 // fills in the subtree of the rootNode in our nodeStructure element
1912 createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1913 classifierNode.appendChild(docRootNode);
1914 }
1915 } catch(Exception e) {
1916 ex = new FedoraGS3RunException(e);
1917 ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1918 }
1919
1920 return classifierNode;
1921 }
1922
1923
1924 /** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1925 * starting letter of the alphabet. X is each letter of the alphabet for which there
1926 * are matching document titles.
1927 * @param collectionName - name of the collection through which we are browsing CL1.
1928 * @param classifierNode - the docNodes found will be appended to this node.
1929 * @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1930 * the IDs for the subclassifiers (CL.x).
1931 * @param getDescendants - if true, get descendants of any documents found, otherwise
1932 * get just the children.
1933 * @param wantedSibling - the node (already created) whose siblings are requested. We
1934 * need to make sure not to recreate this node when creating its sibling nodes.
1935 * @return the given classifierNode, with the CL.x subclassifiers for the letters of
1936 * the alphabet that are represented in the document titles.
1937 */
1938 public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1939 String classifierID, boolean getDescendants,
1940 Element wantedSibling)
1941 {
1942 String ID = "";
1943 if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1944 ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1945 }
1946
1947 Document doc = classifierNode.getOwnerDocument();
1948 FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1949
1950 // We're going to loop to the end of the alphabet
1951 int count = 1;
1952 for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1953 // Retrieve the document structure for each subClassifierID:
1954 // all the documents that begin with its letter.
1955 String letter = String.valueOf(ch);
1956 try {
1957 String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1958 if(docPIDs.length == 0) {
1959 continue; // skip letters that don't have any kids
1960 }
1961 Element subClassifier = null;
1962 if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1963 // already have the requested node, don't recreate it
1964 subClassifier = wantedSibling;
1965 } else {
1966 // <classifierNode nodeID="CL1.x">
1967 subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
1968 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1969 attribute.setValue(classifierID+"."+count);
1970 subClassifier.setAttributeNode(attribute);
1971 }
1972 classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
1973
1974 if(getDescendants) { // get the documents
1975
1976 // append the <docNodes> for the docPIDs found as children
1977 // of subclassifier
1978
1979 for(int i = 0; i < docPIDs.length; i++) {
1980 // work out the document's fedora PID and section ID
1981 String sectionID = getSectionIDFromDocID(docPIDs[i]);
1982 String docPID = getDocPIDFromDocID(docPIDs[i]);
1983
1984 // get the required section, along with children or descendants
1985 Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
1986
1987 // <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1988 Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
1989
1990 // fills in the subtree of the rootNode in our nodeStructure element
1991 createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
1992 subClassifier.appendChild(rootNode);
1993 }
1994 }
1995 } catch(Exception e) {
1996 ex = new FedoraGS3RunException(e);
1997 ex.setSpecifics("requested portion of TOC file or "
1998 + "trouble with fielded search ");
1999 }
2000 }
2001 return classifierNode;
2002 }
2003
2004
2005 /** This method performs something equivalent to a greenstone3
2006 * ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2007 * @param classNodeIDs array of classifierNode IDs for which the metadata
2008 * needs to be returned.
2009 * @param metafields are the classifier metadata fields that are to be returned.
2010 * At present this method ignores them/pretends the requested metafields are
2011 * "all" and always returns the Title meta for the requested classifier nodes
2012 * (because that is all the metadata this Fedora classifier has at present).
2013 * @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2014 * lists the metadata for all the classifierNodes passed as parameter.*/
2015 public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2016 {
2017 Document doc = this.builder.newDocument();
2018 // <classifierNodeList>
2019 Element classifierNodeList = doc.createElement(
2020 GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2021
2022 // create <classifierNode><metadataList><metadata>s
2023 // </metadataList></classifierNode> for all letters of the alphabet
2024 for(int i = 0; i < classNodeIDs.length; i++) {
2025 // strip ID of everything before the first '.' (i.e. remove "CL#.")
2026 int index = classNodeIDs[i].indexOf('.');
2027 String subClassifierNumber = classNodeIDs[i].substring(index+1);
2028 index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2029 if(index != -1) {
2030 subClassifierNumber = subClassifierNumber.substring(0, index);
2031 }
2032 int subClassifierNum = Integer.parseInt(subClassifierNumber);
2033 String classifierName = "";
2034 if(subClassifierNum == 0) { // no document titles started with a letter
2035 classifierName = "A-Z";
2036 } else {
2037 char letter = (char)('A' + subClassifierNum - 1); // A = 1
2038 classifierName = String.valueOf(letter);
2039 }
2040
2041 // <classifierNode nodeID="CL#.subNum">
2042 Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2043 Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2044 attribute.setValue(classNodeIDs[i]);
2045 classifierNode.setAttributeNode(attribute);
2046
2047 // <metadataList>
2048 Element metadataList = doc.createElement(
2049 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2050
2051 // at least one metadata element: that of the title of this
2052 // classifierNode:
2053 // <metadata name="Title">letter</metadata>
2054 Element metadata = this.createNameValuePairElement(doc,
2055 GSXML.METADATA_ELEM, "Title", classifierName);
2056
2057 // now connect up everything
2058 metadataList.appendChild(metadata);
2059 classifierNode.appendChild(metadataList);
2060 classifierNodeList.appendChild(classifierNode);
2061 }
2062
2063 Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2064 GSXML.REQUEST_TYPE_PROCESS, //collName +
2065 "ClassifierBrowseMetadataRetrieve");
2066 try{
2067 return FedoraCommons.elementToString(responseMsg);
2068 }catch(TransformerException e) {
2069 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2070 + " " + e;
2071 }
2072 }
2073
2074 /** @return a newly created element of the following format:
2075 * &lt;classifier content="somecontent" name="CL+num"&gt;
2076 * &lt;displayItem name="name"&gt;someClassifierName&lt;/displayItem&gt;
2077 * &lt;displayItem name="description"&gt;Browse by classifier name&lt;/displayItem&gt;
2078 * &lt;/classifier&gt;
2079 * @param doc - the document used to create the element
2080 * @param content - value of the content attribute
2081 * @param classifierNum - the number suffixed to the CL, together forming
2082 * the classifier Node's ID
2083 * @param displayNameVal is the bodytext of a named displayItem element
2084 * @param displayDescrVal is the bodytext of a displayItem element with
2085 * description */
2086 protected Element createClassifierElement(Document doc, String content,
2087 int classifierNum, String displayNameVal, String displayDescrVal)
2088 {
2089 final String CL = "CL";
2090 Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2091 // content attribute
2092 Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2093 att.setValue(content);
2094 classifier.setAttributeNode(att);
2095 // name attribute
2096 att = doc.createAttribute(GSXML.NAME_ATT);
2097 att.setValue(CL + classifierNum);
2098 classifier.setAttributeNode(att);
2099
2100 // now create the displayItem children for classifier:
2101 // <displayItem name="name">#letter</displayItem>
2102 // <displayItem name="description">Browse titles starting with #letter</displayItem>
2103 Element displayItem = createNameValuePairElement(doc,
2104 GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2105 classifier.appendChild(displayItem);
2106 displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2107 GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2108 classifier.appendChild(displayItem);
2109
2110 return classifier;
2111 }
2112
2113
2114 /** @return a newly created element of the following format:
2115 * &lt;elementName name="somename"&gt;"some display value"&lt;/elementName&gt;
2116 * @param doc - the document used to create the element
2117 * @param elementName - the tag name
2118 * @param name - value of attribute name
2119 * @param value - the body text of the element */
2120 protected Element createNameValuePairElement(Document doc, String elementName,
2121 String name, String value) {
2122 // <elementName name="somename">"some display value"</elementName>
2123 Element element = doc.createElement(elementName);
2124 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2125 attribute.setValue(name);
2126 element.setAttributeNode(attribute);
2127
2128 element.appendChild(doc.createTextNode(value));
2129 return element;
2130 }
2131
2132 /**
2133 * @param collection is the collection to search in
2134 * @param query is the query term to search for. It won't specify the
2135 * indexed field to search in, which will mean that GSearch will
2136 * search all default indexed fields.
2137 * @param maxDocs is the maximum number of results to return (which
2138 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2139 */
2140 public String[] textQuery(String collection, String query,
2141 int maxDocs)
2142 throws Exception
2143 {
2144 // no need to search there is no query or query is empty spaces
2145 if(query.trim().equals(""))
2146 return new String[]{};
2147
2148 // QUERY value won't specify indexed field to search, Fedora
2149 // Gsearch will take that as meaning all default indexed fields.
2150 // Params to search() method below: string of fielded query terms;
2151 // hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2152 query = query + " " + "PID" + COLON + GREENSTONE;
2153
2154 String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2155 // now we have the XML returned by FedoraGSearch, get the pids
2156 // of the documents returned (if any)
2157 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2158 collection, searchResult);
2159 return pids;
2160 }
2161
2162 /**
2163 * This method performs a fieldquery, searching for x number of phrases
2164 * in each of the 4 indexed fields.
2165 * @param collection is the collection to search in
2166 * @param nameValParamsMap is a Map of several(key, value) entries,
2167 * 4 of which we're concerned with here:
2168 * - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2169 * - the values are a comma separated list of terms (phrases or single
2170 * words) to search that field in. There may be more than 1 or
2171 * there may be none (in which case there may be N empty values or
2172 * spaces separated by commas).
2173 * @param maxDocs is the maximum number of results to return (which
2174 * at present we consider equivalent to FedoraGSearch's hitpageSize).
2175 * */
2176 public String[] fieldQuery(String collection, Map nameValParamsMap,
2177 int maxDocs)
2178 throws Exception
2179 {
2180 // we're going to maintain a list of UNIQUE pids that were returned
2181 // in search results. Hence we use Set:
2182 java.util.Set set = new java.util.HashSet();
2183
2184 // (1) Use Fedora's search to search document titles, if they were
2185 // specified:
2186 String[] docTitlepids = {};
2187
2188 String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2189 if(docTitleTerms != null) { // no doc titles may have been specified
2190 String[] phrases = docTitleTerms.split(COMMA);
2191
2192 // search the individual phrases first:
2193 for(int i = 0; i < phrases.length; i++) {
2194 if(phrases.equals("") || phrases.equals(" "))
2195 continue; //skip when there are no terms
2196 docTitlepids = this.searchDocumentTitles(
2197 collection, phrases[i], false);
2198 for(int j = 0; j < docTitlepids.length; j++)
2199 set.add(docTitlepids[j]);
2200 }
2201 }
2202 // (2) use FedoraGSearch to search doc AND section titles, and
2203 // fulltext (in case these were specified in nameValParamsMap):
2204 String searchResult = this.fedoraGSearch.search(
2205 nameValParamsMap, 1, maxDocs);
2206
2207 String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2208 collection, searchResult);
2209
2210 for(int i = 0; i < pids.length; i++)
2211 set.add(pids[i]);
2212
2213 pids = null;
2214 pids = new String[set.size()];
2215 set.toArray(pids); // unique pids
2216 return pids;
2217 }
2218
2219 /** @return a String representing Greenstone3 XML for a query process
2220 * response returning the results for the query denoted by parameter
2221 * nameValParamsMap.
2222 * @param nameValParamsMap is a Hashmap of name and value pairs for all the
2223 * query field data values. The names match the field names that
2224 * describeCollectionService() would have returned for the query service.
2225 * @param collection is the name of the collection
2226 * @param service is the name of the query service
2227 * This method is only ever called when any of the services in the digital
2228 * library described themselves as type=query. Therefore any digital
2229 * libraries that have no query services, can just return emtpy message
2230 * strings (or even "") since this method will never be called on them
2231 * anyway. */
2232 public String query(String collection, String service,
2233 Map nameValParamsMap)
2234 {
2235 FedoraGS3RunException ex = null;
2236 // (1) obtain the requested number of maximum result documents
2237 int maxDocs = 100;
2238 try{
2239 maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2240 } catch(NumberFormatException e) {
2241 maxDocs = 100;
2242 }
2243
2244 String pids[] = {};
2245 // (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2246 if(service.endsWith("TextQuery")) {
2247 try {
2248 // get the Query field:
2249 String query = (String)nameValParamsMap.get(QUERY);
2250 pids = textQuery(collection, query, maxDocs);
2251 }
2252 catch(Exception e) {
2253 LOG.error("Error in TextQuery processing: " + e);
2254 ex = new FedoraGS3RunException(
2255 "When trying to use FedoraGenericSearch for a TextQuery", e);
2256
2257 }
2258 } else { // (3) FieldQuery
2259 // first get the comma-separated lists
2260 String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2261 String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2262 // both are comma separated lists, so split both on 'comma'
2263 String[] fieldNames = listOfFieldNames.split(COMMA);
2264 String[] searchTerms = listOfSearchTerms.split(COMMA);
2265
2266 // In the fieldNames and searchTerms lists of nameValParamsMap,
2267 // each searchTerm element was matched with its correspondingly
2268 // indexed fieldName.
2269 // A new map is going to reorganise this, by putting all terms
2270 // for a particular fieldName together in a comma separated list
2271 // and associating that with the fieldName. I.e. (key, value) ->
2272 // (fieldName, comma-separated list of all terms in that field)
2273 Map map = new HashMap();
2274 for(int i = 0; i < searchTerms.length; i++) {
2275 // there may be fewer searchTerms than fieldNames (since some
2276 // fieldNames may have been left empty), so loop on searchTerms
2277 if(map.containsKey(fieldNames[i])) { // fieldName is already
2278 // in the list, so append comma with new value
2279 String termsList = (String)map.get(fieldNames[i]);
2280 termsList = termsList + COMMA + searchTerms[i];
2281 map.put(fieldNames[i], termsList);
2282 } else { // this is the first time this fieldName occurred
2283 // just put the fieldName with searchTerm as-is
2284 map.put(fieldNames[i], searchTerms[i]);
2285 }
2286 }
2287
2288 try {
2289 // For fieldquery, we search on all the fieldNames specified
2290 // - if DOC_TITLES is specified then we use Fedora's search
2291 // - for all other fieldNames specified, we use FedoraGSearch
2292 pids = fieldQuery(collection, map, maxDocs);
2293 }
2294 catch(Exception e) {
2295 LOG.error("Error in FieldQuery processing: " + e);
2296 ex = new FedoraGS3RunException(
2297 "When trying to use FedoraGenericSearch for a FieldQuery", e);
2298 }
2299 }
2300
2301 // Build Greenstone XML Query response message for from
2302 // the pids (which should be document identifiers)
2303 Document doc = builder.newDocument();
2304 // <metadataList><metadata name="numDocsMatched" value="n" />
2305 // </metadataList>
2306 Element metadataList = doc.createElement(
2307 GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2308 Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2309
2310 Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2311 attribute.setValue(NUM_DOCS_MATCHED);
2312 metadata.setAttributeNode(attribute);
2313
2314 attribute = doc.createAttribute(GSXML.VALUE_ATT);
2315 attribute.setValue(Integer.toString(pids.length));
2316 metadata.setAttributeNode(attribute);
2317
2318 metadataList.appendChild(metadata);
2319
2320 // <documentNodeList>
2321 // <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2322 // docType='hierarchy' nodeType="leaf" />
2323 // ...
2324 // ...
2325 // </documentNodeList>
2326 Element docNodeList = doc.createElement(
2327 GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2328 // for each
2329 for(int i = 0; i < pids.length; i++) {
2330 Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2331 attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2332 attribute.setValue(pids[i]);
2333 docNode.setAttributeNode(attribute);
2334
2335 attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2336 attribute.setValue("hierarchy");
2337 docNode.setAttributeNode(attribute);
2338
2339 attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2340 attribute.setValue("root");
2341 docNode.setAttributeNode(attribute);
2342 docNodeList.appendChild(docNode);
2343 }
2344
2345 Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2346 GSXML.REQUEST_TYPE_PROCESS, service);
2347 try{
2348 return FedoraCommons.elementToString(responseMsg);
2349 }catch(TransformerException e) {
2350 return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2351 + " " + e;
2352 }
2353 }
2354
2355
2356 // FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2357 /** Given a URL that represents a fedoraPID, will look up the object.
2358 * If it exists, it will return the contents of the DC:Title of its datastream.
2359 * If it doesn't exist, it will return the URL as-is.
2360 * @param URL: the URL that (after modification) represents a fedoraPID to look up.
2361 * @param collection: the name of collection in which to search for the URL
2362 * representing a fedoraPID.
2363 * @return the string (representing a fedoraPID) stored in the DC:Title of the
2364 * URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2365 * then the parameter URL is returned.
2366 */
2367 public String getPIDforURL(String url, String collection) {
2368 FedoraGS3RunException ex = null; // any RemoteException
2369
2370 // (1) convert url to the fedorapid
2371 // / -> _ and : -> -
2372 String fedoraPID = url.replaceAll("/", "_");
2373 fedoraPID = fedoraPID.replaceAll(":", "-");
2374 // prefix "greenstone-http:<colname>-" to the fedoraPID
2375 fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2376 //LOG.error("### fedoraPID: " + fedoraPID);
2377
2378 // (2) Look up the datastream for the fedorapid
2379 String dcTitle = "";
2380 try {
2381 dcTitle = getDCTitle(fedoraPID);
2382 } catch(Exception e) {
2383 LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2384 ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2385 }
2386 //String dc = this.getDC(fedoraPID);
2387 //LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2388
2389 // (3) if fedorapid exists, extract the dc:title content.
2390 // if it doesn't exist, return url
2391 if(dcTitle.equals("")) {
2392 return url;
2393 } else {
2394 // It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2395 //return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2396 return dcTitle+"-1";
2397 }
2398 }
2399
/**
 * Manual test driver. Creates a FedoraGS3Connection from the file
 * "fedoraGS3.properties" and prints to standard output the results of the
 * describe, content, metadata, structure, browse and query calls, using
 * hard-coded collection names and document identifiers.
 * Calls System.exit(0) once all calls have completed. Any exception is
 * shown in an error dialog and its stack trace is printed.
 * @param args command line arguments (not read by this method)
 */
public static void main(String args[]) {
    try{
        // testing default constructor
        //FedoraGS3Connection con = new FedoraGS3Connection();

        // testing constructor that takes properties file to show initial
        // fedora server values
        java.io.File propertyFilename
            = new java.io.File("fedoraGS3.properties");
        FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);

        // DESCRIBE: serviceList, collectionList
        System.out.println("serviceList:\n" + con.getServiceList());

        System.out.println("collectionList:\n" + con.getCollectionList());

        // colPIDs is used further down to fetch the documents of the
        // first collection; the names are looked up from a second
        // getCollections() call
        String[] colPIDs = con.getCollections();
        String[] collectionNames = con.getCollectionNames(con.getCollections());


        // describe every collection and its services
        for(int i = 0; i < collectionNames.length; i++) {
            System.out.println("Describing collections:\n");
            System.out.println(con.describeCollection(collectionNames[i]));
            System.out.println("Describing collection services:\n"
                + con.describeCollectionServices(collectionNames[i]));
        }

        // describe each service by name, always against the "demo" collection
        String[] serviceNames = con.getServiceNames();
        for(int i = 0; i < serviceNames.length; i++) {
            System.out.println("Describing " + serviceNames[i] + ":\n"
                + con.describeCollectionService("demo", serviceNames[i]));
        }


        // TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
        // along with EX of the top-level document:
        System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
        System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));


        // content and metadata of every document in the first collection
        // (colPIDs[0] assumes at least one collection exists)
        String[] docIDs = con.getCollectionDocs(colPIDs[0]);
        System.out.println("\nGET CONTENT:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getContent(docIDs[i]));
        }

        System.out.println("\nGET META:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
        }

        // hard-coded identifiers: two top-level documents followed by
        // three identifiers that carry a section suffix
        String[] getTitlesFor = {
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
            "greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
            "greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
        };

        // first let's display the regular meta for top-level docs and
        // their sections
        for(int i = 0; i < getTitlesFor.length; i++) {
            System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
        }

        System.out.println("\nTitles are:");
        System.out.println(con.getTitleMetadata(getTitlesFor));

        // structure requests for every document of the first collection
        System.out.println("\nGET STRUCTURE:");
        for(int i = 0; i < docIDs.length; i++) {
            System.out.println("Descendents and numChildren:\n"
                + con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {"numChildren"}));
            System.out.println("Parent and numSiblings:\n"
                + con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {"numSiblings"}));
        }

        // TEST ERROR CASES:
        // (identifiers presumably not present in the repository -- verify
        // against the test data)
        System.out.println("\nTESTING ERROR CASES");
        System.out.println(con.getContent("greenstone:demo-pinky"));
        String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
            "greenstone:demo-pinky" };
        System.out.println(con.getContent(errorCases));
        System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
        System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {"numChildren"}));

        System.out.println("\nCLASSIFIER BROWSE");
        System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
            new String[]{"CL1"}, new String[] {""}, new String[] {""}));

        // build the classifier node ids CL1.1 to CL1.26
        System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
        String[] classNodeIDs = new String[26];
        for(int i = 0; i < classNodeIDs.length; i++) {
            int subClassifierNum = i + 1;
            classNodeIDs[i] = "CL1." + subClassifierNum;
        }
        System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
            classNodeIDs, new String[]{"all"}));

        System.out.println("Testing query services");
        System.out.println("TEXT QUERY:");
        Map formControlValsMap = new HashMap();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "snails");
        String searchResponse
            = con.query("gs2mgdemo", "TextQuery", formControlValsMap);
        System.out.println(searchResponse);

        // field query: three search terms paired by position with the
        // first three of the four field names
        System.out.println("FIELD QUERY:");
        formControlValsMap.clear();
        formControlValsMap.put(MAXDOCS, "100");
        formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
        formControlValsMap.put(FIELDNAME_ATT,
            "allFields,docTitles,allFields,allFields");
        searchResponse
            = con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
        System.out.println(searchResponse);

        System.exit(0);
    }catch(Exception e) {
        // report the failure in a dialog as well as on the console
        JOptionPane.showMessageDialog(
            null, e, "Error", JOptionPane.ERROR_MESSAGE);
        //System.err.println("ERROR: " + e);
        e.printStackTrace();
    }
}
2525}
Note: See TracBrowser for help on using the repository browser.