Context Navigation

source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26171

Last change on this file since 26171 was 26171, checked in by ak19, 12 years ago
Uncommitted changes from ages back to fedoraGS3 classes to get greenstone to work as an interface to fedora repository backend.
File size: 104.7 KB

Line
1	/**
2	*#########################################################################
3	* FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4	* of the Greenstone digital library suite from the New Zealand Digital
5	* Library Project at the University of Waikato, New Zealand.
6	* <BR><BR>
7	* Copyright (C) 2008 New Zealand Digital Library Project
8	* <BR><BR>
9	* This program is free software; you can redistribute it and/or modify
10	* it under the terms of the GNU General Public License as published by
11	* the Free Software Foundation; either version 2 of the License, or
12	* (at your option) any later version.
13	* <BR><BR>
14	* This program is distributed in the hope that it will be useful,
15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	* GNU General Public License for more details.
18	*########################################################################
19	*/
20
21	package org.greenstone.fedora.services;
22
23
24	import java.io.StringReader;
25
26	import org.apache.log4j.Logger;
27	import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28	import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29	import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30	import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31	import org.greenstone.gsdl3.util.GSXML;
32	import org.w3c.dom.Document;
33	import org.w3c.dom.Element;
34	import org.w3c.dom.Attr;
35	import org.w3c.dom.Text;
36	import org.w3c.dom.NodeList;
37	import org.w3c.dom.Node;
38	import org.xml.sax.InputSource;
39
40	import java.io.File;
41	import java.util.HashMap;
42	import java.util.Properties;
43	import java.util.Map;
44
45	import javax.swing.JOptionPane;
46
47	import org.xml.sax.SAXException;
48	import java.io.UnsupportedEncodingException;
49	import java.io.IOException;
50	import javax.net.ssl.SSLHandshakeException;
51	import java.net.ConnectException;
52	import java.net.MalformedURLException;
53	import java.rmi.RemoteException;
54	import javax.xml.parsers.ParserConfigurationException;
55	import javax.xml.transform.TransformerException;
56
57	/**
58	* Class that extends FedoraConnection in order to be able to use
59	* Fedora's web services to retrieve the specific datastreams of
60	* Greenstone documents stored in Fedora's repository. This class
61	* provides methods that convert those datastreams into Greenstone3
62	* XML response messages which are returned.
63	* @author ak19
64	*/
65	public class FedoraGS3Connection
66	extends FedoraConnection implements FedoraToGS3Interface,
67	FedoraToGS3Interface.Constants
68	{
69	/** The logging instance for this class */
70	private static final Logger LOG = Logger.getLogger(
71	FedoraGS3Connection.class.getName());
72
73	/** Default name of Fedora index */
74	private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
75
76	/** Complete list of services that are supported our FedoraGS3 would
77	* support if everything goes well. If a connection to FedoraGSearch
78	* cannot be established, the query services will no longer be
79	* available. The actual services supported are given by member
80	* variable serviceNames. */
81	protected static final String[] SERVICES = {
82	"DocumentContentRetrieve", "DocumentMetadataRetrieve",
83	"DocumentStructureRetrieve",
84	"TextQuery", "FieldQuery",
85	"ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
86	};
87
88	/** List of services actually supported by our FedoraGS3 repository
89	* after construction. If FedoraGenericSearch can't be connected to,
90	* then query services will not be offered */
91	protected String[] serviceNames;
92
93	/** The object used to connect to FedoraGenericSearch, which is used
94	* for doing full-text searching */
95	protected GSearchConnection fedoraGSearch;
96
97	/** The url for the wsdl file of FedoraGSearch's web services
98	* by default this will be the Fedora server's base URL
99	* concatenated to "gsearch/services/FgsOperations?wsdl" */
100	protected String gSearchWSDLURL;
101
102	/** The last part of the gSearchWSDL URL. The first part is
103	* the same as the fedora server's base url. */
104	protected String gSearchWSDLSuffix;
105
106	/** The name of the index that FedoraGSearch will index the GS3
107	* documents into. If no name is specified in the properties file,
108	* this will default to FedoraIndex. */
109	protected String gSearchIndexName;
110
/**
 * Five-argument constructor mirroring the one of the superclass
 * FedoraConnection. All of the work is delegated to that constructor.
 * <p>
 * To use a FedoraGSearch WSDL location other than the default one
 * ("gsearch/services/FgsOperations?wsdl" appended to the Fedora base URL),
 * call {@code setGSearchWSDLURL(url)} after construction.
 *
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for the administrative
 * authentication required to access the fedora server
 * @param fedoraServerPassword is the password for the administrative
 * authentication required to access the fedora server. If no password was
 * set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
    String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    // NOTE(review): the superclass constructor is expected to call back
    // into one of this class's overridden initialisation hooks (init(...)
    // according to that method's own documentation), which is where the
    // GSearchConnection gets created -- confirm in FedoraConnection.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
136
/**
 * No-argument constructor mirroring the one of the superclass
 * FedoraConnection, which displays a small dialog requesting the host,
 * port, administrative username and password of the fedora server.
 * If no password was set on the fedora repository when installing it,
 * the user can leave the password field blank.
 */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor is expected to call
    // setInitialisationProperties(properties), whose override in this
    // class tries to instantiate the GSearchConnection.
    super();
}
151
/**
 * Single-argument constructor mirroring the one of the superclass
 * FedoraConnection. It takes the properties file in which connection
 * initialisation values may already be provided; the superclass then
 * displays a small dialog requesting the host, port, administrative
 * username and password of the fedora server, showing the values from the
 * properties file as defaults. Fields whose values are missing from the
 * file are shown blank. If no password was set on the fedora repository
 * when installing it, the user can leave the password field blank.
 *
 * @param propertiesFilename the properties file holding any preset
 * connection values
 */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor is expected to call
    // setInitialisationProperties(properties), whose override in this
    // class tries to instantiate the GSearchConnection.
    super(propertiesFilename);
}
171
172	/** The superclass constructor calls this method passing any preset
173	* properties loaded from a propertiesFile. This method is overridden
174	* here in order to instantiate the gSearchConnection based on the
175	* - gSearchWSDLSuffix that will be appended to the fedora base url.
176	* (If one was not provided in the properties file, gSearchWSDLURL defaults
177	* to something of the form
178	* "http://<fedorahost:port>/fedoragsearch/services/FgsOperations?wsdl"
179	* which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
180	* "gsearch/services/FgsOperations?wsdl".
181	* - name of the index into which the GS3 documents have been indexed
182	* and which FedoraGenericSearch should use to perform searches. If none is
183	* given in the properties file, then the index name defaults to "FedoraIndex".
184	* @param properties is the Properties Map loaded from a properties file
185	* (if there was any) which specifies such things as host and port of the
186	* FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
187	* At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
188	* to whatever the final value of this.gSearchWSDLURL' suffix is, and
189	* "gsearch.indexName" will be set to to whatever the final value of
190	* this.gSearchIndexName is.
191	*/
192	protected void setInitialisationProperties(Properties properties)
193	throws ParserConfigurationException, MalformedURLException,
194	CancelledException, ConnectException, RemoteException,
195	SSLHandshakeException, Exception
196	{
197	super.setInitialisationProperties(properties);
198	// gsearchWSDL URL suffix, if not specified, defaults to
199	// "fedoragsearch/services/FgsOperations?wsdl" which is
200	// concatenated to the baseURL of fedora to give the gsearchWSDLURL.
201	this.gSearchWSDLSuffix = properties.getProperty(
202	"gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
203	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
204	// Set the property to whatever this.gSearchWSDLURL is now,
205	// so that it will be written out to the properties file again
206	properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
207
208	// Similarly for the name of the index FedoraGenericSearch should use
209	// when performing searches for GS3 docs stored in Fedora's repository.
210	this.gSearchIndexName = properties.getProperty(
211	"gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
212	properties.setProperty("gsearch.indexName", this.gSearchIndexName);
213	// Create a connection to FedoraGSearch's web services:
214	initSearchFunctionality();
215	}
216
217	/** Overridden init method to work with the 5 argument constructor, so that we can
218	* bypass using setInitialisationProperties() which works with a Properties map.
219	*/
220	protected void init(String protocol, String host, String port,
221	String fedoraServerUsername, String fedoraServerPassword)
222	throws ParserConfigurationException, MalformedURLException,
223	AuthenticationFailedException, RemoteException, Exception
224	{
225	super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
226	this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
227	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
228	this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
229	initSearchFunctionality();
230	}
231
232
233	/** Init method that instantiates a GSearchConnection object used
234	* to work with the separate FedoraGSearch web services.
235	* The url of the WSDL for FedoraGSearch's web services is worked out
236	* from the baseURL of the Fedora server.
237	*/
238	protected void initSearchFunctionality()
239	{
240	try {
241	this.fedoraGSearch = null;
242	this.fedoraGSearch = new GSearchConnection(
243	gSearchWSDLURL, gSearchIndexName);
244	this.serviceNames = SERVICES;
245	} catch(Exception e){
246	LOG.error("Cannot connect to FedoraGSearch's web services at "
247	+ gSearchWSDLURL + "\nQuery services will not be available.");
248	// If an exception occurs, something has gone wrong when
249	// trying to connect to FedoraGSearch's web services. This
250	// means, we can't offer query services, as that's provided
251	// by FedoraGSearch
252	serviceNames = null;
253	int countOfNonQueryServices = 0;
254	for(int i = 0; i < SERVICES.length; i++) {
255	// do not count query services
256	if(!SERVICES[i].toLowerCase().contains("query")) {
257	countOfNonQueryServices++;
258	}
259	}
260	// Services now supported are everything except Query services
261	serviceNames = new String[countOfNonQueryServices];
262	int j = 0;
263	for(int i = 0; i < SERVICES.length; i++) {
264	if(!SERVICES[i].toLowerCase().contains("query")) {
265	serviceNames[j] = SERVICES[i];
266	j++; // valid serviceName, so increment serviceName counter
267	}
268
269	}
270	}
271	}
272
273	/** @return the gSearchWSDLURL, the url of the WSDL for the
274	* FedoraGSearch web services */
275	public String getGSearchWSDLURL() { return gSearchWSDLURL; }
276
277	/** Sets the member variable gSearchWSDLURL that specify the location of
278	* the WSDL file of FedoraGSearch's web services. Then it attempts
279	* to instantiate a connection to those web services.
280	* @param url is the new url of the GSearch web services WSDL file */
281	public void setGSearchWSDLURL(String url) {
282	this.gSearchWSDLURL = url;
283	initSearchFunctionality();
284	}
285
286	/** @return the gSearchIndexName, the name of the index Fedora Generic
287	* Search will search in (where GS3 docs have been indexed into). */
288	public String getGSearchIndexName() { return gSearchIndexName; }
289
290	/** Sets the member variable gSearchIndexName that specifies the name
291	* of the index containing indexed GS3 documents. Then it attempts
292	* to instantiate a connection to the Fedora GSearch web services using
293	* this changed value for indexName.
294	* @param indexName is the new name of the index containing indexed GS3
295	* docs that GSearch should search in. */
296	public void setGSearchIndexName(String indexName) {
297	this.gSearchIndexName = indexName;
298	initSearchFunctionality();
299	}
300
301	/** @return the array of the services actually supported by FedoraGS3 */
302	protected String[] getServiceNames() { return this.serviceNames;}
303
304	/**
305	* For finding out if the sectionNumber is given as part of the docID.
306	* @param docID is the String that contains the docPID and may also
307	* contain the section number.
308	* @return true if the document identifier docID contains a section-
309	* number, and false if it consists solely of the docPID.
310	* That is, true is returned if
311	* <pre>docID = "greenstone:colName-<docPID>-<sectionNum>"</pre>
312	* and false is returned if
313	* <pre>docID = "greenstone:colName-<docPID>"</pre>
314	* */
315	protected boolean containsSectionNumber(String docID) {
316	// if there are two hyphens in the docID, then there are sections
317	// (and the section number is appended at end of docID)
318	// docID = "greenstone:colName-<docPID>-<sectionNum>"
319	return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
320	}
321
322	/** This method will extract the docPID from docID and return it.
323	* (If a sectionNumber is suffixed to the docID, the docPID which is
324	* the prefix is returned; otherwise the docID is the docPID and is
325	* returned)
326	* @param docID is the String that contains the docPID and may also
327	* contain the section number.
328	* @return only the docPID portion of the docID.
329	*/
330	protected String getDocPIDFromDocID(String docID) {
331	if(containsSectionNumber(docID))
332	return docID.substring(0, docID.lastIndexOf(HYPHEN));
333	// else (if there's no sectionNumber), docID is the docPID
334	return docID;
335	}
336
337	/** This method will return the section Number, if there's any
338	* suffixed to the docID. Otherwise it will return the empty string
339	* @param docID is the String that contains the docPID and may also
340	* contain the section number.
341	* @return only the sectionID portion of the docID - if any, else "".
342	*/
343	protected String getSectionIDFromDocID(String docID) {
344	if(containsSectionNumber(docID))
345	return docID.substring(
346	docID.lastIndexOf(HYPHEN)+1, docID.length());
347	return "";
348	}
349
350	/** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
351	* response message that gives the metadata for each collection identified
352	* @param collIDs is an array of fedora pids identifying collections in the
353	* fedora repository
354	* @return a GS3 DocumentMetadataRetrieve response message containing the
355	* EX metadata for all the requested collections */
356	public String getCollectionMetadata(String[] collIDs) {
357	return getMetadata(collIDs, new String[] {"all"});
358	}
359
360	/** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
361	* response message is returned containing the metadata for each document.
362	* @param docIDs is an array of document identifiers (docID can either be
363	* <pid>s items (documents) in the fedora repository, or
364	* "<pid>-sectionNumber".
365	* @return a GS3 DocumentMetadataRetrieve response message containing the
366	* EX, DC, DLS metadata for all the requested documents
367	* @param metadata is the list of metadata elements to be retrieved for each doc */
368	public String getDocumentMetadata(String[] docIDs, String[] metadata) {
369	return getMetadata(docIDs, metadata);
370	}
371
372	/** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
373	* response message that gives the metadata for the collection identified
374	* @param collID is a fedora pid identifying a collection in its repository
375	* @return a GS3 DocumentMetadataRetrieve response message containing the
376	* EX metadata for the requested collection
377	* @param metadata is the list of metadata elements to be retrieved for each doc */
378	public String getCollectionMetadata(String collID) {
379	return getMetadata(new String[] {collID}, new String[] {"all"});
380	}
381
382	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
383	* response message containing the metadata for the document.
384	* @param docID is a document identifier (docID can either be a <pid>
385	* of an item (document) in the fedora repository, or it can be
386	* "<pid>-sectionNumber".
387	* @return a GS3 DocumentMetadataRetrieve response message containing the
388	* EX, DC, DLS metadata for the requested document */
389	public String getDocumentMetadata(String docID, String[] metadata) {
390	return getMetadata(new String[] {docID}, metadata);
391	}
392
393	/** @return a greenstone DocumentMetadataRetrieve response for the
394	* documents or collections indicated by the docIDsOrCollIDs.
395	* @param docIDsOrCollIDs is an array of identifiers which may be either the
396	* fedora pids for collections, or otherwise may be a document identifier.
397	* In the last case, the document ID may consist of either
398	* "documentPID-sectionNumber" or may just be just fedora documentPID
399	* @param metadata is the list of metadata elements to be retrieved for each doc */
400	public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
401	{
402	Document doc = builder.newDocument();
403	FedoraGS3RunException ex = null;
404
405	Element docNodeList = doc.createElement(
406	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
407
408	try{
409	for(int i = 0; i < docIDsOrCollIDs.length; i++) {
410	// create the <documentNode> containing the metadata
411	// for each document docID
412	Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
413	docNodeList.appendChild(docNode);
414	}
415	} catch(Exception e) {
416	ex = new FedoraGS3RunException(e);
417	ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
418	}
419
420	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
421	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
422	try{
423	return FedoraCommons.elementToString(responseMsg);
424	} catch(TransformerException e) {
425	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
426	+ " " + e;
427	}
428	}
429
430	/** Method that takes a new DOM document, as well as an identifier of either
431	* a collection or document (which may be a fedora pid for the collection
432	* or document, or may be the documentPid-sectionNumber for a document) and
433	* returns a documentNode element for it:
434	* <documentNode><metadataList>
435	* <metadata name="">value</metadata>
436	* ...
437	* </metadataList></documentNode>
438	* @return documentNode containing the metadata for the collection or
439	* document given by parameter ID
440	* @param id denotes a collection pid, a document pid or a docID of the
441	* form "documentpid-sectionNumber"
442	* @param metadata is the list of metadata elements to be retrieved for each doc */
443	protected Element getMetadata(Document doc, String id, String[] metadata)
444	throws RemoteException, UnsupportedEncodingException,
445	SAXException, IOException
446	{
447	// We're going to create the documentNode nested inside the following
448	// documentNodeList:
449	// <documentNodeList>
450	// <documentNode nodeID=""><metadataList>
451	// <metadata name="">value</metadata>
452	// </metadataList></documentNode>
453	// <documentNode>...</documentNode>
454	// </documentNodeList>
455	// <documentNodeList>
456
457	// <documentNode nodeID="docID"> - the docNode on which a metadata
458	// retrieve is being performed
459	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
460	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
461	attribute.setValue(id);
462	docNode.setAttributeNode(attribute);
463
464	// <metadataList>
465	Element metadataList = doc.createElement(
466	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
467
468	String ex = "";
469	String dc = "";
470	String dls = "";
471	if(id.endsWith(_COLLECTION)) { // docID refers to a collection
472	// Obtain the "EX" datastream (extracted metadata) for the collection
473	ex = this.getEX(id);
474	}
475	else { // docID refers to a document
476	// work out the document's fedora PID and section ID, and then
477	// obtain the EX (extracted metadata) and DC datastreams for the doc
478
479	// Note that EX/DC for pid="greenstone:<colname>-docPID-1"
480	// is the same as for pid="greenstone:<colname>-docPID"
481	// That is, <Section id="1"> refers to the toplevel document docPID
482	// If requested for top-level document, there may also be DLS meta
483	String sectionID = getSectionIDFromDocID(id);
484	String docPID = getDocPIDFromDocID(id);
485	if(sectionID.equals("") \|\| sectionID.equals("1")) {
486	// metadata of toplevel document is requested
487	ex = this.getEX(docPID); // slightly faster than doing
488	//getSectionEXMetadata(docID, "1")
489	dc = this.getDC(docPID);
490	dls = this.getDLS(docPID);
491	}
492	else {
493	ex = getSectionEXMetadata(docPID, sectionID);
494	dc = getSectionDCMetadata(docPID, sectionID);
495	}
496	}
497
498	String metafields = "";
499	for(int i = 0; i < metadata.length; i++) {
500	metafields = metafields + metadata[i] + "\|";
501	}
502
503	// Adding in metadata sets in alphabetical order
504	// DC metadata for a top-level document is different from EX, DLS:
505	// only the element's namespace prefix is "dc", the rest of a tagname
506	// is unknown.
507	if(!dc.equals("")) {
508	addMetadataWithNamespacedTagNames(doc, metadataList,
509	dc, DC, metafields);
510	}
511
512	// Check if we were supposed to process dls and dc metadata
513	// as well. We only ever do this for top-level documents,
514	// in which case, dls and dc will be non-empty strings
515	if(!dls.equals("")) {
516	addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
517	}
518
519	// we definitely have an EX metadatastream for each
520	// collection object, top-level document object,
521	// and document section item
522	addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
523
524	// now the metadataList has been built up
525	docNode.appendChild(metadataList);
526
527	return docNode; // return <documentNode> containing the metadata
528	}
529
530	/** This method retrieves all the metadata elements in the metaDataStream
531	* parameter of the form <"metadataSetNS:metadata">"value"</metadata> where
532	* metadataSetNS is the namespace of each tag, and creates a new element of
533	* the form <metadata name="metadataSetNS:metadata">"value"</metadata> for
534	* each. Each of these are then appended to the metadataList parameter.
535	* @param doc is the Document object using which the new metadata Elements
536	* are to be constructed
537	* @param metadataList is the <metadataList> Element to which the new
538	* metadata Elements are to be appended as children.
539	* @param metaDatastream the metadata datastream in string form (e.g. the
540	* Dublin Core metadata stored in the Fedora repository).
541	* @param metadataSet is the constant datastream identifier, e.g. "DC".
542	* At present this method applies to the DC metadata and any others like it
543	* where each tagname is different except for the constant dc: namespace.
544	* @param metafields is a \| separated string containing the metadatafields to
545	* extract or "all" if all fields are requested
546	*/
547	protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
548	String metaDatastream, String metadataSet, String metafields)
549	throws SAXException, IOException
550	{
551	Document src = builder.parse(
552	new InputSource(new StringReader(metaDatastream)));
553
554	// The following doesn't work for some reason: to retrieve all elements
555	// whose namespace prefix starts with "dc", we pass "*" for localName
556	//NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
557
558	// Longer way: get the children of the root document
559	NodeList children = src.getDocumentElement().getChildNodes();
560
561	for(int i = 0; i < children.getLength(); i++) {
562	String nodeName = children.item(i).getNodeName();
563	// check that the nodename starts with the metadataSet ("dc") namespace,
564	// which simultaneously ensures that the node's an element:
565	if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
566	// need to have a period for Greenstone instead of Fedora's colon
567	nodeName = nodeName.replace(COLON, PERIOD);
568	if(metadataSet.equals(DC)) { // dc:title -> dc.Title
569	nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
570	+ nodeName.substring(4);
571	}
572
573	// get the requested metadata fields
574	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(nodeName) != -1) {
575	Element metatag = (Element)children.item(i);
576	String value = FedoraCommons.getValue(metatag);
577	// <dc:tagname>value</dc:tagname>
578	// we're going to put this in our metadata element as
579	// <metadata name="dc.Tagname">value</metadata>
580
581	// create metadata of (name, value) pairs in target DOM (doc)
582	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
583	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
584
585	attribute.setValue(nodeName);
586	metadata.setAttributeNode(attribute);
587	Text content = doc.createTextNode(value);
588	metadata.appendChild(content);
589	metadataList.appendChild(metadata);
590	}
591	}
592	}
593	}
594
595	/** This method retrieves all the metadata elements in the metaDataStream
596	* of the form <"namespace:"metadata name="metadataName">value</metadata>
597	* where "namespace" is the namespace prefix of each tag, and metadataName
598	* is the name of the metadata (like author, title). For each element
599	* it creates a corresponding new element of the form
600	* <metadata name="namespace:metadataName">value</metadata>.
601	* Each of these are then appended to the metadataList parameter.
602	* @param doc is the Document object using which the new metadata Elements
603	* are to be constructed
604	* @param metadataList is the <metadataList> Element to which the new
605	* metadata Elements are to be appended as children.
606	* @param metaDatastream the metadata datastream in string form (e.g. the
607	* EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
608	* repository).
609	* @param metadataSet is the constant datastream identifier,
610	* e.g. "DLS" or "EX".
611	* At present this method applies to the DLS and EX metadata as they have
612	* constant tagnames throughout.
613	* @param metafields is a \| separated string containing the metadatafields to
614	* extract or "all" if all fields are requested.
615	*/
616	protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
617	String metaDatastream, String metadataSet, String metafields)
618	throws SAXException, IOException
619	{
620	// Namespace prefix can be "ex:" or "dls:"
621	String namespacePrefix = "";
622	if(!metadataSet.equals(EX)) {
623	// need to have a period for Greenstone instead of Fedora's colon
624	namespacePrefix = metadataSet.toLowerCase() + PERIOD;
625	}
626
627	Document src = builder.parse(
628	new InputSource(new StringReader(metaDatastream)));
629	NodeList metaTags = src.getElementsByTagName(
630	metadataSet.toLowerCase()+COLON+METADATA);
631	// Looking for tagnames: <ex:metadata> or <dls:metadata>
632
633	for(int i = 0; i < metaTags.getLength(); i++) {
634	Element metatag = (Element)metaTags.item(i);
635
636	// extract the metadata of (name, value) pairs from src DOM
637	// look for <metadata name="name">value</metadata>
638	String name = metatag.hasAttribute(NAME) ?
639	metatag.getAttribute(NAME) : "";
640	// sometimes, there are several metadata for the same name, in this
641	// case, look for a qualifier and append its value to the name to
642	// distinguish it uniquely:
643	if(metatag.hasAttribute(QUALIFIER)) {
644	name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
645	}
646	name = namespacePrefix + name; // prefix with namespace, if any
647	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(name) != -1) {
648	String value = FedoraCommons.getValue(metatag);
649
650	// create metadata of (name, value) pairs in target DOM (doc)
651	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
652	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
653	attribute.setValue(name);
654	metadata.setAttributeNode(attribute);
655	Text content = doc.createTextNode(value);
656	metadata.appendChild(content);
657
658	metadataList.appendChild(metadata);
659	}
660	}
661	}
662
663	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
664	* response message containing ONLY the Title metadata for the document.
665	* @param docID is a document identifier (docID can either be a <pid>
666	* of an item (document) in the fedora repository, or it can be
667	* "<pid>-sectionNumber".
668	* @return a GS3 DocumentMetadataRetrieve response message containing the
669	* Title metadata for the requested document */
670	public String getTitleMetadata(String docID) {
671	return getTitleMetadata(new String[] { docID });
672	}
673
674	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
675	* response message containing ONLY the Title metadata for the documents.
676	* @param docIDs is a list of document identifiers (where docID can either be
677	* a <pid> of an item (document) in the fedora repository, or it can be
678	* "<pid>-sectionNumber".
679	* @return a GS3 DocumentMetadataRetrieve response message containing the
680	* Title metadata for all the requested documents */
681	public String getTitleMetadata(String[] docIDs) {
682	// Must create message of the following form:
683	// <documentNodeList><documentNode nodeID="docID">
684	// <metadataList><metadata name="Title">sometitle</metadata>
685	// </metadataList></documentNode>
686
687	Document doc = builder.newDocument();
688	FedoraGS3RunException ex = null;
689
690	Element docNodeList = doc.createElement(
691	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
692	try{
693	for(int i = 0; i < docIDs.length; i++) {
694	Element docNode = getTitleMetadata(doc, docIDs[i]);
695	docNodeList.appendChild(docNode);
696	}
697	}catch(Exception e) {
698	ex = new FedoraGS3RunException(e);
699	//ex.setSpecifics("EX metadata datastream PID: \|" + docIDs[i] + "\|"); // for debugging PID
700	ex.setSpecifics("EX metadata datastream");
701	}
702
703	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
704	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
705	try{
706	return FedoraCommons.elementToString(responseMsg);
707	} catch(TransformerException e) {
708	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
709	+ " " + e;
710	}
711	}
712
713	/** Method that takes a new DOM document, as well as an identifier of either
714	* a document or document section and returns a documentNode element containing
715	* the title metadata for it:
716	* <documentNode nodeID="docID"><metadataList>
717	* <metadata name="Title">sometitle</metadata>
718	* </metadataList></documentNode>
719	* @return documentNode containing the metadata for the collection or
720	* document given by parameter ID
721	* @param docID denotes the id of a document or a document section, so id
722	* is either a document-pid or it's of the form documentpid-sectionNumber */
723	protected Element getTitleMetadata(Document doc, String docID)
724	throws RemoteException, UnsupportedEncodingException,
725	SAXException, IOException
726	{
727	// Returns a docNode element of the following form:
728	// <documentNode nodeID="docID">
729	// <metadataList><metadata name="Title">sometitle</metadata></metadataList>
730	// </documentNode>
731
732	// <documentNode nodeID="docID">
733	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
734	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
735	attribute.setValue(docID);
736	docNode.setAttributeNode(attribute);
737
738	// <metadataList>
739	Element metaList = doc.createElement(
740	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
741	// <metadata name="Title">
742	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
743	// if we connect it all up (append children), we can immediately add
744	// the name attribute into the metadata element:
745	metaList.appendChild(metadata);
746	docNode.appendChild(metaList);
747	metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
748
749	String title = "";
750	String sectionID = getSectionIDFromDocID(docID);
751	String docPID = getDocPIDFromDocID(docID);
752
753	// check if title of toplevel document is requested
754	if(sectionID.equals(""))
755	title = this.getDocTitle(docPID);
756	else { // title of document section
757	title = this.getSectionTitle(docPID, sectionID);
758	}
759
760	metadata.appendChild(doc.createTextNode(title));
761
762	return docNode;
763	}
764
765	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
766	* containing the requested portion of the document structure of the documents
767	* indicated by docIDs:
768	* @param docID is the document identifier of the document whose hierarchical
769	* structure is requested. The name of the collection is already included in the
770	* docID for a Fedora DL.
771	* @param structure - strings specifying the required structure of the document.
772	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
773	* @param info - strings specifying the required structural info of the document.
774	* It can be any combination of: siblingPosition, numSiblings, numChildren.
775	*/
776	public String getDocumentStructure(String docID, String[] structure, String[] info) {
777	return getStructure(new String[]{docID}, structure, info);
778	}
779
780
781	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
782	* containing the requested portion of the document structure of the documents
783	* indicated by docIDs:
784	* @param docIDs is an array of document identifiers of documents whose
785	* hierarchical structures are requested. The name of the collection is already
786	* included in the docID for a Fedora DL.
787	* @param structure - strings specifying the required structure of each document.
788	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
789	* @param info - strings specifying the required structural info of each document.
790	* It can be any combination of: siblingPosition, numSiblings, numChildren.
791	*/
792	public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
793	return getStructure(docIDs, structure, info);
794	}
795
796	/**
797	* Returns a greenstone3 DocumentStructureRetrieve XML response message
798	* containing the document structures for the given docIDs.
799	* Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
800	* greenstone formatted XML is returned. The requested section of the table
801	* of contents (TOC) for a document is converted into the greenstone3 xml
802	* format that is returned upon DocumentStructureRetrieve requests.
803	* @param docIDs the documentIDs for which the section's structure is returned;
804	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
805	* @param structure - the structure of the sections to return. Can be any combination of:
806	* ancestors, parent, siblings, children, descendants, entire.
807	* @param infos - strings containing any combination of the values: numChildren, numSiblings,
808	* siblingPosition. The requested info gets added as attributes to the returned root element.
809	* @return a greenstone3 DocumentStructureRetrieve XML response message in
810	* String format with the structure of the docIDs requested.
811	*/
812	protected String getStructure(String[] docIDs, String[] structure, String[] infos)
813	{
814	Document doc = builder.newDocument();
815	FedoraGS3RunException ex = null;
816	// <documentNodeList>
817	Element docNodeList = doc.createElement(
818	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
819
820	try{
821	// append the <documentNodes> for the docIDs
822	// to the docNodeList
823	//getStructureElement(docNodeList, docIDs, levels);
824	getStructureElement(docNodeList, docIDs, structure, infos);
825	} catch(Exception e) {
826	ex = new FedoraGS3RunException(e);
827	ex.setSpecifics("(requested portion of) TOC datastream");
828	}
829	// insert our <documentNodeList> into a GS3 response message
830	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
831	GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
832	try{
833	return FedoraCommons.elementToString(responseMsg);
834	} catch(TransformerException e) {
835	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
836	+ " " + e;
837	}
838	}
839
840
841	/** Given a <documentNodeList> portion of a greenstone3
842	* DocumentStructureRetrieve XML response message, this method will populate
843	* it with the <documentNodes> that represent the structure of the given docIDs.
844	* @param docNodeList is a <documentNodeList> to which <documentNodes> of
845	* the doc structures are appended.
846	* @param docIDs the documentIDs for which the section's structure is returned;
847	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
848	* @param structures - the structure of the sections to return. Can be any combination of:
849	* ancestors, parent, siblings, children, descendants, entire.
850	* @param infos - a string containing any combination of the values: numChildren, numSiblings,
851	* siblingPosition. The requested info gets added as attributes to the returned root element.
852	*/
853	protected void getStructureElement(Element docNodeList, String[] docIDs,
854	String[] structures, String[] infos)
855	throws RemoteException, UnsupportedEncodingException, SAXException,
856	IOException
857	{
858	// Make one string out of requested structure components, and one string from info components
859	String structure = "";
860	String info = "";
861	for(int i = 0; i < structures.length; i++) {
862	structure = structure + structures[i] + "\|";
863	}
864	for(int i = 0; i < infos.length; i++) {
865	info = info + infos[i] + "\|";
866	}
867
868	// process each docID
869	for(int i = 0; i < docIDs.length; i++) {
870	// work out the document's fedora PID and section ID
871	String sectionID = getSectionIDFromDocID(docIDs[i]);
872	String docPID = getDocPIDFromDocID(docIDs[i]);
873	if(sectionID.equals("")) {
874	sectionID = "1";
875	}
876
877	// get the required section, along with children or descendants
878	Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
879	Document doc = docNodeList.getOwnerDocument();
880
881	// copy-and-convert that structure into a structure format for GS3
882	Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
883
884	if(!info.equals("")) {
885	// <nodeStructureInfo>
886	// <info name="" value="" />
887	// <info name="" value="" />
888	// ...
889	// </nodeStructureInfo>
890	Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+GSXML.INFO_ATT);
891	Element root = srcDocElement.getOwnerDocument().getDocumentElement();
892
893	if(root.hasAttribute("numSiblings")) {
894	String numSiblings = root.getAttribute("numSiblings");
895	Element infoEl = doc.createElement(GSXML.INFO_ATT);
896	infoEl.setAttribute(GSXML.NAME_ATT, "numSiblings");
897	infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
898	nodeStructureInfo.appendChild(infoEl);
899	}
900
901	if(root.hasAttribute("siblingPosition")) {
902	String siblingPosition = root.getAttribute("siblingPosition");
903	Element infoEl = doc.createElement(GSXML.INFO_ATT);
904	infoEl.setAttribute(GSXML.NAME_ATT, "siblingPosition");
905	infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
906	nodeStructureInfo.appendChild(infoEl);
907	}
908
909	if(root.hasAttribute("numChildren")) {
910	String numChildren = root.getAttribute("numChildren");
911	Element infoEl = doc.createElement(GSXML.INFO_ATT);
912	infoEl.setAttribute(GSXML.NAME_ATT, "numChildren");
913	infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
914	nodeStructureInfo.appendChild(infoEl);
915	}
916	docNode.appendChild(nodeStructureInfo);
917	}
918
919	// add it to our list of documentNodes
920	docNodeList.appendChild(docNode);
921	}
922	}
923
924
925	/**
926	* Takes the portion of the XML document outlining the structure of the
927	* document (section)--in the format this is stored in Fedora--and returns
928	* Greenstone 3 DOM XML format for outlining document structure.
929	* @return a <documentNode> element that contains a greenstone3
930	* DocumentStructureRetrieve XML corresponding to the parameter Element section
931	* (which is in fedora XML), for the document indicated by docID.
932	* @param requestingDocID is the identifier of the document for which the
933	* structure was requested. It's this document's children or descendants that
934	* will be returned. Note that this is not always the same as (clear from)
935	* parameter docID.
936	* @param docID is the documentID for which the section's structure is
937	* returned where docID = "docPID-sectionNumber".
938	* @param section - the fedora section XML that is being mirrored in
939	* greenstone3 format.
940	*/
941	protected Element getStructure(Document doc, String requestingDocID,
942	String docID, Element section)
943	{
944	// we want to mirror the section's DOM (given in fedora XML) in
945	// greenstone3's XML for a DocumentStructureRetrieve response.
946
947	// <documentNode nodeID="docID"> - the docNode on which a structure retrieve
948	// is being performed
949	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
950	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
951	attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
952	docNode.setAttributeNode(attribute);
953
954	// <nodeStructure>
955	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
956
957	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
958	Element rootNode = createDocNodeFromSubsection(doc, section, docID);
959
960	// fills in the subtree of the rootNode in our nodeStructure element
961	createDocStructure(doc, section, rootNode, docID);
962	//where section represents the root section
963
964	nodeStructure.appendChild(rootNode);
965	docNode.appendChild(nodeStructure);
966	return docNode;
967	}
968
969
970	/** Recursive method that creates a documentStructure mirroring parameter
971	* section, starting from parameter parent down to all descendants
972	* @param section is the XML <Section> in the fedora repository's TOC
973	* for the docPID whose substructure is to be mirrored
974	* @param parent is the XML documentNode in the greenstone repository whose
975	* descendants created by this method will correspond to the descendants of
976	* parameter section.
977	* @param doc is the document containing the parent;
978	* @param docPID is the prefix of all nodeIDs in the parent's structure
979	*/
980	protected void createDocStructure(
981	Document doc, Element section, Element parent, String docPID)
982	{
983	// get the section's children (if any)
984	NodeList children = section.getChildNodes();
985	for(int i = 0; i < children.getLength(); i++) {
986	Node n = children.item(i);
987
988	if(n.getNodeName().equals(SECTION_ELEMENT)) {
989	//then we know it's an element AND that its tagname is "Section"
990	Element subsection = (Element)n;
991	Element child = createDocNodeFromSubsection(doc, subsection, docPID);
992	parent.appendChild(child);
993
994	// recursion call on newly found child-element and subsection
995	createDocStructure(doc, subsection, child, docPID);
996	}
997	}
998	}
999
1000	/** Given a particular subsection element, this method creates a
1001	* Greenstone3 DocumentNode element that mirrors it.
1002	* @param doc is the document that will contain the created DocumentNode
1003	* @param docID is the prefix of all nodeIDs in the parent's structure
1004	* @param subSection is the XML <Section> in the fedora repository's
1005	* TOC for the docPID which will be mirrored in the greenstone XML
1006	* documentNode that will be returned.
1007	* @return a greenstone <documentNode> that represents the fedora TOC's
1008	* <Section> element passed as parameter subSection. */
1009	protected Element createDocNodeFromSubsection(
1010	Document doc, Element subSection, String docID)
1011	{
1012	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1013	Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1014	docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1015	docNode.setAttributeNode(docType);
1016
1017	Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1018	String sectionID = subSection.hasAttribute(ID) ?
1019	subSection.getAttribute(ID) : "";
1020	if(sectionID.equals("1")
1021	&& subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1022	// reset the attribute without the section number (just "docID" may be important for democlient?)
1023	nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1024	} else {
1025	nodeID.setValue(docID + HYPHEN + sectionID);
1026	}
1027	//nodeID.setValue(docID + HYPHEN + sectionID);
1028	docNode.setAttributeNode(nodeID);
1029
1030	Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1031	if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1032	nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1033	}
1034	docNode.setAttributeNode(nodeType);
1035	return docNode;
1036	}
1037
1038
1039	/** Given an identifier that is either a docPID or a concatenation of
1040	* docPID+sectionID, this method works out the fedora assigned docPID and
1041	* sectionID and then calls getContentBody(docPID, sectionID) with those.
1042	* @param docID is expected to be of the form
1043	* "greenstone:<collectionName>-<docPID>-<sectionNumber>" or
1044	* "greenstone:<collectionName>-<docPID>"
1045	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1046	* "greenstone:<collectionName>-1" ("greenstone:<collectionName>-Section1")
1047	* is returned! */
1048	public String getContent(String docID) {
1049	return this.getContent(new String[]{docID});
1050	}
1051
1052	/** Given an identifier that is a concatenation of docID+sectionID, this
1053	* method works out the fedora assigned docPID and sectionID and then calls
1054	* getContentBody(docPID, sectionID) with those.
1055	* @param docIDs is an array of document identifiers of the form
1056	* "greenstone:<collectionName>-<docPID>-<sectionNumber>"
1057	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1058	* "greenstone:<collectionName>-Section1" is returned! */
1059	public String getContent(String[] docIDs) {
1060	Document doc = builder.newDocument();
1061	FedoraGS3RunException ex = null;
1062
1063	//<documentNodeList>
1064	Element docNodeList = doc.createElement(
1065	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1066
1067	try{
1068	for(int i = 0; i < docIDs.length; i++) {
1069	// get the sectionID and docPID from the docID
1070	String sectionID = this.removePrefix(
1071	getSectionIDFromDocID(docIDs[i]), SECTION);
1072	String docPID = getDocPIDFromDocID(docIDs[i]);
1073	if(sectionID.equals("")) // if no section is specified, get
1074	sectionID = "1"; // get the content for Section id="1"
1075
1076	// Get the contents for the requested section of document docPID
1077	String sectionContent = this.getContentBody(docPID, sectionID);
1078
1079	// set the nodeID attribute
1080	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1081	Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1082
1083	nodeId.setValue(docIDs[i]); // just set the docID which will contain
1084	// the docPID (and sectionID if already present)
1085
1086	docNode.setAttributeNode(nodeId);
1087	// set the text content to what was retrieved
1088	Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1089	Text textNode = doc.createTextNode(sectionContent.trim());
1090
1091	nodeContent.appendChild(textNode);
1092	docNode.appendChild(nodeContent);
1093	//add the documentNode to the docNodeList
1094	docNodeList.appendChild(docNode);
1095	}
1096	} catch(Exception e) {
1097	ex = new FedoraGS3RunException(e);
1098	ex.setSpecifics("requested doc Section datastream");
1099	}
1100	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1101	GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1102	try{
1103	return FedoraCommons.elementToString(responseMsg);
1104	} catch(TransformerException e) {
1105	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1106	+ " " + e;
1107	}
1108	}
1109
1110	/** Gets the contents of a textNode from a section.
1111	* @return the text content of a section.
1112	* @param docPID the pid of the document from which a section's text is to
1113	* be retrieved.
1114	* @param sectionID is the section identifier of the document denoted by
1115	* docPID whose text is to be returned.
1116	*/
1117	protected String getContentBody(String docPID, String sectionID)
1118	throws RemoteException, UnsupportedEncodingException,
1119	SAXException, IOException
1120	{
1121	String section = this.getSection(docPID, sectionID);
1122
1123	// the content is nested inside a <Section> element,
1124	// we extract it from there:
1125	InputSource source = new InputSource(new StringReader(section));
1126	Document doc = builder.parse(source);
1127
1128	// The document Element is the <Section> we want.
1129	// Get its text contents:
1130	section = FedoraCommons.getValue(doc.getDocumentElement());
1131
1132	// we are going to remove all occurrences of "_httpdocimg_/"
1133	// that precede associated filenames, because that's a GS3
1134	// defined macro for resolving relative urls. It won't help
1135	// with documents stored in fedora.
1136	section = section.replaceAll(GS3FilePathMacro+"/", "");
1137	return section;
1138	}
1139
1140	/** Here we create the greenstone's response message element:
1141	* <message&lg;<response><content></response></message>
1142	* @return a greenstone response-message element.
1143	* @param doc - the Document object which should me used to create the
1144	* <message> and <response> elements
1145	* @param content - the element that is to be nested inside <response>
1146	* @param ex - any exception that occurred when trying to create
1147	* the content parameter
1148	* @param responseType - the value for the type attribute of <response>,
1149	* such as "describe", "retrieve", "browse", "query"...
1150	* @param originator - indiates the collectionName or service (like
1151	* DocumentContentRetrieve) from where this response message originates
1152	*/
1153	protected Element createResponseMessage(Document doc, Element content,
1154	Exception ex, String responseType, String originator)
1155	{
1156	Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1157	// from = "FedoraGS3"
1158	Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1159	attribute.setValue(originator);
1160	response.setAttributeNode(attribute);
1161
1162	// type = "describe" or "process" - whatever's given in requestType:
1163	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1164	attribute.setValue(responseType);
1165	response.setAttributeNode(attribute);
1166
1167	if(content != null)
1168	response.appendChild(content);
1169
1170	// we'll create an error element for RemoteExceptions (web service problems)
1171	// and UnsupportedEncodingExceptions and
1172	if(ex != null) {
1173	Element error = doc.createElement(GSXML.ERROR_ELEM);
1174	error.appendChild(doc.createTextNode(ex.getMessage()));
1175	// now append the error to the <response> element (after
1176	// the content element whatever that was)
1177	response.appendChild(error);
1178	}
1179
1180	Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1181	message.appendChild(response);
1182	doc.appendChild(message);
1183	return message;
1184	}
1185
1186	/** @return a <serviceList> Element as defined by GS3: containing all the
1187	* services (denoted by <service> elements) that are supported by FedoraGS3.
1188	* At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1189	* DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1190	* ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1191	* @param doc - the Document object which should me used to create the
1192	* <serviceList> element */
1193	protected Element createServiceList(Document doc)
1194	{
1195	Element serviceList = doc.createElement(
1196	GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1197
1198	for(int i = 0; i < serviceNames.length; i++) {
1199	// create the <service name="serviceName[i]" type="servicetype" />
1200	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1201
1202	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1203	attribute.setValue(serviceNames[i]);
1204	service.setAttributeNode(attribute);
1205
1206	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1207	if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1208	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1209	else if(serviceNames[i].contains("Query")) // search services
1210	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1211	else
1212	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1213	service.setAttributeNode(attribute);
1214
1215	// add the service element to the serviceList element
1216	// <serviceList><service /></serviceList>
1217	serviceList.appendChild(service);
1218	}
1219	return serviceList;
1220	}
1221
1222	/** @return a GS3 response message for a describe services request:
1223	* indicating the list of services supported by the Fedora-Greenstone
1224	* interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1225	* DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1226	* ClassifierBrowseMetadataRetrieve - as indicated by member variable
1227	* serviceNames. */
1228	public String getServiceList()
1229	{
1230	Document doc = builder.newDocument();
1231	Element serviceList = createServiceList(doc);
1232	// make <serviceList> the body of the responseMessage:
1233	// <message><response><serviceList></response></message>
1234	Element responseMsg = createResponseMessage(doc, serviceList, null,
1235	GSXML.REQUEST_TYPE_DESCRIBE, "");
1236	try {
1237	return FedoraCommons.elementToString(responseMsg);
1238	}catch(TransformerException e) {
1239	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1240	+ " " + e;
1241	}
1242	}
1243
1244	/** @return a GS3 describe response message listing the collections and
1245	* collection-specific metadata stored in the Fedora-Greenstone repository. */
1246	public String getCollectionList()
1247	{
1248	Document doc = builder.newDocument();
1249	FedoraGS3RunException ex = null; // any RemoteException
1250
1251	// create the <collectionList /> element
1252	Element collectionList = doc.createElement(
1253	GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1254	try{
1255	String[] collectionNames = this.getCollectionNames(
1256	this.getCollections()); // this line could throw RemoteException
1257	for(int i = 0; i < collectionNames.length; i++) {
1258	// create the <collection name="somename" /> element
1259	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1260	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1261	attribute.setValue(collectionNames[i]);
1262	collection.setAttributeNode(attribute);
1263
1264	// append the <collection> element as child of <collectionList>
1265	collectionList.appendChild(collection);
1266
1267	//if(collection.hasAttribute(GSXML.NAME_ATT))
1268	//LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1269	}
1270	} catch(RemoteException e) { // if this happens, perhaps it's because it
1271	// can't find Greenstone collections in fedora repository?
1272	ex = new FedoraGS3RunException(e);
1273	ex.setSpecifics(
1274	"greenstone collections in fedora repository");
1275	}
1276
1277	// make <collectionList> the body of the responseMessage:
1278	// <message><response><collectionList></response></message>
1279	Element responseMsg = createResponseMessage(doc, collectionList, ex,
1280	GSXML.REQUEST_TYPE_DESCRIBE, "");
1281	try{
1282	return FedoraCommons.elementToString(responseMsg);
1283	}catch(TransformerException e) {
1284	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1285	+ " " + e;
1286	}
1287	}
1288
1289	/** @return a GS3 describe response message for a collection in the
1290	* Fedora-Greenstone repository.
1291	* @param collectionName - the name of the collection that is to be described.
1292	* It will be converted to a fedora collection pid, which is of the form
1293	* "greenstone:<collectionName>-collection". */
1294	public String describeCollection(String collectionName)
1295	{
1296	Document doc = builder.newDocument();
1297	FedoraGS3RunException ex = null;
1298
1299	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1300	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1301	attribute.setValue(collectionName);
1302	collection.setAttributeNode(attribute);
1303
1304	//<displayItem assigned="true" lang="en" name="name">
1305	//"some display name"</displayItem>
1306	Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1307
1308	attribute = doc.createAttribute(GSXML.LANG_ATT);
1309	attribute.setValue(this.lang);
1310	displayItem.setAttributeNode(attribute);
1311
1312	attribute = doc.createAttribute(GSXML.NAME_ATT);
1313	attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1314	displayItem.setAttributeNode(attribute);
1315
1316	try{
1317	Text textNode = doc.createTextNode(
1318	this.getCollectionTitle(getCollectionPID(collectionName)));
1319	displayItem.appendChild(textNode);
1320	} catch(Exception e) {
1321	// can't find Greenstone collections in fedora repository or problem
1322	// getting their titles from their metadata datastream?
1323	ex = new FedoraGS3RunException(e);
1324	ex.setSpecifics("greenstone collections or their metadata"
1325	+ "in the fedora repository");
1326	}
1327	// now append the displayItem element as child of the collection element
1328	collection.appendChild(displayItem);
1329	// get the <serviceList> and add it into the collection description.
1330	// Services for all collections in the FedoraGS3 repository are the
1331	// same, offering a ClassifierBrowse to browse titles by starting letter
1332	// and DocRetrieve services: Content, Metadata and Structure.
1333
1334	Element serviceList = createServiceList(doc);
1335	collection.appendChild(serviceList);
1336
1337	Element responseMsg = createResponseMessage(doc, collection, ex,
1338	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1339	try{
1340	return FedoraCommons.elementToString(responseMsg);
1341	}catch(TransformerException e) {
1342	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1343	+ " " + e;
1344	}
1345	}
1346
1347	/** @return a GS3 describe response message for the services of a collection
1348	* in the Fedora-Greenstone repository. So far, these services are the same for
1349	* all fedora collections: they are the services given in member variable
1350	* serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1351	* ClassifierBrowseMetadataRetrieve.
1352	* All collections in this Digital Library (Fedora Repository) share the
1353	* same services, so this method returns the same services as getServiceList();
1354	* @param collectionName - the name of the collection whose services are to
1355	* be described. It will be converted to a fedora collection pid, which is of
1356	* the form "greenstone:<collectionName>-collection". */
1357	public String describeCollectionServices(String collectionName)
1358	{
1359	Document doc = builder.newDocument();
1360
1361	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1362	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1363	attribute.setValue(collectionName);
1364	collection.setAttributeNode(attribute);
1365
1366	Element serviceList = createServiceList(doc);
1367	collection.appendChild(serviceList);
1368
1369	Element responseMsg = createResponseMessage(doc, collection, null,
1370	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1371	try{
1372	return FedoraCommons.elementToString(responseMsg);
1373	}catch(TransformerException e) {
1374	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1375	+ " " + e;
1376	}
1377	}
1378
1379	/** All collections in this Digital Library (Fedora Repository) share
1380	* the same services, so this method returns the same as
1381	* describeCollectionService(collName, serviceName).
1382	* @return a GS3 describe response message for the requested service
1383	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1384	* return nothing special except their names; browse (and any query)
1385	* return more complex XML responses.
1386	* @param serviceName - the name of the service in the collection which is to
1387	* be described.*/
1388	public String describeService(String serviceName)
1389	{
1390	// For all the retrieve services (incl ClassifierBrowseMetadataRetrieve)
1391	// we return:
1392	// <message><response from="<name>Retrieve" type="describe">
1393	// <service name="<name>Retrieve" type="retrieve" /></response></message>
1394	// But for browse (and any query) service, we return the data necessary
1395	// for displaying it
1396
1397	Document doc = this.builder.newDocument();
1398	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1399	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1400	attribute.setValue(serviceName);
1401	service.setAttributeNode(attribute);
1402
1403	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1404
1405	if(serviceName.toLowerCase().endsWith("retrieve")) {
1406	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1407	}
1408	else if(serviceName.toLowerCase().contains("browse")) {
1409	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1410
1411	// we need name and description <displayItem> elements
1412	Element displayItem
1413	= createNameValuePairElement(doc,
1414	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1415	service.appendChild(displayItem);
1416
1417	displayItem = createNameValuePairElement(doc,
1418	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1419	"Browse pre-defined classification hierarchies");
1420	service.appendChild(displayItem);
1421
1422	// now need a classifierList
1423	Element classifierList = doc.createElement(
1424	GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1425
1426	int classifierNum = 1;
1427	// append a <classifier content="some letter" name="CL#">
1428	// for each letter of the alphabet:
1429	Element classifier = createClassifierElement(doc, "TitleByLetter",
1430	classifierNum++, "titles by letter", "Browse titles by letter");
1431	// now add this <classifier> to the <classifierList>
1432	classifierList.appendChild(classifier);
1433
1434	// ANY MORE CLASSIFIERS? ADD THEM HERE
1435
1436	service.appendChild(classifierList);
1437	} // ELSE check for whether it is a query service
1438	else if(serviceName.toLowerCase().contains("query")) {
1439	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1440	if(serviceName.equals("TextQuery")) {
1441	describeTextQueryService(service);
1442	} else if(serviceName.equals("FieldQuery")) {
1443	describeFieldQueryService(service);
1444	}
1445	}
1446
1447	// don't forget to add the type attribute to the service!
1448	service.setAttributeNode(attribute);
1449
1450	String from = serviceName;
1451
1452	Element responseMsg = createResponseMessage(doc, service, null,
1453	GSXML.REQUEST_TYPE_DESCRIBE, from);
1454	try{
1455	return FedoraCommons.elementToString(responseMsg);
1456	}catch(TransformerException e) {
1457	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1458	+ " " + e;
1459	}
1460	}
1461
1462	/** Appends children to the parameter service Element that make the
1463	* final service Element into a describe response XML for FedoraGS3's
1464	* TextQuery service.
1465	* @param service is the service Element that is being filled out. */
1466	protected void describeTextQueryService(Element service) {
1467	Document doc = service.getOwnerDocument();
1468	// we need name, submit (button) and description <displayItem> elements
1469	Element displayItem = createNameValuePairElement(doc,
1470	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1471	"Text Search");
1472	service.appendChild(displayItem);
1473
1474	displayItem = createNameValuePairElement(doc,
1475	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1476	service.appendChild(displayItem);
1477
1478	displayItem = createNameValuePairElement(doc,
1479	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1480	"Title and full-text search service");
1481	service.appendChild(displayItem);
1482
1483	//create the <paramList>
1484	Element paramList = doc.createElement(
1485	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1486
1487	// we ignore granularity to search at: it will always be
1488	// document and section level
1489	// we ignore casefolding: always on (that is, case is irrelevant)
1490	// we ignore document display order: always ranked
1491
1492	// Constructing the following:
1493	// <param default="100" name="maxDocs" type="integer">
1494	// <displayItem name="name">Maximum hits to return</displayItem>
1495	// </param>
1496	Element param = doc.createElement(GSXML.PARAM_ELEM);
1497
1498	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1499	attribute.setValue(MAXDOCS);
1500	param.setAttributeNode(attribute);
1501
1502	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1503	attribute.setValue("100");
1504	param.setAttributeNode(attribute);
1505
1506	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1507	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1508	param.setAttributeNode(attribute);
1509
1510	displayItem = createNameValuePairElement(doc,
1511	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1512	"Maximum hits to return");
1513	param.appendChild(displayItem);
1514
1515	paramList.appendChild(param);
1516
1517	// Constructing the following:
1518	// <param name="query" type="string">
1519	// <displayItem name="name">Query string</displayItem>
1520	// </param>
1521	param = doc.createElement(GSXML.PARAM_ELEM);
1522
1523	attribute = doc.createAttribute(GSXML.NAME_ATT);
1524	attribute.setValue(QUERY);
1525	param.setAttributeNode(attribute);
1526
1527	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1528	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1529	param.setAttributeNode(attribute);
1530
1531	displayItem = createNameValuePairElement(doc,
1532	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1533	"Query string");
1534	param.appendChild(displayItem);
1535
1536	paramList.appendChild(param);
1537
1538	service.appendChild(paramList);
1539	}
1540
1541	/** Appends children to the parameter service Element that make the
1542	* final service Element into a describe response XML for FedoraGS3's
1543	* FieldQuery service.
1544	* @param service is the service Element that is being filled out. */
1545	protected void describeFieldQueryService(Element service) {
1546	Document doc = service.getOwnerDocument();
1547	// we need name, submit (button) and description <displayItem> elements
1548	Element displayItem = createNameValuePairElement(doc,
1549	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1550	"Form Search");
1551	service.appendChild(displayItem);
1552
1553	displayItem = createNameValuePairElement(doc,
1554	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1555	service.appendChild(displayItem);
1556
1557	displayItem = createNameValuePairElement(doc,
1558	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1559	"Simple fielded search");
1560	service.appendChild(displayItem);
1561
1562	//create the <paramList>
1563	Element paramList = doc.createElement(
1564	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1565
1566	// we ignore granularity to search at: it will always be
1567	// document and section level
1568	// we ignore casefolding: always on (that is, case is irrelevant)
1569	// we ignore document display order: always ranked
1570
1571	// Constructing the following:
1572	// <param default="100" name="maxDocs" type="integer">
1573	// <displayItem name="name">Maximum hits to return</displayItem>
1574	// </param>
1575	Element param = doc.createElement(GSXML.PARAM_ELEM);
1576
1577	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1578	attribute.setValue(MAXDOCS);
1579	param.setAttributeNode(attribute);
1580
1581	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1582	attribute.setValue("100");
1583	param.setAttributeNode(attribute);
1584
1585	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1586	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1587	param.setAttributeNode(attribute);
1588
1589	displayItem = createNameValuePairElement(doc,
1590	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1591	"Maximum hits to return");
1592	param.appendChild(displayItem);
1593
1594	paramList.appendChild(param);
1595
1596	// Constructing the following:
1597	// <param name="simpleField" occurs="4" type="multi">
1598	// <displayItem name="name"></displayItem>
1599	//
1600	// <param name="query" type="string">
1601	// <displayItem name="name">Word or phrase </displayItem>
1602	// </param>
1603	//
1604	// <param default="allFields" name="fieldname" type="enum_single">
1605	// <displayItem name="name">in field</displayItem>
1606	//
1607	// <option name="docTitles">
1608	// <displayItem name="name">document titles</displayItem>
1609	// </option>
1610	// <option name="allTitles">
1611	// <displayItem name="name">document and section titles</displayItem>
1612	// </option>
1613	// <option name="fullText">
1614	// <displayItem name="name">full text</displayItem>
1615	// </option>
1616	// <option name="all">
1617	// <displayItem name="name">titles and full text</displayItem>
1618	// </option>
1619	// <option name="">
1620	// <displayItem name="name"></displayItem>
1621	// </option>
1622	// </param>
1623	// </param>
1624	Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1625	attribute = doc.createAttribute(GSXML.NAME_ATT);
1626	attribute.setValue(SIMPLEFIELD_ATT);
1627	rowOfParams.setAttributeNode(attribute);
1628
1629	// we want the row of controls to occur multiple times
1630	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1631	attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1632	rowOfParams.setAttributeNode(attribute);
1633
1634	attribute = doc.createAttribute(OCCURS_ATT);
1635	attribute.setValue("4"); // we want this row to occur 4 times
1636	rowOfParams.setAttributeNode(attribute);
1637
1638	// <param name="query" type="string">
1639	// <displayItem name="name">Word or phrase </displayItem>
1640	// </param>
1641	param = doc.createElement(GSXML.PARAM_ELEM);
1642
1643	attribute = doc.createAttribute(GSXML.NAME_ATT);
1644	attribute.setValue(QUERY);
1645	param.setAttributeNode(attribute);
1646
1647	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1648	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1649	param.setAttributeNode(attribute);
1650
1651	displayItem = createNameValuePairElement(doc,
1652	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1653	"Word or phrase");
1654	param.appendChild(displayItem);
1655	rowOfParams.appendChild(param);
1656
1657	// <param default="allFields" name="fieldName" type="enum_single">
1658	// <displayItem name="name">in field</displayItem>
1659	param = doc.createElement(GSXML.PARAM_ELEM);
1660	attribute = doc.createAttribute(GSXML.NAME_ATT);
1661	attribute.setValue(FIELDNAME_ATT);
1662	param.setAttributeNode(attribute);
1663
1664	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1665	attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1666	param.setAttributeNode(attribute);
1667
1668	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1669	attribute.setValue(ALL_FIELDS);
1670	param.setAttributeNode(attribute);
1671
1672	displayItem = createNameValuePairElement(doc,
1673	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1674	"in field");
1675	param.appendChild(displayItem);
1676
1677	String[] searchFieldNames
1678	= {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1679	String[] searchFieldDisplay = {"all titles and full-text",
1680	"document titles only", "document and section titles",
1681	"full-text only"};
1682
1683	// for each fieldName create an option element and insert
1684	// the option into the enum_multi drop-down param:
1685	// <option name="fieldName">
1686	// <displayItem name="name">fieldName</displayItem>
1687	// </option>
1688	for(int i = 0; i < searchFieldNames.length; i++) {
1689	Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1690	attribute = doc.createAttribute(GSXML.NAME_ATT);
1691	attribute.setValue(searchFieldNames[i]);
1692	option.setAttributeNode(attribute);
1693
1694	displayItem = createNameValuePairElement(doc,
1695	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1696	searchFieldDisplay[i]);
1697	option.appendChild(displayItem);
1698	param.appendChild(option); // add option to the drop-down box
1699	}
1700
1701	rowOfParams.appendChild(param);
1702	paramList.appendChild(rowOfParams);
1703	service.appendChild(paramList);
1704	}
1705
1706	/**
1707	* @return a GS3 describe response message for the requested service
1708	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1709	* return nothing special except their names; browse (and any query)
1710	* return more complex XML responses.
1711	* All collections in this Digital Library (Fedora Repository) share
1712	* the same services, so this method returns the same as
1713	* describeService(serviceName).
1714	* @param collectionName - the name of the collection whose service is to
1715	* be described. It will be converted to a fedora collection pid, which is of
1716	* the form "greenstone:<collectionName>-collection".
1717	* @param serviceName - the name of the service in the collection which is to
1718	* be described. */
1719	public String describeCollectionService(String collectionName,
1720	String serviceName) {
1721	// collectionName can be ignored, because all services are FedoraGS3
1722	// services and are not unique to any particular (greenstone) collection.
1723	return describeService(serviceName);
1724	}
1725
1726	/** This method performs the implemented browse operation: allowing the
1727	* user to browse the titles of documents in the given collection by letter
1728	* and returning the results.
1729	* @param collectionName is the name of the collection whose documents
1730	* starting with the given letter will be returned.
1731	* @param classifierIDs are the ids of the classifiers on which to browse. In
1732	* this case, the classifier indicates whether we browse titles by letter, or
1733	* browse (documents) by collection; and it is of the form <CL(letter)>.
1734	* @param structures - the requested browse substructure. Can be any combination
1735	* of ancestors, parent, siblings, children, descendants.
1736	* @param infos - the requested structural info. Can be numSiblings,
1737	* siblingPosition, numChildren.
1738	* @return a GS3 ClassifierBrowse response message which lists all
1739	* the documents that start with the letter indicated by parameter classifier.
1740	*/
1741	public String browse(String collectionName, String[] classifierIDs,
1742	String[] structures, String[] infos)
1743	{
1744	// Construct one string from the structures and structural info arrays
1745	String structure = "";
1746	String info = "";
1747	for(int i = 0; i < structures.length; i++) {
1748	structure = structure + structures[i] + "\|";
1749	}
1750	for(int i = 0; i < infos.length; i++) {
1751	info = info + infos[i] + "\|";
1752	}
1753
1754	Document doc = builder.newDocument();
1755	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1756
1757	// <classifierNodeList>
1758	Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1759
1760	for(int i = 0; i < classifierIDs.length; i++) {
1761	if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1762	browseTitlesByLetterClassifier(doc, classifierNodeList,
1763	collectionName, classifierIDs[i],
1764	structure, info);
1765	}
1766	}
1767
1768	Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1769	GSXML.REQUEST_TYPE_DESCRIBE, /collectionName+/ /"ClassifierBrowse");
1770	try {
1771	return FedoraCommons.elementToString(responseMsg);
1772	} catch(TransformerException e) {
1773	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1774	+ " " + e;
1775	}
1776	}
1777
1778	/** CL1 browsing classifier: browsing titles by starting letter.
1779	* The browsing structure is retrieved.
1780	* @param doc - the document object that will contain the CL1 browsing structure.
1781	* @param classifierNodeList - the classifiers will be added to this nodeList.
1782	* @param collectionName - name of the collection through which we are browsing CL1.
1783	* @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1784	* a letter.
1785	* @param structure - the requested browse substructure. Can be any combination of
1786	* ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1787	* @param info - the requested structural info. Can be numSiblings, siblingPosition,
1788	* numChildren.
1789	* @return the classifierNodeList with the CL1 classifier browse structure.
1790	*/
1791	public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1792	String collectionName, String classifierID,
1793	String structure, String info)
1794	{
1795	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1796
1797	if(structure.indexOf("entire") != -1) {
1798	structure = structure + "ancestors\|descendants";
1799	}
1800
1801	// Structure of ancestors and children only at this stage
1802	int firstLevel = classifierID.indexOf('.');
1803	int secondLevel = classifierID.lastIndexOf('.');
1804
1805	// <nodeStructure>
1806	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1807
1808	// requested classifier node
1809	Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1810	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1811	attribute.setValue(classifierID);
1812	classNode.setAttributeNode(attribute);
1813
1814	if(firstLevel == -1) { // CL1 - toplevel node
1815	Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1816
1817	classifierNodeList.appendChild(classNode);
1818	classNode.appendChild(nodeStructure);
1819
1820	nodeStructure.appendChild(root);
1821	if(structure.indexOf("descendants") != -1) {
1822	getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1823	} else if(structure.indexOf("children") != -1) {
1824	getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1825	}
1826	// nothing to be done for siblings
1827	}
1828	else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1829
1830	if(structure.indexOf("parent") != -1
1831	\|\| structure.indexOf("ancestors") != -1
1832	\|\| structure.indexOf("siblings") != -1) {
1833	String toplevelID = classifierID.substring(0, firstLevel);
1834	Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1835	attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1836	attribute.setValue(toplevelID);
1837	toplevelNode.setAttributeNode(attribute);
1838	Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1839
1840	classifierNodeList.appendChild(toplevelNode);
1841	toplevelNode.appendChild(nodeStructure);
1842	nodeStructure.appendChild(node);
1843
1844	if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1845	getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1846	// pass the requested node (classNode) so that it is attached in the correct
1847	// location among its siblings, and to ensure that it is not recreated.
1848	// getTitlesByLetterStructure() will append classNode to node
1849	} else {
1850	node.appendChild(classNode);
1851	}
1852	} else {
1853	Element node = (Element)classNode.cloneNode(true);
1854	classifierNodeList.appendChild(node);
1855	node.appendChild(nodeStructure);
1856	nodeStructure.appendChild(classNode);
1857	}
1858
1859	int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1860	char ch = (char)(num - 1 + 'A');
1861	if(structure.indexOf("descendants") != -1) {
1862	getTitlesForLetter(ch, collectionName, classNode, "descendants");
1863	} else if(structure.indexOf("children") != -1) {
1864	getTitlesForLetter(ch, collectionName, classNode, "children");
1865	}
1866	}
1867	else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1868	LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1869	}
1870
1871	return classifierNodeList;
1872	}
1873
1874	/** Creates a (CL1) subclassifier element for the docs whose titles start with
1875	* the given letter.
1876	* @param ch - the starting letter of the document titles to retrieve.
1877	* @param collectionName - name of the collection through which we are browsing CL1.
1878	* @param classifierNode - the docNodes found will be appended to this node.
1879	* @param depthStructure - can be descendants or children. Specifies what to retrieve:
1880	* gets descendants of any documents found, otherwise gets just the children.
1881	* @return the given classifierNode which will have the child (or descendant) documents
1882	* appended to it.
1883	*/
1884	public Element getTitlesForLetter(char ch, String collectionName,
1885	Element classifierNode, String depthStructure)
1886	{
1887	Document doc = classifierNode.getOwnerDocument();
1888	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1889
1890
1891	// Retrieve the document structure for each subClassifierID:
1892	// all the documents that begin with its letter.
1893	String letter = String.valueOf(ch);
1894	try {
1895	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1896	if(docPIDs.length == 0) {
1897	return classifierNode; // skip letters that don't have any kids
1898	}
1899
1900	for(int i = 0; i < docPIDs.length; i++) {
1901	// work out the document's fedora PID and section ID
1902	String sectionID = getSectionIDFromDocID(docPIDs[i]);
1903	String docPID = getDocPIDFromDocID(docPIDs[i]);
1904
1905	// get the required section, along with children or descendants
1906	Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1907
1908	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1909	Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1910
1911	// fills in the subtree of the rootNode in our nodeStructure element
1912	createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1913	classifierNode.appendChild(docRootNode);
1914	}
1915	} catch(Exception e) {
1916	ex = new FedoraGS3RunException(e);
1917	ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1918	}
1919
1920	return classifierNode;
1921	}
1922
1923
1924	/** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1925	* starting letter of the alphabet. X is each letter of the alphabet for which there
1926	* are matching document titles.
1927	* @param collectionName - name of the collection through which we are browsing CL1.
1928	* @param classifierNode - the docNodes found will be appended to this node.
1929	* @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1930	* the IDs for the subclassifiers (CL.x).
1931	* @param getDescendants - if true, get descendants of any documents found, otherwise
1932	* get just the children.
1933	* @param wantedSibling - the node (already created) whose siblings are requested. We
1934	* need to make sure not to recreate this node when creating its sibling nodes.
1935	* @return the given classifierNode, with the CL.x subclassifiers for the letters of
1936	* the alphabet that are represented in the document titles.
1937	*/
1938	public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1939	String classifierID, boolean getDescendants,
1940	Element wantedSibling)
1941	{
1942	String ID = "";
1943	if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1944	ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1945	}
1946
1947	Document doc = classifierNode.getOwnerDocument();
1948	FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1949
1950	// We're going to loop to the end of the alphabet
1951	int count = 1;
1952	for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1953	// Retrieve the document structure for each subClassifierID:
1954	// all the documents that begin with its letter.
1955	String letter = String.valueOf(ch);
1956	try {
1957	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1958	if(docPIDs.length == 0) {
1959	continue; // skip letters that don't have any kids
1960	}
1961	Element subClassifier = null;
1962	if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1963	// already have the requested node, don't recreate it
1964	subClassifier = wantedSibling;
1965	} else {
1966	// <classifierNode nodeID="CL1.x">
1967	subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
1968	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1969	attribute.setValue(classifierID+"."+count);
1970	subClassifier.setAttributeNode(attribute);
1971	}
1972	classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
1973
1974	if(getDescendants) { // get the documents
1975
1976	// append the <docNodes> for the docPIDs found as children
1977	// of subclassifier
1978
1979	for(int i = 0; i < docPIDs.length; i++) {
1980	// work out the document's fedora PID and section ID
1981	String sectionID = getSectionIDFromDocID(docPIDs[i]);
1982	String docPID = getDocPIDFromDocID(docPIDs[i]);
1983
1984	// get the required section, along with children or descendants
1985	Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
1986
1987	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1988	Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
1989
1990	// fills in the subtree of the rootNode in our nodeStructure element
1991	createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
1992	subClassifier.appendChild(rootNode);
1993	}
1994	}
1995	} catch(Exception e) {
1996	ex = new FedoraGS3RunException(e);
1997	ex.setSpecifics("requested portion of TOC file or "
1998	+ "trouble with fielded search ");
1999	}
2000	}
2001	return classifierNode;
2002	}
2003
2004
2005	/** This method performs something equivalent to a greenstone3
2006	* ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2007	* @param classNodeIDs array of classifierNode IDs for which the metadata
2008	* needs to be returned.
2009	* @param metafields are the classifier metadata fields that are to be returned.
2010	* At present this method ignores them/pretends the requested metafields are
2011	* "all" and always returns the Title meta for the requested classifier nodes
2012	* (because that is all the metadata this Fedora classifier has at present).
2013	* @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2014	* lists the metadata for all the classifierNodes passed as parameter.*/
2015	public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2016	{
2017	Document doc = this.builder.newDocument();
2018	// <classifierNodeList>
2019	Element classifierNodeList = doc.createElement(
2020	GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2021
2022	// create <classifierNode><metadataList><metadata>s
2023	// </metadataList></classifierNode> for all letters of the alphabet
2024	for(int i = 0; i < classNodeIDs.length; i++) {
2025	// strip ID of everything before the first '.' (i.e. remove "CL#.")
2026	int index = classNodeIDs[i].indexOf('.');
2027	String subClassifierNumber = classNodeIDs[i].substring(index+1);
2028	index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2029	if(index != -1) {
2030	subClassifierNumber = subClassifierNumber.substring(0, index);
2031	}
2032	int subClassifierNum = Integer.parseInt(subClassifierNumber);
2033	String classifierName = "";
2034	if(subClassifierNum == 0) { // no document titles started with a letter
2035	classifierName = "A-Z";
2036	} else {
2037	char letter = (char)('A' + subClassifierNum - 1); // A = 1
2038	classifierName = String.valueOf(letter);
2039	}
2040
2041	// <classifierNode nodeID="CL#.subNum">
2042	Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2043	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2044	attribute.setValue(classNodeIDs[i]);
2045	classifierNode.setAttributeNode(attribute);
2046
2047	// <metadataList>
2048	Element metadataList = doc.createElement(
2049	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2050
2051	// at least one metadata element: that of the title of this
2052	// classifierNode:
2053	// <metadata name="Title">letter</metadata>
2054	Element metadata = this.createNameValuePairElement(doc,
2055	GSXML.METADATA_ELEM, "Title", classifierName);
2056
2057	// now connect up everything
2058	metadataList.appendChild(metadata);
2059	classifierNode.appendChild(metadataList);
2060	classifierNodeList.appendChild(classifierNode);
2061	}
2062
2063	Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2064	GSXML.REQUEST_TYPE_PROCESS, //collName +
2065	"ClassifierBrowseMetadataRetrieve");
2066	try{
2067	return FedoraCommons.elementToString(responseMsg);
2068	}catch(TransformerException e) {
2069	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2070	+ " " + e;
2071	}
2072	}
2073
2074	/** @return a newly created element of the following format:
2075	* <classifier content="somecontent" name="CL+num">
2076	* <displayItem name="name">someClassifierName</displayItem>
2077	* <displayItem name="description">Browse by classifier name</displayItem>
2078	* </classifier>
2079	* @param doc - the document used to create the element
2080	* @param content - value of the content attribute
2081	* @param classifierNum - the number suffixed to the CL, together forming
2082	* the classifier Node's ID
2083	* @param displayNameVal is the bodytext of a named displayItem element
2084	* @param displayDescrVal is the bodytext of a displayItem element with
2085	* description */
2086	protected Element createClassifierElement(Document doc, String content,
2087	int classifierNum, String displayNameVal, String displayDescrVal)
2088	{
2089	final String CL = "CL";
2090	Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2091	// content attribute
2092	Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2093	att.setValue(content);
2094	classifier.setAttributeNode(att);
2095	// name attribute
2096	att = doc.createAttribute(GSXML.NAME_ATT);
2097	att.setValue(CL + classifierNum);
2098	classifier.setAttributeNode(att);
2099
2100	// now create the displayItem children for classifier:
2101	// <displayItem name="name">#letter</displayItem>
2102	// <displayItem name="description">Browse titles starting with #letter</displayItem>
2103	Element displayItem = createNameValuePairElement(doc,
2104	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2105	classifier.appendChild(displayItem);
2106	displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2107	GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2108	classifier.appendChild(displayItem);
2109
2110	return classifier;
2111	}
2112
2113
2114	/** @return a newly created element of the following format:
2115	* <elementName name="somename">"some display value"</elementName>
2116	* @param doc - the document used to create the element
2117	* @param elementName - the tag name
2118	* @param name - value of attribute name
2119	* @param value - the body text of the element */
2120	protected Element createNameValuePairElement(Document doc, String elementName,
2121	String name, String value) {
2122	// <elementName name="somename">"some display value"</elementName>
2123	Element element = doc.createElement(elementName);
2124	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2125	attribute.setValue(name);
2126	element.setAttributeNode(attribute);
2127
2128	element.appendChild(doc.createTextNode(value));
2129	return element;
2130	}
2131
2132	/**
2133	* @param collection is the collection to search in
2134	* @param query is the query term to search for. It won't specify the
2135	* indexed field to search in, which will mean that GSearch will
2136	* search all default indexed fields.
2137	* @param maxDocs is the maximum number of results to return (which
2138	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2139	*/
2140	public String[] textQuery(String collection, String query,
2141	int maxDocs)
2142	throws Exception
2143	{
2144	// no need to search there is no query or query is empty spaces
2145	if(query.trim().equals(""))
2146	return new String[]{};
2147
2148	// QUERY value won't specify indexed field to search, Fedora
2149	// Gsearch will take that as meaning all default indexed fields.
2150	// Params to search() method below: string of fielded query terms;
2151	// hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2152	query = query + " " + "PID" + COLON + GREENSTONE;
2153
2154	String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2155	// now we have the XML returned by FedoraGSearch, get the pids
2156	// of the documents returned (if any)
2157	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2158	collection, searchResult);
2159	return pids;
2160	}
2161
2162	/**
2163	* This method performs a fieldquery, searching for x number of phrases
2164	* in each of the 4 indexed fields.
2165	* @param collection is the collection to search in
2166	* @param nameValParamsMap is a Map of several(key, value) entries,
2167	* 4 of which we're concerned with here:
2168	* - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2169	* - the values are a comma separated list of terms (phrases or single
2170	* words) to search that field in. There may be more than 1 or
2171	* there may be none (in which case there may be N empty values or
2172	* spaces separated by commas).
2173	* @param maxDocs is the maximum number of results to return (which
2174	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2175	* */
2176	public String[] fieldQuery(String collection, Map nameValParamsMap,
2177	int maxDocs)
2178	throws Exception
2179	{
2180	// we're going to maintain a list of UNIQUE pids that were returned
2181	// in search results. Hence we use Set:
2182	java.util.Set set = new java.util.HashSet();
2183
2184	// (1) Use Fedora's search to search document titles, if they were
2185	// specified:
2186	String[] docTitlepids = {};
2187
2188	String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2189	if(docTitleTerms != null) { // no doc titles may have been specified
2190	String[] phrases = docTitleTerms.split(COMMA);
2191
2192	// search the individual phrases first:
2193	for(int i = 0; i < phrases.length; i++) {
2194	if(phrases.equals("") \|\| phrases.equals(" "))
2195	continue; //skip when there are no terms
2196	docTitlepids = this.searchDocumentTitles(
2197	collection, phrases[i], false);
2198	for(int j = 0; j < docTitlepids.length; j++)
2199	set.add(docTitlepids[j]);
2200	}
2201	}
2202	// (2) use FedoraGSearch to search doc AND section titles, and
2203	// fulltext (in case these were specified in nameValParamsMap):
2204	String searchResult = this.fedoraGSearch.search(
2205	nameValParamsMap, 1, maxDocs);
2206
2207	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2208	collection, searchResult);
2209
2210	for(int i = 0; i < pids.length; i++)
2211	set.add(pids[i]);
2212
2213	pids = null;
2214	pids = new String[set.size()];
2215	set.toArray(pids); // unique pids
2216	return pids;
2217	}
2218
2219	/** @return a String representing Greenstone3 XML for a query process
2220	* response returning the results for the query denoted by parameter
2221	* nameValParamsMap.
2222	* @param nameValParamsMap is a Hashmap of name and value pairs for all the
2223	* query field data values. The names match the field names that
2224	* describeCollectionService() would have returned for the query service.
2225	* @param collection is the name of the collection
2226	* @param service is the name of the query service
2227	* This method is only ever called when any of the services in the digital
2228	* library described themselves as type=query. Therefore any digital
2229	* libraries that have no query services, can just return emtpy message
2230	* strings (or even "") since this method will never be called on them
2231	* anyway. */
2232	public String query(String collection, String service,
2233	Map nameValParamsMap)
2234	{
2235	FedoraGS3RunException ex = null;
2236	// (1) obtain the requested number of maximum result documents
2237	int maxDocs = 100;
2238	try{
2239	maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2240	} catch(NumberFormatException e) {
2241	maxDocs = 100;
2242	}
2243
2244	String pids[] = {};
2245	// (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2246	if(service.endsWith("TextQuery")) {
2247	try {
2248	// get the Query field:
2249	String query = (String)nameValParamsMap.get(QUERY);
2250	pids = textQuery(collection, query, maxDocs);
2251	}
2252	catch(Exception e) {
2253	LOG.error("Error in TextQuery processing: " + e);
2254	ex = new FedoraGS3RunException(
2255	"When trying to use FedoraGenericSearch for a TextQuery", e);
2256
2257	}
2258	} else { // (3) FieldQuery
2259	// first get the comma-separated lists
2260	String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2261	String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2262	// both are comma separated lists, so split both on 'comma'
2263	String[] fieldNames = listOfFieldNames.split(COMMA);
2264	String[] searchTerms = listOfSearchTerms.split(COMMA);
2265
2266	// In the fieldNames and searchTerms lists of nameValParamsMap,
2267	// each searchTerm element was matched with its correspondingly
2268	// indexed fieldName.
2269	// A new map is going to reorganise this, by putting all terms
2270	// for a particular fieldName together in a comma separated list
2271	// and associating that with the fieldName. I.e. (key, value) ->
2272	// (fieldName, comma-separated list of all terms in that field)
2273	Map map = new HashMap();
2274	for(int i = 0; i < searchTerms.length; i++) {
2275	// there may be fewer searchTerms than fieldNames (since some
2276	// fieldNames may have been left empty), so loop on searchTerms
2277	if(map.containsKey(fieldNames[i])) { // fieldName is already
2278	// in the list, so append comma with new value
2279	String termsList = (String)map.get(fieldNames[i]);
2280	termsList = termsList + COMMA + searchTerms[i];
2281	map.put(fieldNames[i], termsList);
2282	} else { // this is the first time this fieldName occurred
2283	// just put the fieldName with searchTerm as-is
2284	map.put(fieldNames[i], searchTerms[i]);
2285	}
2286	}
2287
2288	try {
2289	// For fieldquery, we search on all the fieldNames specified
2290	// - if DOC_TITLES is specified then we use Fedora's search
2291	// - for all other fieldNames specified, we use FedoraGSearch
2292	pids = fieldQuery(collection, map, maxDocs);
2293	}
2294	catch(Exception e) {
2295	LOG.error("Error in FieldQuery processing: " + e);
2296	ex = new FedoraGS3RunException(
2297	"When trying to use FedoraGenericSearch for a FieldQuery", e);
2298	}
2299	}
2300
2301	// Build Greenstone XML Query response message for from
2302	// the pids (which should be document identifiers)
2303	Document doc = builder.newDocument();
2304	// <metadataList><metadata name="numDocsMatched" value="n" />
2305	// </metadataList>
2306	Element metadataList = doc.createElement(
2307	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2308	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2309
2310	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2311	attribute.setValue(NUM_DOCS_MATCHED);
2312	metadata.setAttributeNode(attribute);
2313
2314	attribute = doc.createAttribute(GSXML.VALUE_ATT);
2315	attribute.setValue(Integer.toString(pids.length));
2316	metadata.setAttributeNode(attribute);
2317
2318	metadataList.appendChild(metadata);
2319
2320	// <documentNodeList>
2321	// <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2322	// docType='hierarchy' nodeType="leaf" />
2323	// ...
2324	// ...
2325	// </documentNodeList>
2326	Element docNodeList = doc.createElement(
2327	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2328	// for each
2329	for(int i = 0; i < pids.length; i++) {
2330	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2331	attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2332	attribute.setValue(pids[i]);
2333	docNode.setAttributeNode(attribute);
2334
2335	attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2336	attribute.setValue("hierarchy");
2337	docNode.setAttributeNode(attribute);
2338
2339	attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2340	attribute.setValue("root");
2341	docNode.setAttributeNode(attribute);
2342	docNodeList.appendChild(docNode);
2343	}
2344
2345	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2346	GSXML.REQUEST_TYPE_PROCESS, service);
2347	try{
2348	return FedoraCommons.elementToString(responseMsg);
2349	}catch(TransformerException e) {
2350	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2351	+ " " + e;
2352	}
2353	}
2354
2355
2356	// FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2357	/** Given a URL that represents a fedoraPID, will look up the object.
2358	* If it exists, it will return the contents of the DC:Title of its datastream.
2359	* If it doesn't exist, it will return the URL as-is.
2360	* @param URL: the URL that (after modification) represents a fedoraPID to look up.
2361	* @param collection: the name of collection in which to search for the URL
2362	* representing a fedoraPID.
2363	* @return the string (representing a fedoraPID) stored in the DC:Title of the
2364	* URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2365	* then the parameter URL is returned.
2366	*/
2367	public String getPIDforURL(String url, String collection) {
2368	FedoraGS3RunException ex = null; // any RemoteException
2369
2370	// (1) convert url to the fedorapid
2371	// / -> _ and : -> -
2372	String fedoraPID = url.replaceAll("/", "_");
2373	fedoraPID = fedoraPID.replaceAll(":", "-");
2374	// prefix "greenstone-http:<colname>-" to the fedoraPID
2375	fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2376	//LOG.error("### fedoraPID: " + fedoraPID);
2377
2378	// (2) Look up the datastream for the fedorapid
2379	String dcTitle = "";
2380	try {
2381	dcTitle = getDCTitle(fedoraPID);
2382	} catch(Exception e) {
2383	LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2384	ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2385	}
2386	//String dc = this.getDC(fedoraPID);
2387	//LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2388
2389	// (3) if fedorapid exists, extract the dc:title content.
2390	// if it doesn't exist, return url
2391	if(dcTitle.equals("")) {
2392	return url;
2393	} else {
2394	// It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2395	//return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2396	return dcTitle+"-1";
2397	}
2398	}
2399
2400	public static void main(String args[]) {
2401	try{
2402	// testing default constructor
2403	//FedoraGS3Connection con = new FedoraGS3Connection();
2404
2405	// testing constructor that takes properties file to show initial
2406	// fedora server values
2407	java.io.File propertyFilename
2408	= new java.io.File("fedoraGS3.properties");
2409	FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2410
2411	// DESCRIBE: serviceList, collectionList
2412	System.out.println("serviceList:\n" + con.getServiceList());
2413
2414	System.out.println("collectionList:\n" + con.getCollectionList());
2415
2416	String[] colPIDs = con.getCollections();
2417	String[] collectionNames = con.getCollectionNames(con.getCollections());
2418
2419
2420	for(int i = 0; i < collectionNames.length; i++) {
2421	System.out.println("Describing collections:\n");
2422	System.out.println(con.describeCollection(collectionNames[i]));
2423	System.out.println("Describing collection services:\n"
2424	+ con.describeCollectionServices(collectionNames[i]));
2425	}
2426
2427	String[] serviceNames = con.getServiceNames();
2428	for(int i = 0; i < serviceNames.length; i++) {
2429	System.out.println("Describing " + serviceNames[i] + ":\n"
2430	+ con.describeCollectionService("demo", serviceNames[i]));
2431	}
2432
2433
2434	// TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2435	// along with EX of the top-level document:
2436	System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2437	System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2438
2439
2440	String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2441	System.out.println("\nGET CONTENT:");
2442	for(int i = 0; i < docIDs.length; i++) {
2443	System.out.println(con.getContent(docIDs[i]));
2444	}
2445
2446	System.out.println("\nGET META:");
2447	for(int i = 0; i < docIDs.length; i++) {
2448	System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2449	}
2450
2451	String[] getTitlesFor = {
2452	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2453	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2454	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2455	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2456	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2457	};
2458
2459	// first let's display the regular meta for top-level docs and
2460	// their sections
2461	for(int i = 0; i < getTitlesFor.length; i++) {
2462	System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2463	}
2464
2465	System.out.println("\nTitles are:");
2466	System.out.println(con.getTitleMetadata(getTitlesFor));
2467
2468	System.out.println("\nGET STRUCTURE:");
2469	for(int i = 0; i < docIDs.length; i++) {
2470	System.out.println("Descendents and numChildren:\n"
2471	+ con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {"numChildren"}));
2472	System.out.println("Parent and numSiblings:\n"
2473	+ con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {"numSiblings"}));
2474	}
2475
2476	// TEST ERROR CASES:
2477	System.out.println("\nTESTING ERROR CASES");
2478	System.out.println(con.getContent("greenstone:demo-pinky"));
2479	String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2480	"greenstone:demo-pinky" };
2481	System.out.println(con.getContent(errorCases));
2482	System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2483	System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {"numChildren"}));
2484
2485	System.out.println("\nCLASSIFIER BROWSE");
2486	System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2487	new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2488
2489	System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2490	String[] classNodeIDs = new String[26];
2491	for(int i = 0; i < classNodeIDs.length; i++) {
2492	int subClassifierNum = i + 1;
2493	classNodeIDs[i] = "CL1." + subClassifierNum;
2494	}
2495	System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2496	classNodeIDs, new String[]{"all"}));
2497
2498	System.out.println("Testing query services");
2499	System.out.println("TEXT QUERY:");
2500	Map formControlValsMap = new HashMap();
2501	formControlValsMap.put(MAXDOCS, "100");
2502	formControlValsMap.put(QUERY, "snails");
2503	String searchResponse
2504	= con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2505	System.out.println(searchResponse);
2506
2507	System.out.println("FIELD QUERY:");
2508	formControlValsMap.clear();
2509	formControlValsMap.put(MAXDOCS, "100");
2510	formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2511	formControlValsMap.put(FIELDNAME_ATT,
2512	"allFields,docTitles,allFields,allFields");
2513	searchResponse
2514	= con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2515	System.out.println(searchResponse);
2516
2517	System.exit(0);
2518	}catch(Exception e) {
2519	JOptionPane.showMessageDialog(
2520	null, e, "Error", JOptionPane.ERROR_MESSAGE);
2521	//System.err.println("ERROR: " + e);
2522	e.printStackTrace();
2523	}
2524	}
2525	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: