Context Navigation

source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26309

Last change on this file since 26309 was 26309, checked in by ak19, 12 years ago
Corrections to XML returned by FedoraGS3 to get the VList display for classifierBrowse to work correctly in Greenstone: classifierStyle attribute needs to be set on documents returned. Also added in further missing attributes for query and browse, in case these turn out to be important. 2. Replaced 3-line setAttributeNode() calls with 1-line setAttribute() calls.
File size: 106.8 KB

Line
1	/**
2	*#########################################################################
3	* FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4	* of the Greenstone digital library suite from the New Zealand Digital
5	* Library Project at the University of Waikato, New Zealand.
6	* <BR><BR>
7	* Copyright (C) 2008 New Zealand Digital Library Project
8	* <BR><BR>
9	* This program is free software; you can redistribute it and/or modify
10	* it under the terms of the GNU General Public License as published by
11	* the Free Software Foundation; either version 2 of the License, or
12	* (at your option) any later version.
13	* <BR><BR>
14	* This program is distributed in the hope that it will be useful,
15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	* GNU General Public License for more details.
18	*########################################################################
19	*/
20
21	package org.greenstone.fedora.services;
22
23
24	import java.io.StringReader;
25
26	import org.apache.log4j.Logger;
27	import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28	import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29	import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30	import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31	import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
32	import org.greenstone.gsdl3.util.GSXML;
33	import org.w3c.dom.Document;
34	import org.w3c.dom.Element;
35	import org.w3c.dom.Attr;
36	import org.w3c.dom.Text;
37	import org.w3c.dom.NodeList;
38	import org.w3c.dom.Node;
39	import org.xml.sax.InputSource;
40
41	import java.io.File;
42	import java.util.HashMap;
43	import java.util.Properties;
44	import java.util.Map;
45
46	import javax.swing.JOptionPane;
47
48	import org.xml.sax.SAXException;
49	import java.io.UnsupportedEncodingException;
50	import java.io.IOException;
51	import javax.net.ssl.SSLHandshakeException;
52	import java.net.Authenticator;
53	import java.net.ConnectException;
54	import java.net.MalformedURLException;
55	import java.net.PasswordAuthentication;
56	import java.rmi.RemoteException;
57	import javax.xml.parsers.ParserConfigurationException;
58	import javax.xml.transform.TransformerException;
59
60	/**
61	* Class that extends FedoraConnection in order to be able to use
62	* Fedora's web services to retrieve the specific datastreams of
63	* Greenstone documents stored in Fedora's repository. This class
64	* provides methods that convert those datastreams into Greenstone3
65	* XML response messages which are returned.
66	* @author ak19
67	*/
68	public class FedoraGS3Connection
69	extends FedoraConnection implements FedoraToGS3Interface,
70	FedoraToGS3Interface.Constants
71	{
72	/** The logging instance for this class */
73	private static final Logger LOG = Logger.getLogger(
74	FedoraGS3Connection.class.getName());
75
76	/** Default name of Fedora index */
77	private static final String DEFAULT_FEDORA_INDEX = "FgsIndex"; //"BasicIndex" for older versions of GSearch
78
79	/** Complete list of services that our FedoraGS3 would support
80	* if everything goes well. If a connection to FedoraGSearch
81	* cannot be established, the query services will no longer be
82	* available. The actual services supported are given by member
83	* variable serviceNames. */
84	protected static final String[] SERVICES = {
85	"DocumentContentRetrieve", "DocumentMetadataRetrieve",
86	"DocumentStructureRetrieve",
87	"TextQuery", "FieldQuery",
88	"ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
89	};
90
91	/** List of services actually supported by our FedoraGS3 repository
92	* after construction. If FedoraGenericSearch can't be connected to,
93	* then query services will not be offered */
94	protected String[] serviceNames;
95
96	/** The object used to connect to FedoraGenericSearch, which is used
97	* for doing full-text searching */
98	protected GSearchConnection fedoraGSearch;
99
100	/** The url for the wsdl file of FedoraGSearch's web services
101	* by default this will be the Fedora server's base URL
102	* concatenated to "gsearch/services/FgsOperations?wsdl" */
103	protected String gSearchWSDLURL;
104
105	/** The last part of the gSearchWSDL URL. The first part is
106	* the same as the fedora server's base url. */
107	protected String gSearchWSDLSuffix;
108
109	/** The name of the index that FedoraGSearch will index the GS3
110	* documents into. If no name is specified in the properties file,
111	* this will default to DEFAULT_FEDORA_INDEX ("FgsIndex"). */
112	protected String gSearchIndexName;
113
/**
 * Five-argument constructor, mirroring the one on the superclass
 * FedoraConnection.
 * <p>
 * Besides connecting to Fedora's own web services, the new object tries to
 * reach FedoraGSearch's web services at the default WSDL location
 * ("gsearch/services/FgsOperations?wsdl" appended to the Fedora base URL).
 * To use a different location, call {@link #setGSearchWSDLURL(String)}
 * once construction has finished.
 *
 * @param protocol either http or https
 * @param host the host on which the fedora server is listening
 * @param port the port on which the fedora server is listening
 * @param fedoraServerUsername the username for the administrative
 *        authentication required to access the fedora server
 * @param fedoraServerPassword the matching password; pass "" if no password
 *        was set when Fedora was installed
 */
public FedoraGS3Connection(
    String protocol,
    String host,
    int port,
    String fedoraServerUsername,
    String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException,
        AuthenticationFailedException, NotAFedoraServerException,
        ConnectException, Exception
{
    // Everything is delegated to the superclass. NOTE(review): according to
    // the Javadoc of this class's init(...) override, the superclass is
    // expected to call init(...) for this constructor, which is where the
    // GSearchConnection gets set up - confirm against FedoraConnection.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
139
/**
 * No-argument constructor, mirroring the one on the superclass
 * FedoraConnection, which displays a small dialog asking for the host,
 * port, administrative username and password of the fedora server.
 * The password field may be left blank if no password was set on the
 * fedora repository at installation time.
 */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // which this class overrides to try to instantiate the GSearchConnection.
    super();
}
154
/**
 * Single-argument constructor, mirroring the one on the superclass
 * FedoraConnection.
 * <p>
 * The superclass displays a small dialog asking for the host, port,
 * administrative username and password of the fedora server, pre-filled
 * with whatever connection values the given properties file provides.
 * Fields for which the file has no value are shown blank. The password
 * field may be left blank if no password was set on the fedora repository
 * at installation time.
 *
 * @param propertiesFilename the properties file that may already contain
 *        connection initialisation values
 */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // The superclass constructor calls setInitialisationProperties(properties),
    // which this class overrides to try to instantiate the GSearchConnection.
    super(propertiesFilename);
}
174
175	/** The superclass constructor calls this method passing any preset
176	* properties loaded from a propertiesFile. This method is overridden
177	* here in order to instantiate the gSearchConnection based on the
178	* - gSearchWSDLSuffix that will be appended to the fedora base url.
179	* (If one was not provided in the properties file, gSearchWSDLURL defaults
180	* to something of the form
181	* "http://<fedorahost:port>/fedoragsearch/services/FgsOperations?wsdl"
182	* which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
183	* "gsearch/services/FgsOperations?wsdl".
184	* - name of the index into which the GS3 documents have been indexed
185	* and which FedoraGenericSearch should use to perform searches. If none is
186	* given in the properties file, then the index name defaults to "FgsIndex"
187	* (no longer BasicIndex or FedoraIndex).
188	* @param properties is the Properties Map loaded from a properties file
189	* (if there was any) which specifies such things as host and port of the
190	* FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
191	* At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
192	* to whatever the final value of this.gSearchWSDLURL' suffix is, and
193	* "gsearch.indexName" will be set to to whatever the final value of
194	* this.gSearchIndexName is.
195	*/
196	protected void setInitialisationProperties(Properties properties)
197	throws ParserConfigurationException, MalformedURLException,
198	CancelledException, ConnectException, RemoteException,
199	SSLHandshakeException, Exception
200	{
201	super.setInitialisationProperties(properties);
202	// gsearchWSDL URL suffix, if not specified, defaults to
203	// "fedoragsearch/services/FgsOperations?wsdl" which is
204	// concatenated to the baseURL of fedora to give the gsearchWSDLURL.
205	this.gSearchWSDLSuffix = properties.getProperty(
206	"gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
207	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
208	// Set the property to whatever this.gSearchWSDLURL is now,
209	// so that it will be written out to the properties file again
210	properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
211
212	// Similarly for the name of the index FedoraGenericSearch should use
213	// when performing searches for GS3 docs stored in Fedora's repository.
214	this.gSearchIndexName = properties.getProperty(
215	"gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
216	properties.setProperty("gsearch.indexName", this.gSearchIndexName);
217	// Create a connection to FedoraGSearch's web services:
218	initSearchFunctionality();
219	}
220
221	/** Overridden init method to work with the 5 argument constructor, so that we can
222	* bypass using setInitialisationProperties() which works with a Properties map.
223	*/
224	protected void init(String protocol, String host, String port,
225	final String fedoraServerUsername, final String fedoraServerPassword)
226	throws ParserConfigurationException, MalformedURLException,
227	AuthenticationFailedException, RemoteException, Exception
228	{
229	super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
230	this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
231	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
232	this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
233
234	// Now need to set username and password for accessing WSDL (after GSearch 2.2)
235	// http://stackoverflow.com/questions/3037221/401-error-when-consuming-a-web-service-with-http-basic-authentication-using-cxf
236
237	// The java.net.Authenticator can be used to send user credentials when needed.
238	Authenticator.setDefault(new Authenticator() {
239	@Override
240	protected PasswordAuthentication getPasswordAuthentication() {
241	return new PasswordAuthentication(
242	fedoraServerUsername,
243	fedoraServerPassword.toCharArray());
244	}
245	});
246
247	initSearchFunctionality();
248	}
249
250
251	/** Init method that instantiates a GSearchConnection object used
252	* to work with the separate FedoraGSearch web services.
253	* The url of the WSDL for FedoraGSearch's web services is worked out
254	* from the baseURL of the Fedora server.
255	*/
256	protected void initSearchFunctionality()
257	{
258	try {
259	this.fedoraGSearch = null;
260	this.fedoraGSearch = new GSearchConnection(
261	gSearchWSDLURL, gSearchIndexName);
262	this.serviceNames = SERVICES;
263	} catch(Exception e){
264	LOG.error("Cannot connect to FedoraGSearch's web services at "
265	+ gSearchWSDLURL + "\nQuery services will not be available.", e);
266	// Exception, e, as parameter prints the stacktrace of the exception to the log
267
268	// If an exception occurs, something has gone wrong when
269	// trying to connect to FedoraGSearch's web services. This
270	// means, we can't offer query services, as that's provided
271	// by FedoraGSearch
272	serviceNames = null;
273	int countOfNonQueryServices = 0;
274	for(int i = 0; i < SERVICES.length; i++) {
275	// do not count query services
276	if(!SERVICES[i].toLowerCase().contains("query")) {
277	countOfNonQueryServices++;
278	}
279	}
280	// Services now supported are everything except Query services
281	serviceNames = new String[countOfNonQueryServices];
282	int j = 0;
283	for(int i = 0; i < SERVICES.length; i++) {
284	if(!SERVICES[i].toLowerCase().contains("query")) {
285	serviceNames[j] = SERVICES[i];
286	j++; // valid serviceName, so increment serviceName counter
287	}
288
289	}
290	}
291	}
292
293	/** @return the gSearchWSDLURL, the url of the WSDL for the
294	* FedoraGSearch web services */
295	public String getGSearchWSDLURL() { return gSearchWSDLURL; }
296
297	/** Sets the member variable gSearchWSDLURL that specify the location of
298	* the WSDL file of FedoraGSearch's web services. Then it attempts
299	* to instantiate a connection to those web services.
300	* @param url is the new url of the GSearch web services WSDL file */
301	public void setGSearchWSDLURL(String url) {
302	this.gSearchWSDLURL = url;
303	initSearchFunctionality();
304	}
305
306	/** @return the gSearchIndexName, the name of the index Fedora Generic
307	* Search will search in (where GS3 docs have been indexed into). */
308	public String getGSearchIndexName() { return gSearchIndexName; }
309
310	/** Sets the member variable gSearchIndexName that specifies the name
311	* of the index containing indexed GS3 documents. Then it attempts
312	* to instantiate a connection to the Fedora GSearch web services using
313	* this changed value for indexName.
314	* @param indexName is the new name of the index containing indexed GS3
315	* docs that GSearch should search in. */
316	public void setGSearchIndexName(String indexName) {
317	this.gSearchIndexName = indexName;
318	initSearchFunctionality();
319	}
320
321	/** @return the array of the services actually supported by FedoraGS3 */
322	protected String[] getServiceNames() { return this.serviceNames;}
323
324	/**
325	* For finding out if the sectionNumber is given as part of the docID.
326	* @param docID is the String that contains the docPID and may also
327	* contain the section number.
328	* @return true if the document identifier docID contains a section-
329	* number, and false if it consists solely of the docPID.
330	* That is, true is returned if
331	* <pre>docID = "greenstone:colName-<docPID>-<sectionNum>"</pre>
332	* and false is returned if
333	* <pre>docID = "greenstone:colName-<docPID>"</pre>
334	* */
335	protected boolean containsSectionNumber(String docID) {
336	// if there are two hyphens in the docID, then there are sections
337	// (and the section number is appended at end of docID)
338	// docID = "greenstone:colName-<docPID>-<sectionNum>"
339	return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
340	}
341
342	/** This method will extract the docPID from docID and return it.
343	* (If a sectionNumber is suffixed to the docID, the docPID which is
344	* the prefix is returned; otherwise the docID is the docPID and is
345	* returned)
346	* @param docID is the String that contains the docPID and may also
347	* contain the section number.
348	* @return only the docPID portion of the docID.
349	*/
350	protected String getDocPIDFromDocID(String docID) {
351	if(containsSectionNumber(docID))
352	return docID.substring(0, docID.lastIndexOf(HYPHEN));
353	// else (if there's no sectionNumber), docID is the docPID
354	return docID;
355	}
356
357	/** This method will return the section Number, if there's any
358	* suffixed to the docID. Otherwise it will return the empty string
359	* @param docID is the String that contains the docPID and may also
360	* contain the section number.
361	* @return only the sectionID portion of the docID - if any, else "".
362	*/
363	protected String getSectionIDFromDocID(String docID) {
364	if(containsSectionNumber(docID))
365	return docID.substring(
366	docID.lastIndexOf(HYPHEN)+1, docID.length());
367	return "";
368	}
369
370	/** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
371	* response message that gives the metadata for each collection identified
372	* @param collIDs is an array of fedora pids identifying collections in the
373	* fedora repository
374	* @return a GS3 DocumentMetadataRetrieve response message containing the
375	* EX metadata for all the requested collections */
376	public String getCollectionMetadata(String[] collIDs) {
377	return getMetadata(collIDs, new String[] {"all"});
378	}
379
380	/** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
381	* response message is returned containing the metadata for each document.
382	* @param docIDs is an array of document identifiers (docID can either be
383	* <pid>s items (documents) in the fedora repository, or
384	* "<pid>-sectionNumber".
385	* @return a GS3 DocumentMetadataRetrieve response message containing the
386	* EX, DC, DLS metadata for all the requested documents
387	* @param metadata is the list of metadata elements to be retrieved for each doc */
388	public String getDocumentMetadata(String[] docIDs, String[] metadata) {
389	return getMetadata(docIDs, metadata);
390	}
391
392	/** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
393	* response message that gives the metadata for the collection identified
394	* @param collID is a fedora pid identifying a collection in its repository
395	* @return a GS3 DocumentMetadataRetrieve response message containing the
396	* EX metadata for the requested collection
397	* @param metadata is the list of metadata elements to be retrieved for each doc */
398	public String getCollectionMetadata(String collID) {
399	return getMetadata(new String[] {collID}, new String[] {"all"});
400	}
401
402	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
403	* response message containing the metadata for the document.
404	* @param docID is a document identifier (docID can either be a <pid>
405	* of an item (document) in the fedora repository, or it can be
406	* "<pid>-sectionNumber".
407	* @return a GS3 DocumentMetadataRetrieve response message containing the
408	* EX, DC, DLS metadata for the requested document */
409	public String getDocumentMetadata(String docID, String[] metadata) {
410	return getMetadata(new String[] {docID}, metadata);
411	}
412
413	/** @return a greenstone DocumentMetadataRetrieve response for the
414	* documents or collections indicated by the docIDsOrCollIDs.
415	* @param docIDsOrCollIDs is an array of identifiers which may be either the
416	* fedora pids for collections, or otherwise may be a document identifier.
417	* In the last case, the document ID may consist of either
418	* "documentPID-sectionNumber" or may just be just fedora documentPID
419	* @param metadata is the list of metadata elements to be retrieved for each doc */
420	public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
421	{
422	Document doc = builder.newDocument();
423	FedoraGS3RunException ex = null;
424
425	Element docNodeList = doc.createElement(
426	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
427
428	try{
429	for(int i = 0; i < docIDsOrCollIDs.length; i++) {
430	// create the <documentNode> containing the metadata
431	// for each document docID
432	Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
433	docNodeList.appendChild(docNode);
434	}
435	} catch(Exception e) {
436	ex = new FedoraGS3RunException(e);
437	ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
438	}
439
440	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
441	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
442	try{
443	return FedoraCommons.elementToString(responseMsg);
444	} catch(TransformerException e) {
445	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
446	+ " " + e;
447	}
448	}
449
450	/** Method that takes a new DOM document, as well as an identifier of either
451	* a collection or document (which may be a fedora pid for the collection
452	* or document, or may be the documentPid-sectionNumber for a document) and
453	* returns a documentNode element for it:
454	* <documentNode><metadataList>
455	* <metadata name="">value</metadata>
456	* ...
457	* </metadataList></documentNode>
458	* @return documentNode containing the metadata for the collection or
459	* document given by parameter ID
460	* @param id denotes a collection pid, a document pid or a docID of the
461	* form "documentpid-sectionNumber"
462	* @param metadata is the list of metadata elements to be retrieved for each doc */
463	protected Element getMetadata(Document doc, String id, String[] metadata)
464	throws RemoteException, UnsupportedEncodingException,
465	SAXException, IOException
466	{
467	// We're going to create the documentNode nested inside the following
468	// documentNodeList:
469	// <documentNodeList>
470	// <documentNode nodeID=""><metadataList>
471	// <metadata name="">value</metadata>
472	// </metadataList></documentNode>
473	// <documentNode>...</documentNode>
474	// </documentNodeList>
475	// <documentNodeList>
476
477	// <documentNode nodeID="docID"> - the docNode on which a metadata
478	// retrieve is being performed
479	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
480	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
481	attribute.setValue(id);
482	docNode.setAttributeNode(attribute);
483
484	// <metadataList>
485	Element metadataList = doc.createElement(
486	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
487
488	String ex = "";
489	String dc = "";
490	String dls = "";
491	if(id.endsWith(_COLLECTION)) { // docID refers to a collection
492	// Obtain the "EX" datastream (extracted metadata) for the collection
493	ex = this.getEX(id);
494	}
495	else { // docID refers to a document
496
497	docNode.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
498	docNode.setAttribute(GSXML.NODE_RANK_ATT, "NaN");
499
500	// work out the document's fedora PID and section ID, and then
501	// obtain the EX (extracted metadata) and DC datastreams for the doc
502
503	// Note that EX/DC for pid="greenstone:<colname>-docPID-1"
504	// is the same as for pid="greenstone:<colname>-docPID"
505	// That is, <Section id="1"> refers to the toplevel document docPID
506	// If requested for top-level document, there may also be DLS meta
507	String sectionID = getSectionIDFromDocID(id);
508	String docPID = getDocPIDFromDocID(id);
509	if(sectionID.equals("") \|\| sectionID.equals("1")) {
510	// metadata of toplevel document is requested
511	ex = this.getEX(docPID); // slightly faster than doing
512	//getSectionEXMetadata(docID, "1")
513	dc = this.getDC(docPID);
514	dls = this.getDLS(docPID);
515	docNode.setAttribute(GSXML.NODE_TYPE_ATT, "root");
516	}
517	else {
518	ex = getSectionEXMetadata(docPID, sectionID);
519	dc = getSectionDCMetadata(docPID, sectionID);
520	docNode.setAttribute(GSXML.NODE_TYPE_ATT, "leaf");
521	}
522	}
523
524	String metafields = "";
525	for(int i = 0; i < metadata.length; i++) {
526	metafields = metafields + metadata[i] + "\|";
527	}
528
529	// Adding in metadata sets in alphabetical order
530	// DC metadata for a top-level document is different from EX, DLS:
531	// only the element's namespace prefix is "dc", the rest of a tagname
532	// is unknown.
533	if(!dc.equals("")) {
534	addMetadataWithNamespacedTagNames(doc, metadataList,
535	dc, DC, metafields);
536	}
537
538	// Check if we were supposed to process dls and dc metadata
539	// as well. We only ever do this for top-level documents,
540	// in which case, dls and dc will be non-empty strings
541	if(!dls.equals("")) {
542	addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
543	}
544
545	// we definitely have an EX metadatastream for each
546	// collection object, top-level document object,
547	// and document section item
548	addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
549
550	// now the metadataList has been built up
551	docNode.appendChild(metadataList);
552
553	return docNode; // return <documentNode> containing the metadata
554	}
555
556	/** This method retrieves all the metadata elements in the metaDataStream
557	* parameter of the form <"metadataSetNS:metadata">"value"</metadata> where
558	* metadataSetNS is the namespace of each tag, and creates a new element of
559	* the form <metadata name="metadataSetNS:metadata">"value"</metadata> for
560	* each. Each of these are then appended to the metadataList parameter.
561	* @param doc is the Document object using which the new metadata Elements
562	* are to be constructed
563	* @param metadataList is the <metadataList> Element to which the new
564	* metadata Elements are to be appended as children.
565	* @param metaDatastream the metadata datastream in string form (e.g. the
566	* Dublin Core metadata stored in the Fedora repository).
567	* @param metadataSet is the constant datastream identifier, e.g. "DC".
568	* At present this method applies to the DC metadata and any others like it
569	* where each tagname is different except for the constant dc: namespace.
570	* @param metafields is a \| separated string containing the metadatafields to
571	* extract or "all" if all fields are requested
572	*/
573	protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
574	String metaDatastream, String metadataSet, String metafields)
575	throws SAXException, IOException
576	{
577	Document src = builder.parse(
578	new InputSource(new StringReader(metaDatastream)));
579
580	// The following doesn't work for some reason: to retrieve all elements
581	// whose namespace prefix starts with "dc", we pass "*" for localName
582	//NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
583
584	// Longer way: get the children of the root document
585	NodeList children = src.getDocumentElement().getChildNodes();
586
587	for(int i = 0; i < children.getLength(); i++) {
588	String nodeName = children.item(i).getNodeName();
589	// check that the nodename starts with the metadataSet ("dc") namespace,
590	// which simultaneously ensures that the node's an element:
591	if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
592	// need to have a period for Greenstone instead of Fedora's colon
593	nodeName = nodeName.replace(COLON, PERIOD);
594	if(metadataSet.equals(DC)) { // dc:title -> dc.Title
595	nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
596	+ nodeName.substring(4);
597	}
598
599	// get the requested metadata fields
600	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(nodeName) != -1) {
601	Element metatag = (Element)children.item(i);
602	String value = FedoraCommons.getValue(metatag);
603	// <dc:tagname>value</dc:tagname>
604	// we're going to put this in our metadata element as
605	// <metadata name="dc.Tagname">value</metadata>
606
607	// create metadata of (name, value) pairs in target DOM (doc)
608	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
609	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
610
611	attribute.setValue(nodeName);
612	metadata.setAttributeNode(attribute);
613	Text content = doc.createTextNode(value);
614	metadata.appendChild(content);
615	metadataList.appendChild(metadata);
616	}
617	}
618	}
619	}
620
621	/** This method retrieves all the metadata elements in the metaDataStream
622	* of the form <"namespace:"metadata name="metadataName">value</metadata>
623	* where "namespace" is the namespace prefix of each tag, and metadataName
624	* is the name of the metadata (like author, title). For each element
625	* it creates a corresponding new element of the form
626	* <metadata name="namespace:metadataName">value</metadata>.
627	* Each of these are then appended to the metadataList parameter.
628	* @param doc is the Document object using which the new metadata Elements
629	* are to be constructed
630	* @param metadataList is the <metadataList> Element to which the new
631	* metadata Elements are to be appended as children.
632	* @param metaDatastream the metadata datastream in string form (e.g. the
633	* EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
634	* repository).
635	* @param metadataSet is the constant datastream identifier,
636	* e.g. "DLS" or "EX".
637	* At present this method applies to the DLS and EX metadata as they have
638	* constant tagnames throughout.
639	* @param metafields is a \| separated string containing the metadatafields to
640	* extract or "all" if all fields are requested.
641	*/
642	protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
643	String metaDatastream, String metadataSet, String metafields)
644	throws SAXException, IOException
645	{
646	// Namespace prefix can be "ex:" or "dls:"
647	String namespacePrefix = "";
648	if(!metadataSet.equals(EX)) {
649	// need to have a period for Greenstone instead of Fedora's colon
650	namespacePrefix = metadataSet.toLowerCase() + PERIOD;
651	}
652
653	Document src = builder.parse(
654	new InputSource(new StringReader(metaDatastream)));
655	NodeList metaTags = src.getElementsByTagName(
656	metadataSet.toLowerCase()+COLON+METADATA);
657	// Looking for tagnames: <ex:metadata> or <dls:metadata>
658
659	for(int i = 0; i < metaTags.getLength(); i++) {
660	Element metatag = (Element)metaTags.item(i);
661
662	// extract the metadata of (name, value) pairs from src DOM
663	// look for <metadata name="name">value</metadata>
664	String name = metatag.hasAttribute(NAME) ?
665	metatag.getAttribute(NAME) : "";
666	// sometimes, there are several metadata for the same name, in this
667	// case, look for a qualifier and append its value to the name to
668	// distinguish it uniquely:
669	if(metatag.hasAttribute(QUALIFIER)) {
670	name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
671	}
672	name = namespacePrefix + name; // prefix with namespace, if any
673	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(name) != -1) {
674	String value = FedoraCommons.getValue(metatag);
675
676	// create metadata of (name, value) pairs in target DOM (doc)
677	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
678	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
679	attribute.setValue(name);
680	metadata.setAttributeNode(attribute);
681	Text content = doc.createTextNode(value);
682	metadata.appendChild(content);
683
684	metadataList.appendChild(metadata);
685	}
686	}
687	}
688
689	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
690	* response message containing ONLY the Title metadata for the document.
691	* @param docID is a document identifier (docID can either be a <pid>
692	* of an item (document) in the fedora repository, or it can be
693	* "<pid>-sectionNumber".
694	* @return a GS3 DocumentMetadataRetrieve response message containing the
695	* Title metadata for the requested document */
696	public String getTitleMetadata(String docID) {
697	return getTitleMetadata(new String[] { docID });
698	}
699
700	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
701	* response message containing ONLY the Title metadata for the documents.
702	* @param docIDs is a list of document identifiers (where docID can either be
703	* a <pid> of an item (document) in the fedora repository, or it can be
704	* "<pid>-sectionNumber".
705	* @return a GS3 DocumentMetadataRetrieve response message containing the
706	* Title metadata for all the requested documents */
707	public String getTitleMetadata(String[] docIDs) {
708	// Must create message of the following form:
709	// <documentNodeList><documentNode nodeID="docID">
710	// <metadataList><metadata name="Title">sometitle</metadata>
711	// </metadataList></documentNode>
712
713	Document doc = builder.newDocument();
714	FedoraGS3RunException ex = null;
715
716	Element docNodeList = doc.createElement(
717	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
718	try{
719	for(int i = 0; i < docIDs.length; i++) {
720	Element docNode = getTitleMetadata(doc, docIDs[i]);
721	docNodeList.appendChild(docNode);
722	}
723	}catch(Exception e) {
724	ex = new FedoraGS3RunException(e);
725	//ex.setSpecifics("EX metadata datastream PID: \|" + docIDs[i] + "\|"); // for debugging PID
726	ex.setSpecifics("EX metadata datastream");
727	}
728
729	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
730	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
731	try{
732	return FedoraCommons.elementToString(responseMsg);
733	} catch(TransformerException e) {
734	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
735	+ " " + e;
736	}
737	}
738
739	/** Method that takes a new DOM document, as well as an identifier of either
740	* a document or document section and returns a documentNode element containing
741	* the title metadata for it:
742	* <documentNode nodeID="docID"><metadataList>
743	* <metadata name="Title">sometitle</metadata>
744	* </metadataList></documentNode>
745	* @return documentNode containing the metadata for the collection or
746	* document given by parameter ID
747	* @param docID denotes the id of a document or a document section, so id
748	* is either a document-pid or it's of the form documentpid-sectionNumber */
749	protected Element getTitleMetadata(Document doc, String docID)
750	throws RemoteException, UnsupportedEncodingException,
751	SAXException, IOException
752	{
753	// Returns a docNode element of the following form:
754	// <documentNode nodeID="docID">
755	// <metadataList><metadata name="Title">sometitle</metadata></metadataList>
756	// </documentNode>
757
758	// <documentNode nodeID="docID">
759	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
760	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
761	attribute.setValue(docID);
762	docNode.setAttributeNode(attribute);
763
764	// <metadataList>
765	Element metaList = doc.createElement(
766	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
767	// <metadata name="Title">
768	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
769	// if we connect it all up (append children), we can immediately add
770	// the name attribute into the metadata element:
771	metaList.appendChild(metadata);
772	docNode.appendChild(metaList);
773	metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
774
775	String title = "";
776	String sectionID = getSectionIDFromDocID(docID);
777	String docPID = getDocPIDFromDocID(docID);
778
779	// check if title of toplevel document is requested
780	if(sectionID.equals(""))
781	title = this.getDocTitle(docPID);
782	else { // title of document section
783	title = this.getSectionTitle(docPID, sectionID);
784	}
785
786	metadata.appendChild(doc.createTextNode(title));
787
788	return docNode;
789	}
790
791	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
792	* containing the requested portion of the document structure of the documents
793	* indicated by docIDs:
794	* @param docID is the document identifier of the document whose hierarchical
795	* structure is requested. The name of the collection is already included in the
796	* docID for a Fedora DL.
797	* @param structure - strings specifying the required structure of the document.
798	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
799	* @param info - strings specifying the required structural info of the document.
800	* It can be any combination of: siblingPosition, numSiblings, numChildren.
801	*/
802	public String getDocumentStructure(String docID, String[] structure, String[] info) {
803	return getStructure(new String[]{docID}, structure, info);
804	}
805
806
807	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
808	* containing the requested portion of the document structure of the documents
809	* indicated by docIDs:
810	* @param docIDs is an array of document identifiers of documents whose
811	* hierarchical structures are requested. The name of the collection is already
812	* included in the docID for a Fedora DL.
813	* @param structure - strings specifying the required structure of each document.
814	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
815	* @param info - strings specifying the required structural info of each document.
816	* It can be any combination of: siblingPosition, numSiblings, numChildren.
817	*/
818	public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
819	return getStructure(docIDs, structure, info);
820	}
821
822	/**
823	* Returns a greenstone3 DocumentStructureRetrieve XML response message
824	* containing the document structures for the given docIDs.
825	* Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
826	* greenstone formatted XML is returned. The requested section of the table
827	* of contents (TOC) for a document is converted into the greenstone3 xml
828	* format that is returned upon DocumentStructureRetrieve requests.
829	* @param docIDs the documentIDs for which the section's structure is returned;
830	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
831	* @param structure - the structure of the sections to return. Can be any combination of:
832	* ancestors, parent, siblings, children, descendants, entire.
833	* @param infos - strings containing any combination of the values: numChildren, numSiblings,
834	* siblingPosition. The requested info gets added as attributes to the returned root element.
835	* @return a greenstone3 DocumentStructureRetrieve XML response message in
836	* String format with the structure of the docIDs requested.
837	*/
838	protected String getStructure(String[] docIDs, String[] structure, String[] infos)
839	{
840	Document doc = builder.newDocument();
841	FedoraGS3RunException ex = null;
842	// <documentNodeList>
843	Element docNodeList = doc.createElement(
844	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
845
846	try{
847	// append the <documentNodes> for the docIDs
848	// to the docNodeList
849	//getStructureElement(docNodeList, docIDs, levels);
850	getStructureElement(docNodeList, docIDs, structure, infos);
851	} catch(Exception e) {
852	ex = new FedoraGS3RunException(e);
853	ex.setSpecifics("(requested portion of) TOC datastream");
854	}
855	// insert our <documentNodeList> into a GS3 response message
856	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
857	GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
858	try{
859	return FedoraCommons.elementToString(responseMsg);
860	} catch(TransformerException e) {
861	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
862	+ " " + e;
863	}
864	}
865
866
867	/** Given a <documentNodeList> portion of a greenstone3
868	* DocumentStructureRetrieve XML response message, this method will populate
869	* it with the <documentNodes> that represent the structure of the given docIDs.
870	* @param docNodeList is a <documentNodeList> to which <documentNodes> of
871	* the doc structures are appended.
872	* @param docIDs the documentIDs for which the section's structure is returned;
873	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
874	* @param structures - the structure of the sections to return. Can be any combination of:
875	* ancestors, parent, siblings, children, descendants, entire.
876	* @param infos - a string containing any combination of the values: numChildren, numSiblings,
877	* siblingPosition. The requested info gets added as attributes to the returned root element.
878	*/
879	protected void getStructureElement(Element docNodeList, String[] docIDs,
880	String[] structures, String[] infos)
881	throws RemoteException, UnsupportedEncodingException, SAXException,
882	IOException
883	{
884	// Make one string out of requested structure components, and one string from info components
885	String structure = "";
886	String info = "";
887	for(int i = 0; i < structures.length; i++) {
888	structure = structure + structures[i] + "\|";
889	}
890	for(int i = 0; i < infos.length; i++) {
891	info = info + infos[i] + "\|";
892	}
893
894	// process each docID
895	for(int i = 0; i < docIDs.length; i++) {
896	// work out the document's fedora PID and section ID
897	String sectionID = getSectionIDFromDocID(docIDs[i]);
898	String docPID = getDocPIDFromDocID(docIDs[i]);
899	if(sectionID.equals("")) {
900	sectionID = "1";
901	}
902
903	// get the required section, along with children or descendants
904	Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
905	Document doc = docNodeList.getOwnerDocument();
906
907	// copy-and-convert that structure into a structure format for GS3
908	Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
909
910	if(!info.equals("")) {
911	// <nodeStructureInfo>
912	// <info name="" value="" />
913	// <info name="" value="" />
914	// ...
915	// </nodeStructureInfo>
916	Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
917	Element root = srcDocElement.getOwnerDocument().getDocumentElement();
918
919	if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
920	String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
921	Element infoEl = doc.createElement(GSXML.INFO_ATT);
922	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
923	infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
924	nodeStructureInfo.appendChild(infoEl);
925	}
926
927	if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
928	String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
929	Element infoEl = doc.createElement(GSXML.INFO_ATT);
930	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
931	infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
932	nodeStructureInfo.appendChild(infoEl);
933	}
934
935	if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
936	String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
937	Element infoEl = doc.createElement(GSXML.INFO_ATT);
938	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
939	infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
940	nodeStructureInfo.appendChild(infoEl);
941	}
942
943	if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
944	String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
945	Element infoEl = doc.createElement(GSXML.INFO_ATT);
946	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
947	infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
948	nodeStructureInfo.appendChild(infoEl);
949	}
950
951	docNode.appendChild(nodeStructureInfo);
952	}
953
954	// add it to our list of documentNodes
955	docNodeList.appendChild(docNode);
956	}
957	}
958
959
960	/**
961	* Takes the portion of the XML document outlining the structure of the
962	* document (section)--in the format this is stored in Fedora--and returns
963	* Greenstone 3 DOM XML format for outlining document structure.
964	* @return a <documentNode> element that contains a greenstone3
965	* DocumentStructureRetrieve XML corresponding to the parameter Element section
966	* (which is in fedora XML), for the document indicated by docID.
967	* @param requestingDocID is the identifier of the document for which the
968	* structure was requested. It's this document's children or descendants that
969	* will be returned. Note that this is not always the same as (clear from)
970	* parameter docID.
971	* @param docID is the documentID for which the section's structure is
972	* returned where docID = "docPID-sectionNumber".
973	* @param section - the fedora section XML that is being mirrored in
974	* greenstone3 format.
975	*/
976	protected Element getStructure(Document doc, String requestingDocID,
977	String docID, Element section)
978	{
979	// we want to mirror the section's DOM (given in fedora XML) in
980	// greenstone3's XML for a DocumentStructureRetrieve response.
981
982	// <documentNode nodeID="docID"> - the docNode on which a structure retrieve
983	// is being performed
984	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
985	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
986	attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
987	docNode.setAttributeNode(attribute);
988
989	// <nodeStructure>
990	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
991
992	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
993	Element rootNode = createDocNodeFromSubsection(doc, section, docID);
994
995	// fills in the subtree of the rootNode in our nodeStructure element
996	createDocStructure(doc, section, rootNode, docID);
997	//where section represents the root section
998
999	nodeStructure.appendChild(rootNode);
1000	docNode.appendChild(nodeStructure);
1001	return docNode;
1002	}
1003
1004
1005	/** Recursive method that creates a documentStructure mirroring parameter
1006	* section, starting from parameter parent down to all descendants
1007	* @param section is the XML <Section> in the fedora repository's TOC
1008	* for the docPID whose substructure is to be mirrored
1009	* @param parent is the XML documentNode in the greenstone repository whose
1010	* descendants created by this method will correspond to the descendants of
1011	* parameter section.
1012	* @param doc is the document containing the parent;
1013	* @param docPID is the prefix of all nodeIDs in the parent's structure
1014	*/
1015	protected void createDocStructure(
1016	Document doc, Element section, Element parent, String docPID)
1017	{
1018	// get the section's children (if any)
1019	NodeList children = section.getChildNodes();
1020	for(int i = 0; i < children.getLength(); i++) {
1021	Node n = children.item(i);
1022
1023	if(n.getNodeName().equals(SECTION_ELEMENT)) {
1024	//then we know it's an element AND that its tagname is "Section"
1025	Element subsection = (Element)n;
1026	Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1027	parent.appendChild(child);
1028
1029	// recursion call on newly found child-element and subsection
1030	createDocStructure(doc, subsection, child, docPID);
1031	}
1032	}
1033	}
1034
1035	/** Given a particular subsection element, this method creates a
1036	* Greenstone3 DocumentNode element that mirrors it.
1037	* @param doc is the document that will contain the created DocumentNode
1038	* @param docID is the prefix of all nodeIDs in the parent's structure
1039	* @param subSection is the XML <Section> in the fedora repository's
1040	* TOC for the docPID which will be mirrored in the greenstone XML
1041	* documentNode that will be returned.
1042	* @return a greenstone <documentNode> that represents the fedora TOC's
1043	* <Section> element passed as parameter subSection. */
1044	protected Element createDocNodeFromSubsection(
1045	Document doc, Element subSection, String docID)
1046	{
1047	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1048	Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1049	docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1050	docNode.setAttributeNode(docType);
1051
1052	Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1053	String sectionID = subSection.hasAttribute(ID) ?
1054	subSection.getAttribute(ID) : "";
1055	if(sectionID.equals("1")
1056	&& subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1057	// reset the attribute without the section number (just "docID" may be important for democlient?)
1058	nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1059	} else {
1060	nodeID.setValue(docID + HYPHEN + sectionID);
1061	}
1062	//nodeID.setValue(docID + HYPHEN + sectionID);
1063	docNode.setAttributeNode(nodeID);
1064
1065	Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1066	if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1067	nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1068	}
1069	docNode.setAttributeNode(nodeType);
1070	return docNode;
1071	}
1072
1073
1074	/** Given an identifier that is either a docPID or a concatenation of
1075	* docPID+sectionID, this method works out the fedora assigned docPID and
1076	* sectionID and then calls getContentBody(docPID, sectionID) with those.
1077	* @param docID is expected to be of the form
1078	* "greenstone:<collectionName>-<docPID>-<sectionNumber>" or
1079	* "greenstone:<collectionName>-<docPID>"
1080	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1081	* "greenstone:<collectionName>-1" ("greenstone:<collectionName>-Section1")
1082	* is returned! */
1083	public String getContent(String docID) {
1084	return this.getContent(new String[]{docID});
1085	}
1086
1087	/** Given an identifier that is a concatenation of docID+sectionID, this
1088	* method works out the fedora assigned docPID and sectionID and then calls
1089	* getContentBody(docPID, sectionID) with those.
1090	* @param docIDs is an array of document identifiers of the form
1091	* "greenstone:<collectionName>-<docPID>-<sectionNumber>"
1092	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1093	* "greenstone:<collectionName>-Section1" is returned! */
1094	public String getContent(String[] docIDs) {
1095	Document doc = builder.newDocument();
1096	FedoraGS3RunException ex = null;
1097
1098	//<documentNodeList>
1099	Element docNodeList = doc.createElement(
1100	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1101
1102	try{
1103	for(int i = 0; i < docIDs.length; i++) {
1104	// get the sectionID and docPID from the docID
1105	String sectionID = this.removePrefix(
1106	getSectionIDFromDocID(docIDs[i]), SECTION);
1107	String docPID = getDocPIDFromDocID(docIDs[i]);
1108	if(sectionID.equals("")) // if no section is specified, get
1109	sectionID = "1"; // get the content for Section id="1"
1110
1111	// Get the contents for the requested section of document docPID
1112	String sectionContent = this.getContentBody(docPID, sectionID);
1113
1114	// set the nodeID attribute
1115	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1116	Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1117
1118	nodeId.setValue(docIDs[i]); // just set the docID which will contain
1119	// the docPID (and sectionID if already present)
1120
1121	docNode.setAttributeNode(nodeId);
1122	// set the text content to what was retrieved
1123	Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1124	Text textNode = doc.createTextNode(sectionContent.trim());
1125
1126	nodeContent.appendChild(textNode);
1127	docNode.appendChild(nodeContent);
1128	//add the documentNode to the docNodeList
1129	docNodeList.appendChild(docNode);
1130	}
1131	} catch(Exception e) {
1132	ex = new FedoraGS3RunException(e);
1133	ex.setSpecifics("requested doc Section datastream");
1134	}
1135	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1136	GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1137	try{
1138	return FedoraCommons.elementToString(responseMsg);
1139	} catch(TransformerException e) {
1140	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1141	+ " " + e;
1142	}
1143	}
1144
1145	/** Gets the contents of a textNode from a section.
1146	* @return the text content of a section.
1147	* @param docPID the pid of the document from which a section's text is to
1148	* be retrieved.
1149	* @param sectionID is the section identifier of the document denoted by
1150	* docPID whose text is to be returned.
1151	*/
1152	protected String getContentBody(String docPID, String sectionID)
1153	throws RemoteException, UnsupportedEncodingException,
1154	SAXException, IOException
1155	{
1156	String section = this.getSection(docPID, sectionID);
1157
1158	// the content is nested inside a <Section> element,
1159	// we extract it from there:
1160	InputSource source = new InputSource(new StringReader(section));
1161	Document doc = builder.parse(source);
1162
1163	// The document Element is the <Section> we want.
1164	// Get its text contents:
1165	section = FedoraCommons.getValue(doc.getDocumentElement());
1166
1167	// we are going to remove all occurrences of "_httpdocimg_/"
1168	// that precede associated filenames, because that's a GS3
1169	// defined macro for resolving relative urls. It won't help
1170	// with documents stored in fedora.
1171	section = section.replaceAll(GS3FilePathMacro+"/", "");
1172	return section;
1173	}
1174
1175	/** Here we create the greenstone's response message element:
1176	* <message&lg;<response><content></response></message>
1177	* @return a greenstone response-message element.
1178	* @param doc - the Document object which should me used to create the
1179	* <message> and <response> elements
1180	* @param content - the element that is to be nested inside <response>
1181	* @param ex - any exception that occurred when trying to create
1182	* the content parameter
1183	* @param responseType - the value for the type attribute of <response>,
1184	* such as "describe", "retrieve", "browse", "query"...
1185	* @param originator - indiates the collectionName or service (like
1186	* DocumentContentRetrieve) from where this response message originates
1187	*/
1188	protected Element createResponseMessage(Document doc, Element content,
1189	Exception ex, String responseType, String originator)
1190	{
1191	Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1192	// from = "FedoraGS3"
1193	Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1194	attribute.setValue(originator);
1195	response.setAttributeNode(attribute);
1196
1197	// type = "describe" or "process" - whatever's given in requestType:
1198	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1199	attribute.setValue(responseType);
1200	response.setAttributeNode(attribute);
1201
1202	if(content != null)
1203	response.appendChild(content);
1204
1205	// we'll create an error element for RemoteExceptions (web service problems)
1206	// and UnsupportedEncodingExceptions and
1207	if(ex != null) {
1208	Element error = doc.createElement(GSXML.ERROR_ELEM);
1209	error.appendChild(doc.createTextNode(ex.getMessage()));
1210	// now append the error to the <response> element (after
1211	// the content element whatever that was)
1212	response.appendChild(error);
1213	}
1214
1215	Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1216	message.appendChild(response);
1217	doc.appendChild(message);
1218	return message;
1219	}
1220
1221	/** @return a <serviceList> Element as defined by GS3: containing all the
1222	* services (denoted by <service> elements) that are supported by FedoraGS3.
1223	* At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1224	* DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1225	* ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1226	* @param doc - the Document object which should me used to create the
1227	* <serviceList> element */
1228	protected Element createServiceList(Document doc)
1229	{
1230	Element serviceList = doc.createElement(
1231	GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1232
1233	for(int i = 0; i < serviceNames.length; i++) {
1234	// create the <service name="serviceName[i]" type="servicetype" />
1235	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1236
1237	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1238	attribute.setValue(serviceNames[i]);
1239	service.setAttributeNode(attribute);
1240
1241	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1242	if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1243	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1244	else if(serviceNames[i].contains("Query")) // search services
1245	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1246	else
1247	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1248	service.setAttributeNode(attribute);
1249
1250	// add the service element to the serviceList element
1251	// <serviceList><service /></serviceList>
1252	serviceList.appendChild(service);
1253	}
1254	return serviceList;
1255	}
1256
1257	/** @return a GS3 response message for a describe services request:
1258	* indicating the list of services supported by the Fedora-Greenstone
1259	* interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1260	* DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1261	* ClassifierBrowseMetadataRetrieve - as indicated by member variable
1262	* serviceNames. */
1263	public String getServiceList()
1264	{
1265	Document doc = builder.newDocument();
1266	Element serviceList = createServiceList(doc);
1267	// make <serviceList> the body of the responseMessage:
1268	// <message><response><serviceList></response></message>
1269	Element responseMsg = createResponseMessage(doc, serviceList, null,
1270	GSXML.REQUEST_TYPE_DESCRIBE, "");
1271	try {
1272	return FedoraCommons.elementToString(responseMsg);
1273	}catch(TransformerException e) {
1274	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1275	+ " " + e;
1276	}
1277	}
1278
1279	/** @return a GS3 describe response message listing the collections and
1280	* collection-specific metadata stored in the Fedora-Greenstone repository. */
1281	public String getCollectionList()
1282	{
1283	Document doc = builder.newDocument();
1284	FedoraGS3RunException ex = null; // any RemoteException
1285
1286	// create the <collectionList /> element
1287	Element collectionList = doc.createElement(
1288	GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1289	try{
1290	String[] collectionNames = this.getCollectionNames(
1291	this.getCollections()); // this line could throw RemoteException
1292	for(int i = 0; i < collectionNames.length; i++) {
1293	// create the <collection name="somename" /> element
1294	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1295	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1296	attribute.setValue(collectionNames[i]);
1297	collection.setAttributeNode(attribute);
1298
1299	// append the <collection> element as child of <collectionList>
1300	collectionList.appendChild(collection);
1301
1302	//if(collection.hasAttribute(GSXML.NAME_ATT))
1303	//LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1304	}
1305	} catch(RemoteException e) { // if this happens, perhaps it's because it
1306	// can't find Greenstone collections in fedora repository?
1307	ex = new FedoraGS3RunException(e);
1308	ex.setSpecifics(
1309	"greenstone collections in fedora repository");
1310	}
1311
1312	// make <collectionList> the body of the responseMessage:
1313	// <message><response><collectionList></response></message>
1314	Element responseMsg = createResponseMessage(doc, collectionList, ex,
1315	GSXML.REQUEST_TYPE_DESCRIBE, "");
1316	try{
1317	return FedoraCommons.elementToString(responseMsg);
1318	}catch(TransformerException e) {
1319	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1320	+ " " + e;
1321	}
1322	}
1323
1324	/** @return a GS3 describe response message for a collection in the
1325	* Fedora-Greenstone repository.
1326	* @param collectionName - the name of the collection that is to be described.
1327	* It will be converted to a fedora collection pid, which is of the form
1328	* "greenstone:<collectionName>-collection". */
1329	public String describeCollection(String collectionName)
1330	{
1331	Document doc = builder.newDocument();
1332	FedoraGS3RunException ex = null;
1333
1334	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1335	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1336	attribute.setValue(collectionName);
1337	collection.setAttributeNode(attribute);
1338
1339	//<displayItem assigned="true" lang="en" name="name">
1340	//"some display name"</displayItem>
1341	Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1342
1343	attribute = doc.createAttribute(GSXML.LANG_ATT);
1344	attribute.setValue(this.lang);
1345	displayItem.setAttributeNode(attribute);
1346
1347	attribute = doc.createAttribute(GSXML.NAME_ATT);
1348	attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1349	displayItem.setAttributeNode(attribute);
1350
1351	try{
1352	Text textNode = doc.createTextNode(
1353	this.getCollectionTitle(getCollectionPID(collectionName)));
1354	displayItem.appendChild(textNode);
1355	} catch(Exception e) {
1356	// can't find Greenstone collections in fedora repository or problem
1357	// getting their titles from their metadata datastream?
1358	ex = new FedoraGS3RunException(e);
1359	ex.setSpecifics("greenstone collections or their metadata"
1360	+ "in the fedora repository");
1361	}
1362	// now append the displayItem element as child of the collection element
1363	collection.appendChild(displayItem);
1364	// get the <serviceList> and add it into the collection description.
1365	// Services for all collections in the FedoraGS3 repository are the
1366	// same, offering a ClassifierBrowse to browse titles by starting letter
1367	// and DocRetrieve services: Content, Metadata and Structure.
1368
1369	Element serviceList = createServiceList(doc);
1370	collection.appendChild(serviceList);
1371
1372	Element responseMsg = createResponseMessage(doc, collection, ex,
1373	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1374	try{
1375	return FedoraCommons.elementToString(responseMsg);
1376	}catch(TransformerException e) {
1377	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1378	+ " " + e;
1379	}
1380	}
1381
1382	/** @return a GS3 describe response message for the services of a collection
1383	* in the Fedora-Greenstone repository. So far, these services are the same for
1384	* all fedora collections: they are the services given in member variable
1385	* serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1386	* ClassifierBrowseMetadataRetrieve.
1387	* All collections in this Digital Library (Fedora Repository) share the
1388	* same services, so this method returns the same services as getServiceList();
1389	* @param collectionName - the name of the collection whose services are to
1390	* be described. It will be converted to a fedora collection pid, which is of
1391	* the form "greenstone:<collectionName>-collection". */
1392	public String describeCollectionServices(String collectionName)
1393	{
1394	Document doc = builder.newDocument();
1395
1396	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1397	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1398	attribute.setValue(collectionName);
1399	collection.setAttributeNode(attribute);
1400
1401	Element serviceList = createServiceList(doc);
1402	collection.appendChild(serviceList);
1403
1404	Element responseMsg = createResponseMessage(doc, collection, null,
1405	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1406	try{
1407	return FedoraCommons.elementToString(responseMsg);
1408	}catch(TransformerException e) {
1409	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1410	+ " " + e;
1411	}
1412	}
1413
1414	/** All collections in this Digital Library (Fedora Repository) share
1415	* the same services, so this method returns the same as
1416	* describeCollectionService(collName, serviceName).
1417	* @return a GS3 describe response message for the requested service
1418	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1419	* return nothing special except their names; browse (and any query)
1420	* return more complex XML responses.
1421	* @param serviceName - the name of the service in the collection which is to
1422	* be described.*/
1423	public String describeService(String serviceName)
1424	{
1425	// For all the retrieve services (incl ClassifierBrowseMetadataRetrieve)
1426	// we return:
1427	// <message><response from="<name>Retrieve" type="describe">
1428	// <service name="<name>Retrieve" type="retrieve" /></response></message>
1429	// But for browse (and any query) service, we return the data necessary
1430	// for displaying it
1431
1432	Document doc = this.builder.newDocument();
1433	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1434	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1435	attribute.setValue(serviceName);
1436	service.setAttributeNode(attribute);
1437
1438	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1439
1440	if(serviceName.toLowerCase().endsWith("retrieve")) {
1441	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1442	}
1443	else if(serviceName.toLowerCase().contains("browse")) {
1444	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1445
1446	// we need name and description <displayItem> elements
1447	Element displayItem
1448	= createNameValuePairElement(doc,
1449	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1450	service.appendChild(displayItem);
1451
1452	displayItem = createNameValuePairElement(doc,
1453	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1454	"Browse pre-defined classification hierarchies");
1455	service.appendChild(displayItem);
1456
1457	// now need a classifierList
1458	Element classifierList = doc.createElement(
1459	GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1460
1461	int classifierNum = 1;
1462	// append a <classifier content="some letter" name="CL#">
1463	// for each letter of the alphabet:
1464	Element classifier = createClassifierElement(doc, "TitleByLetter",
1465	classifierNum++, "titles by letter", "Browse titles by letter");
1466	// now add this <classifier> to the <classifierList>
1467	classifierList.appendChild(classifier);
1468
1469	// ANY MORE CLASSIFIERS? ADD THEM HERE
1470
1471	service.appendChild(classifierList);
1472	} // ELSE check for whether it is a query service
1473	else if(serviceName.toLowerCase().contains("query")) {
1474	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1475	if(serviceName.equals("TextQuery")) {
1476	describeTextQueryService(service);
1477	} else if(serviceName.equals("FieldQuery")) {
1478	describeFieldQueryService(service);
1479	}
1480	}
1481
1482	// don't forget to add the type attribute to the service!
1483	service.setAttributeNode(attribute);
1484
1485	String from = serviceName;
1486
1487	Element responseMsg = createResponseMessage(doc, service, null,
1488	GSXML.REQUEST_TYPE_DESCRIBE, from);
1489	try{
1490	return FedoraCommons.elementToString(responseMsg);
1491	}catch(TransformerException e) {
1492	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1493	+ " " + e;
1494	}
1495	}
1496
1497	/** Appends children to the parameter service Element that make the
1498	* final service Element into a describe response XML for FedoraGS3's
1499	* TextQuery service.
1500	* @param service is the service Element that is being filled out. */
1501	protected void describeTextQueryService(Element service) {
1502	Document doc = service.getOwnerDocument();
1503	// we need name, submit (button) and description <displayItem> elements
1504	Element displayItem = createNameValuePairElement(doc,
1505	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1506	"Text Search");
1507	service.appendChild(displayItem);
1508
1509	displayItem = createNameValuePairElement(doc,
1510	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1511	service.appendChild(displayItem);
1512
1513	displayItem = createNameValuePairElement(doc,
1514	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1515	"Title and full-text search service");
1516	service.appendChild(displayItem);
1517
1518	//create the <paramList>
1519	Element paramList = doc.createElement(
1520	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1521
1522	// we ignore granularity to search at: it will always be
1523	// document and section level
1524	// we ignore casefolding: always on (that is, case is irrelevant)
1525	// we ignore document display order: always ranked
1526
1527	// Constructing the following:
1528	// <param default="100" name="maxDocs" type="integer">
1529	// <displayItem name="name">Maximum hits to return</displayItem>
1530	// </param>
1531	Element param = doc.createElement(GSXML.PARAM_ELEM);
1532
1533	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1534	attribute.setValue(MAXDOCS);
1535	param.setAttributeNode(attribute);
1536
1537	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1538	attribute.setValue("100");
1539	param.setAttributeNode(attribute);
1540
1541	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1542	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1543	param.setAttributeNode(attribute);
1544
1545	displayItem = createNameValuePairElement(doc,
1546	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1547	"Maximum hits to return");
1548	param.appendChild(displayItem);
1549
1550	paramList.appendChild(param);
1551
1552	// Constructing the following:
1553	// <param name="query" type="string">
1554	// <displayItem name="name">Query string</displayItem>
1555	// </param>
1556	param = doc.createElement(GSXML.PARAM_ELEM);
1557
1558	attribute = doc.createAttribute(GSXML.NAME_ATT);
1559	attribute.setValue(QUERY);
1560	param.setAttributeNode(attribute);
1561
1562	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1563	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1564	param.setAttributeNode(attribute);
1565
1566	displayItem = createNameValuePairElement(doc,
1567	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1568	"Query string");
1569	param.appendChild(displayItem);
1570
1571	paramList.appendChild(param);
1572
1573	service.appendChild(paramList);
1574	}
1575
1576	/** Appends children to the parameter service Element that make the
1577	* final service Element into a describe response XML for FedoraGS3's
1578	* FieldQuery service.
1579	* @param service is the service Element that is being filled out. */
1580	protected void describeFieldQueryService(Element service) {
1581	Document doc = service.getOwnerDocument();
1582	// we need name, submit (button) and description <displayItem> elements
1583	Element displayItem = createNameValuePairElement(doc,
1584	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1585	"Form Search");
1586	service.appendChild(displayItem);
1587
1588	displayItem = createNameValuePairElement(doc,
1589	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1590	service.appendChild(displayItem);
1591
1592	displayItem = createNameValuePairElement(doc,
1593	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1594	"Simple fielded search");
1595	service.appendChild(displayItem);
1596
1597	//create the <paramList>
1598	Element paramList = doc.createElement(
1599	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1600
1601	// we ignore granularity to search at: it will always be
1602	// document and section level
1603	// we ignore casefolding: always on (that is, case is irrelevant)
1604	// we ignore document display order: always ranked
1605
1606	// Constructing the following:
1607	// <param default="100" name="maxDocs" type="integer">
1608	// <displayItem name="name">Maximum hits to return</displayItem>
1609	// </param>
1610	Element param = doc.createElement(GSXML.PARAM_ELEM);
1611
1612	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1613	attribute.setValue(MAXDOCS);
1614	param.setAttributeNode(attribute);
1615
1616	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1617	attribute.setValue("100");
1618	param.setAttributeNode(attribute);
1619
1620	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1621	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1622	param.setAttributeNode(attribute);
1623
1624	displayItem = createNameValuePairElement(doc,
1625	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1626	"Maximum hits to return");
1627	param.appendChild(displayItem);
1628
1629	paramList.appendChild(param);
1630
1631	// Constructing the following:
1632	// <param name="simpleField" occurs="4" type="multi">
1633	// <displayItem name="name"></displayItem>
1634	//
1635	// <param name="query" type="string">
1636	// <displayItem name="name">Word or phrase </displayItem>
1637	// </param>
1638	//
1639	// <param default="allFields" name="fieldname" type="enum_single">
1640	// <displayItem name="name">in field</displayItem>
1641	//
1642	// <option name="docTitles">
1643	// <displayItem name="name">document titles</displayItem>
1644	// </option>
1645	// <option name="allTitles">
1646	// <displayItem name="name">document and section titles</displayItem>
1647	// </option>
1648	// <option name="fullText">
1649	// <displayItem name="name">full text</displayItem>
1650	// </option>
1651	// <option name="all">
1652	// <displayItem name="name">titles and full text</displayItem>
1653	// </option>
1654	// <option name="">
1655	// <displayItem name="name"></displayItem>
1656	// </option>
1657	// </param>
1658	// </param>
1659	Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1660	attribute = doc.createAttribute(GSXML.NAME_ATT);
1661	attribute.setValue(SIMPLEFIELD_ATT);
1662	rowOfParams.setAttributeNode(attribute);
1663
1664	// we want the row of controls to occur multiple times
1665	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1666	attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1667	rowOfParams.setAttributeNode(attribute);
1668
1669	attribute = doc.createAttribute(OCCURS_ATT);
1670	attribute.setValue("4"); // we want this row to occur 4 times
1671	rowOfParams.setAttributeNode(attribute);
1672
1673	// <param name="query" type="string">
1674	// <displayItem name="name">Word or phrase </displayItem>
1675	// </param>
1676	param = doc.createElement(GSXML.PARAM_ELEM);
1677
1678	attribute = doc.createAttribute(GSXML.NAME_ATT);
1679	attribute.setValue(QUERY);
1680	param.setAttributeNode(attribute);
1681
1682	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1683	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1684	param.setAttributeNode(attribute);
1685
1686	displayItem = createNameValuePairElement(doc,
1687	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1688	"Word or phrase");
1689	param.appendChild(displayItem);
1690	rowOfParams.appendChild(param);
1691
1692	// <param default="allFields" name="fieldName" type="enum_single">
1693	// <displayItem name="name">in field</displayItem>
1694	param = doc.createElement(GSXML.PARAM_ELEM);
1695	attribute = doc.createAttribute(GSXML.NAME_ATT);
1696	attribute.setValue(FIELDNAME_ATT);
1697	param.setAttributeNode(attribute);
1698
1699	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1700	attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1701	param.setAttributeNode(attribute);
1702
1703	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1704	attribute.setValue(ALL_FIELDS);
1705	param.setAttributeNode(attribute);
1706
1707	displayItem = createNameValuePairElement(doc,
1708	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1709	"in field");
1710	param.appendChild(displayItem);
1711
1712	String[] searchFieldNames
1713	= {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1714	String[] searchFieldDisplay = {"all titles and full-text",
1715	"document titles only", "document and section titles",
1716	"full-text only"};
1717
1718	// for each fieldName create an option element and insert
1719	// the option into the enum_multi drop-down param:
1720	// <option name="fieldName">
1721	// <displayItem name="name">fieldName</displayItem>
1722	// </option>
1723	for(int i = 0; i < searchFieldNames.length; i++) {
1724	Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1725	attribute = doc.createAttribute(GSXML.NAME_ATT);
1726	attribute.setValue(searchFieldNames[i]);
1727	option.setAttributeNode(attribute);
1728
1729	displayItem = createNameValuePairElement(doc,
1730	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1731	searchFieldDisplay[i]);
1732	option.appendChild(displayItem);
1733	param.appendChild(option); // add option to the drop-down box
1734	}
1735
1736	rowOfParams.appendChild(param);
1737	paramList.appendChild(rowOfParams);
1738	service.appendChild(paramList);
1739	}
1740
1741	/**
1742	* @return a GS3 describe response message for the requested service
1743	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1744	* return nothing special except their names; browse (and any query)
1745	* return more complex XML responses.
1746	* All collections in this Digital Library (Fedora Repository) share
1747	* the same services, so this method returns the same as
1748	* describeService(serviceName).
1749	* @param collectionName - the name of the collection whose service is to
1750	* be described. It will be converted to a fedora collection pid, which is of
1751	* the form "greenstone:<collectionName>-collection".
1752	* @param serviceName - the name of the service in the collection which is to
1753	* be described. */
1754	public String describeCollectionService(String collectionName,
1755	String serviceName) {
1756	// collectionName can be ignored, because all services are FedoraGS3
1757	// services and are not unique to any particular (greenstone) collection.
1758	return describeService(serviceName);
1759	}
1760
1761	/** This method performs the implemented browse operation: allowing the
1762	* user to browse the titles of documents in the given collection by letter
1763	* and returning the results.
1764	* @param collectionName is the name of the collection whose documents
1765	* starting with the given letter will be returned.
1766	* @param classifierIDs are the ids of the classifiers on which to browse. In
1767	* this case, the classifier indicates whether we browse titles by letter, or
1768	* browse (documents) by collection; and it is of the form <CL(letter)>.
1769	* @param structures - the requested browse substructure. Can be any combination
1770	* of ancestors, parent, siblings, children, descendants.
1771	* @param infos - the requested structural info. Can be numSiblings,
1772	* siblingPosition, numChildren.
1773	* @return a GS3 ClassifierBrowse response message which lists all
1774	* the documents that start with the letter indicated by parameter classifier.
1775	*/
1776	public String browse(String collectionName, String[] classifierIDs,
1777	String[] structures, String[] infos)
1778	{
1779	// Construct one string from the structures and structural info arrays
1780	String structure = "";
1781	String info = "";
1782	for(int i = 0; i < structures.length; i++) {
1783	structure = structure + structures[i] + "\|";
1784	}
1785	for(int i = 0; i < infos.length; i++) {
1786	info = info + infos[i] + "\|";
1787	}
1788
1789	Document doc = builder.newDocument();
1790	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1791
1792	// <classifierNodeList>
1793	Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1794
1795	for(int i = 0; i < classifierIDs.length; i++) {
1796	if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1797	browseTitlesByLetterClassifier(doc, classifierNodeList,
1798	collectionName, classifierIDs[i],
1799	structure, info);
1800	}
1801	}
1802
1803	Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1804	GSXML.REQUEST_TYPE_PROCESS, /collectionName+/ /"ClassifierBrowse");
1805	try {
1806	return FedoraCommons.elementToString(responseMsg);
1807	} catch(TransformerException e) {
1808	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1809	+ " " + e;
1810	}
1811	}
1812
1813	/** CL1 browsing classifier: browsing titles by starting letter.
1814	* The browsing structure is retrieved.
1815	* @param doc - the document object that will contain the CL1 browsing structure.
1816	* @param classifierNodeList - the classifiers will be added to this nodeList.
1817	* @param collectionName - name of the collection through which we are browsing CL1.
1818	* @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1819	* a letter.
1820	* @param structure - the requested browse substructure. Can be any combination of
1821	* ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1822	* @param info - the requested structural info. Can be numSiblings, siblingPosition,
1823	* numChildren.
1824	* @return the classifierNodeList with the CL1 classifier browse structure.
1825	*/
1826	public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1827	String collectionName, String classifierID,
1828	String structure, String info)
1829	{
1830	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1831
1832	if(structure.indexOf("entire") != -1) {
1833	structure = structure + "ancestors\|descendants";
1834	}
1835
1836	// Structure of ancestors and children only at this stage
1837	int firstLevel = classifierID.indexOf('.');
1838	int secondLevel = classifierID.lastIndexOf('.');
1839
1840	// <nodeStructure>
1841	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1842
1843	// requested classifier node
1844	Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1845	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1846	attribute.setValue(classifierID);
1847	classNode.setAttributeNode(attribute);
1848
1849	if(firstLevel == -1) { // CL1 - toplevel node
1850	Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1851
1852	classifierNodeList.appendChild(classNode);
1853	classNode.appendChild(nodeStructure);
1854	nodeStructure.appendChild(root);
1855	root.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1856
1857	if(structure.indexOf("descendants") != -1) {
1858	getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1859	} else if(structure.indexOf("children") != -1) {
1860	getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1861	}
1862	// nothing to be done for siblings
1863	}
1864	else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1865
1866	if(structure.indexOf("parent") != -1
1867	\|\| structure.indexOf("ancestors") != -1
1868	\|\| structure.indexOf("siblings") != -1) {
1869	String toplevelID = classifierID.substring(0, firstLevel);
1870	Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1871	attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1872	attribute.setValue(toplevelID);
1873	toplevelNode.setAttributeNode(attribute);
1874	Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1875
1876	classifierNodeList.appendChild(toplevelNode);
1877	toplevelNode.appendChild(nodeStructure);
1878	nodeStructure.appendChild(node);
1879	node.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1880
1881	if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1882	getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1883	// pass the requested node (classNode) so that it is attached in the correct
1884	// location among its siblings, and to ensure that it is not recreated.
1885	// getTitlesByLetterStructure() will append classNode to node
1886	} else {
1887	node.appendChild(classNode);
1888	classNode.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1889	}
1890	} else {
1891	Element node = (Element)classNode.cloneNode(true);
1892	classifierNodeList.appendChild(node);
1893	node.appendChild(nodeStructure);
1894	nodeStructure.appendChild(classNode);
1895	classNode.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
1896	}
1897
1898	int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1899	char ch = (char)(num - 1 + 'A');
1900	if(structure.indexOf("descendants") != -1) {
1901	getTitlesForLetter(ch, collectionName, classNode, "descendants");
1902	} else if(structure.indexOf("children") != -1) {
1903	getTitlesForLetter(ch, collectionName, classNode, "children");
1904	}
1905	}
1906	else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1907	LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1908	}
1909
1910	return classifierNodeList;
1911	}
1912
1913	/** Creates a (CL1) subclassifier element for the docs whose titles start with
1914	* the given letter.
1915	* @param ch - the starting letter of the document titles to retrieve.
1916	* @param collectionName - name of the collection through which we are browsing CL1.
1917	* @param classifierNode - the docNodes found will be appended to this node.
1918	* @param depthStructure - can be descendants or children. Specifies what to retrieve:
1919	* gets descendants of any documents found, otherwise gets just the children.
1920	* @return the given classifierNode which will have the child (or descendant) documents
1921	* appended to it.
1922	*/
1923	public Element getTitlesForLetter(char ch, String collectionName,
1924	Element classifierNode, String depthStructure)
1925	{
1926	Document doc = classifierNode.getOwnerDocument();
1927	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1928
1929
1930	// Retrieve the document structure for each subClassifierID:
1931	// all the documents that begin with its letter.
1932	String letter = String.valueOf(ch);
1933	try {
1934	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1935	if(docPIDs.length == 0) {
1936	return classifierNode; // skip letters that don't have any kids
1937	}
1938
1939	for(int i = 0; i < docPIDs.length; i++) {
1940	// work out the document's fedora PID and section ID
1941	String sectionID = getSectionIDFromDocID(docPIDs[i]);
1942	String docPID = getDocPIDFromDocID(docPIDs[i]);
1943
1944	// get the required section, along with children or descendants
1945	Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1946
1947	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1948	Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1949
1950	// fills in the subtree of the rootNode in our nodeStructure element
1951	createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1952	classifierNode.appendChild(docRootNode);
1953	}
1954	} catch(Exception e) {
1955	ex = new FedoraGS3RunException(e);
1956	ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1957	}
1958
1959	return classifierNode;
1960	}
1961
1962
1963	/** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1964	* starting letter of the alphabet. X is each letter of the alphabet for which there
1965	* are matching document titles.
1966	* @param collectionName - name of the collection through which we are browsing CL1.
1967	* @param classifierNode - the docNodes found will be appended to this node.
1968	* @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1969	* the IDs for the subclassifiers (CL.x).
1970	* @param getDescendants - if true, get descendants of any documents found, otherwise
1971	* get just the children.
1972	* @param wantedSibling - the node (already created) whose siblings are requested. We
1973	* need to make sure not to recreate this node when creating its sibling nodes.
1974	* @return the given classifierNode, with the CL.x subclassifiers for the letters of
1975	* the alphabet that are represented in the document titles.
1976	*/
1977	public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1978	String classifierID, boolean getDescendants,
1979	Element wantedSibling)
1980	{
1981	String ID = "";
1982	if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1983	ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1984	}
1985
1986	Document doc = classifierNode.getOwnerDocument();
1987	FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1988
1989	// We're going to loop to the end of the alphabet
1990	int count = 1;
1991	for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1992	// Retrieve the document structure for each subClassifierID:
1993	// all the documents that begin with its letter.
1994	String letter = String.valueOf(ch);
1995	try {
1996	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1997	if(docPIDs.length == 0) {
1998	continue; // skip letters that don't have any kids
1999	}
2000	Element subClassifier = null;
2001	if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
2002	// already have the requested node, don't recreate it
2003	subClassifier = wantedSibling;
2004	} else {
2005	// <classifierNode childType="VList" nodeID="CL1.x">
2006	subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
2007	subClassifier.setAttribute(GSXML.CHILD_TYPE_ATT, GSXML.VLIST);
2008	subClassifier.setAttribute(GSXML.NODE_ID_ATT, classifierID+"."+count);
2009	subClassifier.setAttribute(GSXML.CLASSIFIER_STYLE_ATT, GSXML.VLIST);
2010	}
2011	classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
2012
2013	if(getDescendants) { // get the documents
2014
2015	// append the <docNodes> for the docPIDs found as children
2016	// of subclassifier
2017
2018	for(int i = 0; i < docPIDs.length; i++) {
2019	// work out the document's fedora PID and section ID
2020	String sectionID = getSectionIDFromDocID(docPIDs[i]);
2021	String docPID = getDocPIDFromDocID(docPIDs[i]);
2022
2023	// get the required section, along with children or descendants
2024	Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2025
2026	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2027	Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2028
2029	// fills in the subtree of the rootNode in our nodeStructure element
2030	createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2031	subClassifier.appendChild(rootNode);
2032	}
2033	}
2034	} catch(Exception e) {
2035	ex = new FedoraGS3RunException(e);
2036	ex.setSpecifics("requested portion of TOC file or "
2037	+ "trouble with fielded search ");
2038	}
2039	}
2040	return classifierNode;
2041	}
2042
2043
2044	/** This method performs something equivalent to a greenstone3
2045	* ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2046	* @param classNodeIDs array of classifierNode IDs for which the metadata
2047	* needs to be returned.
2048	* @param metafields are the classifier metadata fields that are to be returned.
2049	* At present this method ignores them/pretends the requested metafields are
2050	* "all" and always returns the Title meta for the requested classifier nodes
2051	* (because that is all the metadata this Fedora classifier has at present).
2052	* @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2053	* lists the metadata for all the classifierNodes passed as parameter.*/
2054	public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2055	{
2056	Document doc = this.builder.newDocument();
2057	// <classifierNodeList>
2058	Element classifierNodeList = doc.createElement(
2059	GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2060
2061	// create <classifierNode><metadataList><metadata>s
2062	// </metadataList></classifierNode> for all letters of the alphabet
2063	for(int i = 0; i < classNodeIDs.length; i++) {
2064	// strip ID of everything before the first '.' (i.e. remove "CL#.")
2065	int index = classNodeIDs[i].indexOf('.');
2066	String subClassifierNumber = classNodeIDs[i].substring(index+1);
2067	index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2068	if(index != -1) {
2069	subClassifierNumber = subClassifierNumber.substring(0, index);
2070	}
2071	int subClassifierNum = Integer.parseInt(subClassifierNumber);
2072	String classifierName = "";
2073	if(subClassifierNum == 0) { // no document titles started with a letter
2074	classifierName = "A-Z";
2075	} else {
2076	char letter = (char)('A' + subClassifierNum - 1); // A = 1
2077	classifierName = String.valueOf(letter);
2078	}
2079
2080	// <classifierNode nodeID="CL#.subNum">
2081	Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2082	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2083	attribute.setValue(classNodeIDs[i]);
2084	classifierNode.setAttributeNode(attribute);
2085
2086	// <metadataList>
2087	Element metadataList = doc.createElement(
2088	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2089
2090	// at least one metadata element: that of the title of this
2091	// classifierNode:
2092	// <metadata name="Title">letter</metadata>
2093	Element metadata = this.createNameValuePairElement(doc,
2094	GSXML.METADATA_ELEM, "Title", classifierName);
2095
2096	// now connect up everything
2097	metadataList.appendChild(metadata);
2098	classifierNode.appendChild(metadataList);
2099	classifierNodeList.appendChild(classifierNode);
2100	}
2101
2102	Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2103	GSXML.REQUEST_TYPE_PROCESS, //collName +
2104	"ClassifierBrowseMetadataRetrieve");
2105	try{
2106	return FedoraCommons.elementToString(responseMsg);
2107	}catch(TransformerException e) {
2108	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2109	+ " " + e;
2110	}
2111	}
2112
2113	/** @return a newly created element of the following format:
2114	* <classifier content="somecontent" name="CL+num">
2115	* <displayItem name="name">someClassifierName</displayItem>
2116	* <displayItem name="description">Browse by classifier name</displayItem>
2117	* </classifier>
2118	* @param doc - the document used to create the element
2119	* @param content - value of the content attribute
2120	* @param classifierNum - the number suffixed to the CL, together forming
2121	* the classifier Node's ID
2122	* @param displayNameVal is the bodytext of a named displayItem element
2123	* @param displayDescrVal is the bodytext of a displayItem element with
2124	* description */
2125	protected Element createClassifierElement(Document doc, String content,
2126	int classifierNum, String displayNameVal, String displayDescrVal)
2127	{
2128	final String CL = "CL";
2129	Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2130	// content attribute
2131	Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2132	att.setValue(content);
2133	classifier.setAttributeNode(att);
2134	// name attribute
2135	att = doc.createAttribute(GSXML.NAME_ATT);
2136	att.setValue(CL + classifierNum);
2137	classifier.setAttributeNode(att);
2138
2139	// now create the displayItem children for classifier:
2140	// <displayItem name="name">#letter</displayItem>
2141	// <displayItem name="description">Browse titles starting with #letter</displayItem>
2142	Element displayItem = createNameValuePairElement(doc,
2143	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2144	classifier.appendChild(displayItem);
2145	displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2146	GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2147	classifier.appendChild(displayItem);
2148
2149	return classifier;
2150	}
2151
2152
2153	/** @return a newly created element of the following format:
2154	* <elementName name="somename">"some display value"</elementName>
2155	* @param doc - the document used to create the element
2156	* @param elementName - the tag name
2157	* @param name - value of attribute name
2158	* @param value - the body text of the element */
2159	protected Element createNameValuePairElement(Document doc, String elementName,
2160	String name, String value) {
2161	// <elementName name="somename">"some display value"</elementName>
2162	Element element = doc.createElement(elementName);
2163	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2164	attribute.setValue(name);
2165	element.setAttributeNode(attribute);
2166
2167	element.appendChild(doc.createTextNode(value));
2168	return element;
2169	}
2170
2171	/**
2172	* @param collection is the collection to search in
2173	* @param query is the query term to search for. It won't specify the
2174	* indexed field to search in, which will mean that GSearch will
2175	* search all default indexed fields.
2176	* @param maxDocs is the maximum number of results to return (which
2177	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2178	*/
2179	public String[] textQuery(String collection, String query,
2180	int maxDocs)
2181	throws Exception
2182	{
2183	// no need to search there is no query or query is empty spaces
2184	if(query.trim().equals(""))
2185	return new String[]{};
2186
2187	// QUERY value won't specify indexed field to search, Fedora
2188	// Gsearch will take that as meaning all default indexed fields.
2189	// Params to search() method below: string of fielded query terms;
2190	// hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2191	query = query + " " + "PID" + COLON + GREENSTONE;
2192
2193	String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2194	// now we have the XML returned by FedoraGSearch, get the pids
2195	// of the documents returned (if any)
2196	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2197	collection, searchResult);
2198	return pids;
2199	}
2200
2201	/**
2202	* This method performs a fieldquery, searching for x number of phrases
2203	* in each of the 4 indexed fields.
2204	* @param collection is the collection to search in
2205	* @param nameValParamsMap is a Map of several(key, value) entries,
2206	* 4 of which we're concerned with here:
2207	* - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2208	* - the values are a comma separated list of terms (phrases or single
2209	* words) to search that field in. There may be more than 1 or
2210	* there may be none (in which case there may be N empty values or
2211	* spaces separated by commas).
2212	* @param maxDocs is the maximum number of results to return (which
2213	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2214	* */
2215	public String[] fieldQuery(String collection, Map nameValParamsMap,
2216	int maxDocs)
2217	throws Exception
2218	{
2219	// we're going to maintain a list of UNIQUE pids that were returned
2220	// in search results. Hence we use Set:
2221	java.util.Set set = new java.util.HashSet();
2222
2223	// (1) Use Fedora's search to search document titles, if they were
2224	// specified:
2225	String[] docTitlepids = {};
2226
2227	String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2228	if(docTitleTerms != null) { // no doc titles may have been specified
2229	String[] phrases = docTitleTerms.split(COMMA);
2230
2231	// search the individual phrases first:
2232	for(int i = 0; i < phrases.length; i++) {
2233	if(phrases.equals("") \|\| phrases.equals(" "))
2234	continue; //skip when there are no terms
2235	docTitlepids = this.searchDocumentTitles(
2236	collection, phrases[i], false);
2237	for(int j = 0; j < docTitlepids.length; j++)
2238	set.add(docTitlepids[j]);
2239	}
2240	}
2241	// (2) use FedoraGSearch to search doc AND section titles, and
2242	// fulltext (in case these were specified in nameValParamsMap):
2243	String searchResult = this.fedoraGSearch.search(
2244	nameValParamsMap, 1, maxDocs);
2245
2246	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2247	collection, searchResult);
2248
2249	for(int i = 0; i < pids.length; i++)
2250	set.add(pids[i]);
2251
2252	pids = null;
2253	pids = new String[set.size()];
2254	set.toArray(pids); // unique pids
2255	return pids;
2256	}
2257
2258	/** @return a String representing Greenstone3 XML for a query process
2259	* response returning the results for the query denoted by parameter
2260	* nameValParamsMap.
2261	* @param nameValParamsMap is a Hashmap of name and value pairs for all the
2262	* query field data values. The names match the field names that
2263	* describeCollectionService() would have returned for the query service.
2264	* @param collection is the name of the collection
2265	* @param service is the name of the query service
2266	* This method is only ever called when any of the services in the digital
2267	* library described themselves as type=query. Therefore any digital
2268	* libraries that have no query services, can just return emtpy message
2269	* strings (or even "") since this method will never be called on them
2270	* anyway. */
2271	public String query(String collection, String service,
2272	Map nameValParamsMap)
2273	{
2274	FedoraGS3RunException ex = null;
2275	// (1) obtain the requested number of maximum result documents
2276	int maxDocs = 100;
2277	try{
2278	maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2279	} catch(NumberFormatException e) {
2280	maxDocs = 100;
2281	}
2282
2283	String pids[] = {};
2284	// (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2285	if(service.endsWith("TextQuery")) {
2286	try {
2287	// get the Query field:
2288	String query = (String)nameValParamsMap.get(QUERY);
2289	pids = textQuery(collection, query, maxDocs);
2290	}
2291	catch(Exception e) {
2292	LOG.error("Error in TextQuery processing: " + e);
2293	ex = new FedoraGS3RunException(
2294	"When trying to use FedoraGenericSearch for a TextQuery", e);
2295
2296	}
2297	} else { // (3) FieldQuery
2298	// first get the comma-separated lists
2299	String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2300	String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2301	// both are comma separated lists, so split both on 'comma'
2302	String[] fieldNames = listOfFieldNames.split(COMMA);
2303	String[] searchTerms = listOfSearchTerms.split(COMMA);
2304
2305	// In the fieldNames and searchTerms lists of nameValParamsMap,
2306	// each searchTerm element was matched with its correspondingly
2307	// indexed fieldName.
2308	// A new map is going to reorganise this, by putting all terms
2309	// for a particular fieldName together in a comma separated list
2310	// and associating that with the fieldName. I.e. (key, value) ->
2311	// (fieldName, comma-separated list of all terms in that field)
2312	Map map = new HashMap();
2313	for(int i = 0; i < searchTerms.length; i++) {
2314	// there may be fewer searchTerms than fieldNames (since some
2315	// fieldNames may have been left empty), so loop on searchTerms
2316	if(map.containsKey(fieldNames[i])) { // fieldName is already
2317	// in the list, so append comma with new value
2318	String termsList = (String)map.get(fieldNames[i]);
2319	termsList = termsList + COMMA + searchTerms[i];
2320	map.put(fieldNames[i], termsList);
2321	} else { // this is the first time this fieldName occurred
2322	// just put the fieldName with searchTerm as-is
2323	map.put(fieldNames[i], searchTerms[i]);
2324	}
2325	}
2326
2327	try {
2328	// For fieldquery, we search on all the fieldNames specified
2329	// - if DOC_TITLES is specified then we use Fedora's search
2330	// - for all other fieldNames specified, we use FedoraGSearch
2331	pids = fieldQuery(collection, map, maxDocs);
2332	}
2333	catch(Exception e) {
2334	LOG.error("Error in FieldQuery processing: " + e);
2335	ex = new FedoraGS3RunException(
2336	"When trying to use FedoraGenericSearch for a FieldQuery", e);
2337	}
2338	}
2339
2340	// Build Greenstone XML Query response message from
2341	// the pids (which should be document identifiers)
2342	Document doc = builder.newDocument();
2343	// <metadataList><metadata name="numDocsMatched" value="n" />
2344	// </metadataList>
2345	Element metadataList = doc.createElement(
2346	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2347	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2348	metadata.setAttribute(GSXML.NAME_ATT, NUM_DOCS_MATCHED);
2349	metadata.setAttribute(GSXML.VALUE_ATT, Integer.toString(pids.length));
2350	metadataList.appendChild(metadata);
2351
2352	metadata = doc.createElement(GSXML.METADATA_ELEM);
2353	metadata.setAttribute(GSXML.NAME_ATT, "numDocsReturned");
2354	metadata.setAttribute(GSXML.VALUE_ATT, Integer.toString(pids.length));
2355	metadataList.appendChild(metadata);
2356
2357	// <documentNodeList>
2358	// <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2359	// docType='hierarchy' nodeType="leaf" />
2360	// ...
2361	// ...
2362	// </documentNodeList>
2363	Element docNodeList = doc.createElement(
2364	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2365	// for each
2366	for(int i = 0; i < pids.length; i++) {
2367	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2368	docNode.setAttribute(GSXML.NODE_ID_ATT, pids[i]);
2369	docNode.setAttribute(GSXML.DOC_TYPE_ATT, "hierarchy");
2370	docNode.setAttribute(GSXML.NODE_TYPE_ATT, "root");
2371	docNode.setAttribute(GSXML.NODE_RANK_ATT, "NaN");
2372
2373	docNodeList.appendChild(docNode);
2374	}
2375
2376	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2377	GSXML.REQUEST_TYPE_PROCESS, service);
2378
2379	//docNodeList.getParentNode().appendChild(metadataList); // need to add term info
2380
2381	try{
2382	return FedoraCommons.elementToString(responseMsg);
2383	}catch(TransformerException e) {
2384	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2385	+ " " + e;
2386	}
2387	}
2388
2389
2390	// FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2391	/** Given a URL that represents a fedoraPID, will look up the object.
2392	* If it exists, it will return the contents of the DC:Title of its datastream.
2393	* If it doesn't exist, it will return the URL as-is.
2394	* @param URL: the URL that (after modification) represents a fedoraPID to look up.
2395	* @param collection: the name of collection in which to search for the URL
2396	* representing a fedoraPID.
2397	* @return the string (representing a fedoraPID) stored in the DC:Title of the
2398	* URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2399	* then the parameter URL is returned.
2400	*/
2401	public String getPIDforURL(String url, String collection) {
2402	FedoraGS3RunException ex = null; // any RemoteException
2403
2404	// (1) convert url to the fedorapid
2405	// / -> _ and : -> -
2406	String fedoraPID = url.replaceAll("/", "_");
2407	fedoraPID = fedoraPID.replaceAll(":", "-");
2408	// prefix "greenstone-http:<colname>-" to the fedoraPID
2409	fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2410	//LOG.error("### fedoraPID: " + fedoraPID);
2411
2412	// (2) Look up the datastream for the fedorapid
2413	String dcTitle = "";
2414	try {
2415	dcTitle = getDCTitle(fedoraPID);
2416	} catch(Exception e) {
2417	LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2418	ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2419	}
2420	//String dc = this.getDC(fedoraPID);
2421	//LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2422
2423	// (3) if fedorapid exists, extract the dc:title content.
2424	// if it doesn't exist, return url
2425	if(dcTitle.equals("")) {
2426	return url;
2427	} else {
2428	// It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2429	//return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2430	return dcTitle+"-1";
2431	}
2432	}
2433
2434	public static void main(String args[]) {
2435	try{
2436	// testing default constructor
2437	//FedoraGS3Connection con = new FedoraGS3Connection();
2438
2439	// testing constructor that takes properties file to show initial
2440	// fedora server values
2441	java.io.File propertyFilename
2442	= new java.io.File("fedoraGS3.properties");
2443	FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2444
2445	// DESCRIBE: serviceList, collectionList
2446	System.out.println("serviceList:\n" + con.getServiceList());
2447
2448	System.out.println("collectionList:\n" + con.getCollectionList());
2449
2450	String[] colPIDs = con.getCollections();
2451	String[] collectionNames = con.getCollectionNames(con.getCollections());
2452
2453
2454	for(int i = 0; i < collectionNames.length; i++) {
2455	System.out.println("Describing collections:\n");
2456	System.out.println(con.describeCollection(collectionNames[i]));
2457	System.out.println("Describing collection services:\n"
2458	+ con.describeCollectionServices(collectionNames[i]));
2459	}
2460
2461	String[] serviceNames = con.getServiceNames();
2462	for(int i = 0; i < serviceNames.length; i++) {
2463	System.out.println("Describing " + serviceNames[i] + ":\n"
2464	+ con.describeCollectionService("demo", serviceNames[i]));
2465	}
2466
2467
2468	// TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2469	// along with EX of the top-level document:
2470	System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2471	System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2472
2473
2474	String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2475	System.out.println("\nGET CONTENT:");
2476	for(int i = 0; i < docIDs.length; i++) {
2477	System.out.println(con.getContent(docIDs[i]));
2478	}
2479
2480	System.out.println("\nGET META:");
2481	for(int i = 0; i < docIDs.length; i++) {
2482	System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2483	}
2484
2485	String[] getTitlesFor = {
2486	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2487	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2488	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2489	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2490	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2491	};
2492
2493	// first let's display the regular meta for top-level docs and
2494	// their sections
2495	for(int i = 0; i < getTitlesFor.length; i++) {
2496	System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2497	}
2498
2499	System.out.println("\nTitles are:");
2500	System.out.println(con.getTitleMetadata(getTitlesFor));
2501
2502	System.out.println("\nGET STRUCTURE:");
2503	for(int i = 0; i < docIDs.length; i++) {
2504	System.out.println("Descendents and numChildren:\n"
2505	+ con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2506	System.out.println("Parent and numSiblings:\n"
2507	+ con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
2508	}
2509
2510	// TEST ERROR CASES:
2511	System.out.println("\nTESTING ERROR CASES");
2512	System.out.println(con.getContent("greenstone:demo-pinky"));
2513	String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2514	"greenstone:demo-pinky" };
2515	System.out.println(con.getContent(errorCases));
2516	System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2517	System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2518
2519	System.out.println("\nCLASSIFIER BROWSE");
2520	System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2521	new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2522
2523	System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2524	String[] classNodeIDs = new String[26];
2525	for(int i = 0; i < classNodeIDs.length; i++) {
2526	int subClassifierNum = i + 1;
2527	classNodeIDs[i] = "CL1." + subClassifierNum;
2528	}
2529	System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2530	classNodeIDs, new String[]{"all"}));
2531
2532	System.out.println("Testing query services");
2533	System.out.println("TEXT QUERY:");
2534	Map formControlValsMap = new HashMap();
2535	formControlValsMap.put(MAXDOCS, "100");
2536	formControlValsMap.put(QUERY, "snails");
2537	String searchResponse
2538	= con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2539	System.out.println(searchResponse);
2540
2541	System.out.println("FIELD QUERY:");
2542	formControlValsMap.clear();
2543	formControlValsMap.put(MAXDOCS, "100");
2544	formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2545	formControlValsMap.put(FIELDNAME_ATT,
2546	"allFields,docTitles,allFields,allFields");
2547	searchResponse
2548	= con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2549	System.out.println(searchResponse);
2550
2551	System.exit(0);
2552	}catch(Exception e) {
2553	JOptionPane.showMessageDialog(
2554	null, e, "Error", JOptionPane.ERROR_MESSAGE);
2555	//System.err.println("ERROR: " + e);
2556	e.printStackTrace();
2557	}
2558	}
2559	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: