Context Navigation

source: other-projects/gs3-webservices-java-client/trunk/src/GS3Fedora/org/greenstone/fedora/services/FedoraGS3Connection.java@ 26270

Last change on this file since 26270 was 26270, checked in by ak19, 12 years ago
Now checks request for nodeStructureInfo documentType, as is needed to get it working with GS3 again. 2. Added in reusable constants of gsdl3/util/AbstractBasicDocument.java since these have now been made public constants.
File size: 105.9 KB

Line
1	/**
2	*#########################################################################
3	* FedoraGS3Connection.java - works with the demo-client for Greenstone 3,
4	* of the Greenstone digital library suite from the New Zealand Digital
5	* Library Project at the University of Waikato, New Zealand.
6	* <BR><BR>
7	* Copyright (C) 2008 New Zealand Digital Library Project
8	* <BR><BR>
9	* This program is free software; you can redistribute it and/or modify
10	* it under the terms of the GNU General Public License as published by
11	* the Free Software Foundation; either version 2 of the License, or
12	* (at your option) any later version.
13	* <BR><BR>
14	* This program is distributed in the hope that it will be useful,
15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	* GNU General Public License for more details.
18	*########################################################################
19	*/
20
21	package org.greenstone.fedora.services;
22
23
24	import java.io.StringReader;
25
26	import org.apache.log4j.Logger;
27	import org.greenstone.fedora.services.FedoraGS3Exception.AuthenticationFailedException;
28	import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException;
29	import org.greenstone.fedora.services.FedoraGS3Exception.FedoraGS3RunException;
30	import org.greenstone.fedora.services.FedoraGS3Exception.NotAFedoraServerException;
31	import org.greenstone.gsdl3.util.AbstractBasicDocument; // for constants
32	import org.greenstone.gsdl3.util.GSXML;
33	import org.w3c.dom.Document;
34	import org.w3c.dom.Element;
35	import org.w3c.dom.Attr;
36	import org.w3c.dom.Text;
37	import org.w3c.dom.NodeList;
38	import org.w3c.dom.Node;
39	import org.xml.sax.InputSource;
40
41	import java.io.File;
42	import java.util.HashMap;
43	import java.util.Properties;
44	import java.util.Map;
45
46	import javax.swing.JOptionPane;
47
48	import org.xml.sax.SAXException;
49	import java.io.UnsupportedEncodingException;
50	import java.io.IOException;
51	import javax.net.ssl.SSLHandshakeException;
52	import java.net.ConnectException;
53	import java.net.MalformedURLException;
54	import java.rmi.RemoteException;
55	import javax.xml.parsers.ParserConfigurationException;
56	import javax.xml.transform.TransformerException;
57
58	/**
59	* Class that extends FedoraConnection in order to be able to use
60	* Fedora's web services to retrieve the specific datastreams of
61	* Greenstone documents stored in Fedora's repository. This class
62	* provides methods that convert those datastreams into Greenstone3
63	* XML response messages which are returned.
64	* @author ak19
65	*/
66	public class FedoraGS3Connection
67	extends FedoraConnection implements FedoraToGS3Interface,
68	FedoraToGS3Interface.Constants
69	{
70	/** Logger shared by every instance of this class. */
71	private static final Logger LOG = Logger.getLogger(
72	FedoraGS3Connection.class.getName());
73
74	/** Index name used for FedoraGSearch when none has been configured. */
75	private static final String DEFAULT_FEDORA_INDEX = "BasicIndex";
76
77	/** Complete list of services that FedoraGS3 offers when everything
78	* goes well. If a connection to FedoraGSearch cannot be established,
79	* the services whose names contain "query" are left out. The services
80	* actually on offer are held in the member variable serviceNames,
81	* which initSearchFunctionality() assigns. */
82	protected static final String[] SERVICES = {
83	"DocumentContentRetrieve", "DocumentMetadataRetrieve",
84	"DocumentStructureRetrieve",
85	"TextQuery", "FieldQuery",
86	"ClassifierBrowse", "ClassifierBrowseMetadataRetrieve"
87	};
88
89	/** Services actually supported by this FedoraGS3 repository connection.
90	* Either the full SERVICES array or, when FedoraGenericSearch cannot
91	* be connected to, SERVICES without the query services. */
92	protected String[] serviceNames;
93
94	/** The object used to connect to FedoraGenericSearch, which is used
95	* for full-text searching; null when no connection could be made. */
96	protected GSearchConnection fedoraGSearch;
97
98	/** The url of the wsdl file of FedoraGSearch's web services.
99	* By default this is the Fedora server's base URL
100	* concatenated to "gsearch/services/FgsOperations?wsdl". */
101	protected String gSearchWSDLURL;
102
103	/** The last part of the gSearchWSDLURL. The first part is
104	* the same as the fedora server's base url. */
105	protected String gSearchWSDLSuffix;
106
107	/** The name of the index that FedoraGSearch searches for the GS3
108	* documents. If no name is specified in the properties file,
109	* this defaults to DEFAULT_FEDORA_INDEX ("BasicIndex"). */
110	protected String gSearchIndexName;
111
/**
 * Creates a connection from explicitly supplied server details. The
 * arguments are handed straight to the five-argument constructor of the
 * superclass FedoraConnection; nothing else is done here.
 * <p>
 * NOTE(review): the connection to FedoraGSearch is presumably created while
 * the superclass constructor runs, through one of the initialisation
 * methods this class overrides (init or setInitialisationProperties),
 * using the default WSDL suffix "gsearch/services/FgsOperations?wsdl".
 * Confirm against FedoraConnection. To use another WSDL location, call
 * setGSearchWSDLURL(url) once the object has been constructed.
 *
 * @param protocol can be either http or https
 * @param host is the host where the fedora server is listening
 * @param port is the port where the fedora server is listening
 * @param fedoraServerUsername is the username for the administrative
 * authentication required to access the fedora server
 * @param fedoraServerPassword is the password for the administrative
 * authentication required to access the fedora server. If no password was
 * set when installing Fedora, pass "".
 */
public FedoraGS3Connection(String protocol, String host, int port,
        String fedoraServerUsername, String fedoraServerPassword)
    throws ParserConfigurationException, MalformedURLException,
        SSLHandshakeException, RemoteException, AuthenticationFailedException,
        NotAFedoraServerException, ConnectException, Exception
{
    // Everything, including any GSearch set-up, is driven from the
    // superclass constructor.
    super(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
}
137
/**
 * No-argument constructor mirroring that of the superclass
 * FedoraConnection, which is documented as showing a small dialog that
 * asks for the host, port, administrative username and password of the
 * fedora server. If no password was set on the fedora repository when it
 * was installed, the password field can be left blank.
 * <p>
 * NOTE(review): the superclass constructor is expected to call
 * setInitialisationProperties(properties), which this class overrides in
 * order to try to instantiate the GSearchConnection. Confirm against
 * FedoraConnection.
 */
public FedoraGS3Connection()
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // All work is delegated to the superclass constructor.
    super();
}
152
/**
 * Single-argument constructor mirroring that of the superclass
 * FedoraConnection. The superclass is documented as reading connection
 * initialisation values from the given properties file and then showing
 * a small dialog for the host, port, administrative username and password
 * of the fedora server, pre-filled with the values found in the file.
 * Values missing from the file leave the corresponding dialog fields
 * blank. If no password was set on the fedora repository when it was
 * installed, the password field can be left blank.
 * <p>
 * NOTE(review): the superclass constructor is expected to call
 * setInitialisationProperties(properties), which this class overrides in
 * order to try to instantiate the GSearchConnection. Confirm against
 * FedoraConnection.
 *
 * @param propertiesFilename is the properties file that may already hold
 * connection initialisation values
 */
public FedoraGS3Connection(File propertiesFilename)
    throws ParserConfigurationException, MalformedURLException,
        CancelledException, ConnectException, RemoteException,
        SSLHandshakeException, Exception
{
    // All work is delegated to the superclass constructor.
    super(propertiesFilename);
}
172
173	/** The superclass constructor calls this method passing any preset
174	* properties loaded from a propertiesFile. This method is overridden
175	* here in order to instantiate the gSearchConnection based on the
176	* - gSearchWSDLSuffix that will be appended to the fedora base url.
177	* (If one was not provided in the properties file, gSearchWSDLURL defaults
178	* to something of the form
179	* "http://<fedorahost:port>/fedoragsearch/services/FgsOperations?wsdl"
180	* which is the baseURL of fedora concatenated to the default gSearchWSDLSuffix,
181	* "gsearch/services/FgsOperations?wsdl".
182	* - name of the index into which the GS3 documents have been indexed
183	* and which FedoraGenericSearch should use to perform searches. If none is
184	* given in the properties file, then the index name defaults to "FedoraIndex".
185	* @param properties is the Properties Map loaded from a properties file
186	* (if there was any) which specifies such things as host and port of the
187	* FedoraServer, but can also specify the property "gsearch.wsdlURL.suffix".
188	* At the end of this method, properties' "gsearch.wsdlURL.suffix" will be set
189	* to whatever the final value of this.gSearchWSDLURL' suffix is, and
190	* "gsearch.indexName" will be set to to whatever the final value of
191	* this.gSearchIndexName is.
192	*/
193	protected void setInitialisationProperties(Properties properties)
194	throws ParserConfigurationException, MalformedURLException,
195	CancelledException, ConnectException, RemoteException,
196	SSLHandshakeException, Exception
197	{
198	super.setInitialisationProperties(properties);
199	// gsearchWSDL URL suffix, if not specified, defaults to
200	// "fedoragsearch/services/FgsOperations?wsdl" which is
201	// concatenated to the baseURL of fedora to give the gsearchWSDLURL.
202	this.gSearchWSDLSuffix = properties.getProperty(
203	"gsearch.wsdlURL.suffix", "gsearch/services/FgsOperations?wsdl");
204	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
205	// Set the property to whatever this.gSearchWSDLURL is now,
206	// so that it will be written out to the properties file again
207	properties.setProperty("gsearch.wsdlURL.suffix", this.gSearchWSDLSuffix);
208
209	// Similarly for the name of the index FedoraGenericSearch should use
210	// when performing searches for GS3 docs stored in Fedora's repository.
211	this.gSearchIndexName = properties.getProperty(
212	"gsearch.indexName", DEFAULT_FEDORA_INDEX); // default to FedoraIndex
213	properties.setProperty("gsearch.indexName", this.gSearchIndexName);
214	// Create a connection to FedoraGSearch's web services:
215	initSearchFunctionality();
216	}
217
218	/** Overridden init method to work with the 5 argument constructor, so that we can
219	* bypass using setInitialisationProperties() which works with a Properties map.
220	*/
221	protected void init(String protocol, String host, String port,
222	String fedoraServerUsername, String fedoraServerPassword)
223	throws ParserConfigurationException, MalformedURLException,
224	AuthenticationFailedException, RemoteException, Exception
225	{
226	super.init(protocol, host, port, fedoraServerUsername, fedoraServerPassword);
227	this.gSearchWSDLSuffix = "gsearch/services/FgsOperations?wsdl";
228	this.gSearchWSDLURL = this.baseURL + this.gSearchWSDLSuffix;
229	this.gSearchIndexName = DEFAULT_FEDORA_INDEX;
230	initSearchFunctionality();
231	}
232
233
234	/** Init method that instantiates a GSearchConnection object used
235	* to work with the separate FedoraGSearch web services.
236	* The url of the WSDL for FedoraGSearch's web services is worked out
237	* from the baseURL of the Fedora server.
238	*/
239	protected void initSearchFunctionality()
240	{
241	try {
242	this.fedoraGSearch = null;
243	this.fedoraGSearch = new GSearchConnection(
244	gSearchWSDLURL, gSearchIndexName);
245	this.serviceNames = SERVICES;
246	} catch(Exception e){
247	LOG.error("Cannot connect to FedoraGSearch's web services at "
248	+ gSearchWSDLURL + "\nQuery services will not be available.");
249	// If an exception occurs, something has gone wrong when
250	// trying to connect to FedoraGSearch's web services. This
251	// means, we can't offer query services, as that's provided
252	// by FedoraGSearch
253	serviceNames = null;
254	int countOfNonQueryServices = 0;
255	for(int i = 0; i < SERVICES.length; i++) {
256	// do not count query services
257	if(!SERVICES[i].toLowerCase().contains("query")) {
258	countOfNonQueryServices++;
259	}
260	}
261	// Services now supported are everything except Query services
262	serviceNames = new String[countOfNonQueryServices];
263	int j = 0;
264	for(int i = 0; i < SERVICES.length; i++) {
265	if(!SERVICES[i].toLowerCase().contains("query")) {
266	serviceNames[j] = SERVICES[i];
267	j++; // valid serviceName, so increment serviceName counter
268	}
269
270	}
271	}
272	}
273
274	/** @return the gSearchWSDLURL, the url of the WSDL for the
275	* FedoraGSearch web services */
276	public String getGSearchWSDLURL() { return gSearchWSDLURL; }
277
278	/** Sets the member variable gSearchWSDLURL that specify the location of
279	* the WSDL file of FedoraGSearch's web services. Then it attempts
280	* to instantiate a connection to those web services.
281	* @param url is the new url of the GSearch web services WSDL file */
282	public void setGSearchWSDLURL(String url) {
283	this.gSearchWSDLURL = url;
284	initSearchFunctionality();
285	}
286
287	/** @return the gSearchIndexName, the name of the index Fedora Generic
288	* Search will search in (where GS3 docs have been indexed into). */
289	public String getGSearchIndexName() { return gSearchIndexName; }
290
291	/** Sets the member variable gSearchIndexName that specifies the name
292	* of the index containing indexed GS3 documents. Then it attempts
293	* to instantiate a connection to the Fedora GSearch web services using
294	* this changed value for indexName.
295	* @param indexName is the new name of the index containing indexed GS3
296	* docs that GSearch should search in. */
297	public void setGSearchIndexName(String indexName) {
298	this.gSearchIndexName = indexName;
299	initSearchFunctionality();
300	}
301
302	/** @return the array of the services actually supported by FedoraGS3 */
303	protected String[] getServiceNames() { return this.serviceNames;}
304
305	/**
306	* For finding out if the sectionNumber is given as part of the docID.
307	* @param docID is the String that contains the docPID and may also
308	* contain the section number.
309	* @return true if the document identifier docID contains a section-
310	* number, and false if it consists solely of the docPID.
311	* That is, true is returned if
312	* <pre>docID = "greenstone:colName-<docPID>-<sectionNum>"</pre>
313	* and false is returned if
314	* <pre>docID = "greenstone:colName-<docPID>"</pre>
315	* */
316	protected boolean containsSectionNumber(String docID) {
317	// if there are two hyphens in the docID, then there are sections
318	// (and the section number is appended at end of docID)
319	// docID = "greenstone:colName-<docPID>-<sectionNum>"
320	return (docID.lastIndexOf(HYPHEN) != docID.indexOf(HYPHEN));
321	}
322
323	/** This method will extract the docPID from docID and return it.
324	* (If a sectionNumber is suffixed to the docID, the docPID which is
325	* the prefix is returned; otherwise the docID is the docPID and is
326	* returned)
327	* @param docID is the String that contains the docPID and may also
328	* contain the section number.
329	* @return only the docPID portion of the docID.
330	*/
331	protected String getDocPIDFromDocID(String docID) {
332	if(containsSectionNumber(docID))
333	return docID.substring(0, docID.lastIndexOf(HYPHEN));
334	// else (if there's no sectionNumber), docID is the docPID
335	return docID;
336	}
337
338	/** This method will return the section Number, if there's any
339	* suffixed to the docID. Otherwise it will return the empty string
340	* @param docID is the String that contains the docPID and may also
341	* contain the section number.
342	* @return only the sectionID portion of the docID - if any, else "".
343	*/
344	protected String getSectionIDFromDocID(String docID) {
345	if(containsSectionNumber(docID))
346	return docID.substring(
347	docID.lastIndexOf(HYPHEN)+1, docID.length());
348	return "";
349	}
350
351	/** Given a list of collectionIDs, returns a GS3 DocumentMetadataRetrieve
352	* response message that gives the metadata for each collection identified
353	* @param collIDs is an array of fedora pids identifying collections in the
354	* fedora repository
355	* @return a GS3 DocumentMetadataRetrieve response message containing the
356	* EX metadata for all the requested collections */
357	public String getCollectionMetadata(String[] collIDs) {
358	return getMetadata(collIDs, new String[] {"all"});
359	}
360
361	/** Given a list of document identifiers, a GS3 DocumentMetadataRetrieve
362	* response message is returned containing the metadata for each document.
363	* @param docIDs is an array of document identifiers (docID can either be
364	* <pid>s items (documents) in the fedora repository, or
365	* "<pid>-sectionNumber".
366	* @return a GS3 DocumentMetadataRetrieve response message containing the
367	* EX, DC, DLS metadata for all the requested documents
368	* @param metadata is the list of metadata elements to be retrieved for each doc */
369	public String getDocumentMetadata(String[] docIDs, String[] metadata) {
370	return getMetadata(docIDs, metadata);
371	}
372
373	/** Given a collectionID, returns a GS3 DocumentMetadataRetrieve
374	* response message that gives the metadata for the collection identified
375	* @param collID is a fedora pid identifying a collection in its repository
376	* @return a GS3 DocumentMetadataRetrieve response message containing the
377	* EX metadata for the requested collection
378	* @param metadata is the list of metadata elements to be retrieved for each doc */
379	public String getCollectionMetadata(String collID) {
380	return getMetadata(new String[] {collID}, new String[] {"all"});
381	}
382
383	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
384	* response message containing the metadata for the document.
385	* @param docID is a document identifier (docID can either be a <pid>
386	* of an item (document) in the fedora repository, or it can be
387	* "<pid>-sectionNumber".
388	* @return a GS3 DocumentMetadataRetrieve response message containing the
389	* EX, DC, DLS metadata for the requested document */
390	public String getDocumentMetadata(String docID, String[] metadata) {
391	return getMetadata(new String[] {docID}, metadata);
392	}
393
394	/** @return a greenstone DocumentMetadataRetrieve response for the
395	* documents or collections indicated by the docIDsOrCollIDs.
396	* @param docIDsOrCollIDs is an array of identifiers which may be either the
397	* fedora pids for collections, or otherwise may be a document identifier.
398	* In the last case, the document ID may consist of either
399	* "documentPID-sectionNumber" or may just be just fedora documentPID
400	* @param metadata is the list of metadata elements to be retrieved for each doc */
401	public String getMetadata(String[] docIDsOrCollIDs, String[] metadata)
402	{
403	Document doc = builder.newDocument();
404	FedoraGS3RunException ex = null;
405
406	Element docNodeList = doc.createElement(
407	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
408
409	try{
410	for(int i = 0; i < docIDsOrCollIDs.length; i++) {
411	// create the <documentNode> containing the metadata
412	// for each document docID
413	Element docNode = getMetadata(doc, docIDsOrCollIDs[i], metadata);
414	docNodeList.appendChild(docNode);
415	}
416	} catch(Exception e) {
417	ex = new FedoraGS3RunException(e);
418	ex.setSpecifics("EX (and/or DC, DLS) metadata datastream");
419	}
420
421	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
422	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
423	try{
424	return FedoraCommons.elementToString(responseMsg);
425	} catch(TransformerException e) {
426	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
427	+ " " + e;
428	}
429	}
430
431	/** Method that takes a new DOM document, as well as an identifier of either
432	* a collection or document (which may be a fedora pid for the collection
433	* or document, or may be the documentPid-sectionNumber for a document) and
434	* returns a documentNode element for it:
435	* <documentNode><metadataList>
436	* <metadata name="">value</metadata>
437	* ...
438	* </metadataList></documentNode>
439	* @return documentNode containing the metadata for the collection or
440	* document given by parameter ID
441	* @param id denotes a collection pid, a document pid or a docID of the
442	* form "documentpid-sectionNumber"
443	* @param metadata is the list of metadata elements to be retrieved for each doc */
444	protected Element getMetadata(Document doc, String id, String[] metadata)
445	throws RemoteException, UnsupportedEncodingException,
446	SAXException, IOException
447	{
448	// We're going to create the documentNode nested inside the following
449	// documentNodeList:
450	// <documentNodeList>
451	// <documentNode nodeID=""><metadataList>
452	// <metadata name="">value</metadata>
453	// </metadataList></documentNode>
454	// <documentNode>...</documentNode>
455	// </documentNodeList>
456	// <documentNodeList>
457
458	// <documentNode nodeID="docID"> - the docNode on which a metadata
459	// retrieve is being performed
460	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
461	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
462	attribute.setValue(id);
463	docNode.setAttributeNode(attribute);
464
465	// <metadataList>
466	Element metadataList = doc.createElement(
467	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
468
469	String ex = "";
470	String dc = "";
471	String dls = "";
472	if(id.endsWith(_COLLECTION)) { // docID refers to a collection
473	// Obtain the "EX" datastream (extracted metadata) for the collection
474	ex = this.getEX(id);
475	}
476	else { // docID refers to a document
477	// work out the document's fedora PID and section ID, and then
478	// obtain the EX (extracted metadata) and DC datastreams for the doc
479
480	// Note that EX/DC for pid="greenstone:<colname>-docPID-1"
481	// is the same as for pid="greenstone:<colname>-docPID"
482	// That is, <Section id="1"> refers to the toplevel document docPID
483	// If requested for top-level document, there may also be DLS meta
484	String sectionID = getSectionIDFromDocID(id);
485	String docPID = getDocPIDFromDocID(id);
486	if(sectionID.equals("") \|\| sectionID.equals("1")) {
487	// metadata of toplevel document is requested
488	ex = this.getEX(docPID); // slightly faster than doing
489	//getSectionEXMetadata(docID, "1")
490	dc = this.getDC(docPID);
491	dls = this.getDLS(docPID);
492	}
493	else {
494	ex = getSectionEXMetadata(docPID, sectionID);
495	dc = getSectionDCMetadata(docPID, sectionID);
496	}
497	}
498
499	String metafields = "";
500	for(int i = 0; i < metadata.length; i++) {
501	metafields = metafields + metadata[i] + "\|";
502	}
503
504	// Adding in metadata sets in alphabetical order
505	// DC metadata for a top-level document is different from EX, DLS:
506	// only the element's namespace prefix is "dc", the rest of a tagname
507	// is unknown.
508	if(!dc.equals("")) {
509	addMetadataWithNamespacedTagNames(doc, metadataList,
510	dc, DC, metafields);
511	}
512
513	// Check if we were supposed to process dls and dc metadata
514	// as well. We only ever do this for top-level documents,
515	// in which case, dls and dc will be non-empty strings
516	if(!dls.equals("")) {
517	addMetadataWithFixedTagName(doc, metadataList, dls, DLS, metafields);
518	}
519
520	// we definitely have an EX metadatastream for each
521	// collection object, top-level document object,
522	// and document section item
523	addMetadataWithFixedTagName(doc, metadataList, ex, EX, metafields);
524
525	// now the metadataList has been built up
526	docNode.appendChild(metadataList);
527
528	return docNode; // return <documentNode> containing the metadata
529	}
530
531	/** This method retrieves all the metadata elements in the metaDataStream
532	* parameter of the form <"metadataSetNS:metadata">"value"</metadata> where
533	* metadataSetNS is the namespace of each tag, and creates a new element of
534	* the form <metadata name="metadataSetNS:metadata">"value"</metadata> for
535	* each. Each of these are then appended to the metadataList parameter.
536	* @param doc is the Document object using which the new metadata Elements
537	* are to be constructed
538	* @param metadataList is the <metadataList> Element to which the new
539	* metadata Elements are to be appended as children.
540	* @param metaDatastream the metadata datastream in string form (e.g. the
541	* Dublin Core metadata stored in the Fedora repository).
542	* @param metadataSet is the constant datastream identifier, e.g. "DC".
543	* At present this method applies to the DC metadata and any others like it
544	* where each tagname is different except for the constant dc: namespace.
545	* @param metafields is a \| separated string containing the metadatafields to
546	* extract or "all" if all fields are requested
547	*/
548	protected void addMetadataWithNamespacedTagNames(Document doc, Element metadataList,
549	String metaDatastream, String metadataSet, String metafields)
550	throws SAXException, IOException
551	{
552	Document src = builder.parse(
553	new InputSource(new StringReader(metaDatastream)));
554
555	// The following doesn't work for some reason: to retrieve all elements
556	// whose namespace prefix starts with "dc", we pass "*" for localName
557	//NodeList dcMetaTags = src.getElementsByTagNameNS(metadataSet.toLowerCase(), "*");
558
559	// Longer way: get the children of the root document
560	NodeList children = src.getDocumentElement().getChildNodes();
561
562	for(int i = 0; i < children.getLength(); i++) {
563	String nodeName = children.item(i).getNodeName();
564	// check that the nodename starts with the metadataSet ("dc") namespace,
565	// which simultaneously ensures that the node's an element:
566	if(nodeName.toLowerCase().startsWith(metadataSet.toLowerCase())) {
567	// need to have a period for Greenstone instead of Fedora's colon
568	nodeName = nodeName.replace(COLON, PERIOD);
569	if(metadataSet.equals(DC)) { // dc:title -> dc.Title
570	nodeName = "dc" + PERIOD + Character.toString(Character.toUpperCase(nodeName.charAt(3)))
571	+ nodeName.substring(4);
572	}
573
574	// get the requested metadata fields
575	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(nodeName) != -1) {
576	Element metatag = (Element)children.item(i);
577	String value = FedoraCommons.getValue(metatag);
578	// <dc:tagname>value</dc:tagname>
579	// we're going to put this in our metadata element as
580	// <metadata name="dc.Tagname">value</metadata>
581
582	// create metadata of (name, value) pairs in target DOM (doc)
583	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
584	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
585
586	attribute.setValue(nodeName);
587	metadata.setAttributeNode(attribute);
588	Text content = doc.createTextNode(value);
589	metadata.appendChild(content);
590	metadataList.appendChild(metadata);
591	}
592	}
593	}
594	}
595
596	/** This method retrieves all the metadata elements in the metaDataStream
597	* of the form <"namespace:"metadata name="metadataName">value</metadata>
598	* where "namespace" is the namespace prefix of each tag, and metadataName
599	* is the name of the metadata (like author, title). For each element
600	* it creates a corresponding new element of the form
601	* <metadata name="namespace:metadataName">value</metadata>.
602	* Each of these are then appended to the metadataList parameter.
603	* @param doc is the Document object using which the new metadata Elements
604	* are to be constructed
605	* @param metadataList is the <metadataList> Element to which the new
606	* metadata Elements are to be appended as children.
607	* @param metaDatastream the metadata datastream in string form (e.g. the
608	* EX/Greenstone extracted metadata or DLS metadata stored in the Fedora
609	* repository).
610	* @param metadataSet is the constant datastream identifier,
611	* e.g. "DLS" or "EX".
612	* At present this method applies to the DLS and EX metadata as they have
613	* constant tagnames throughout.
614	* @param metafields is a \| separated string containing the metadatafields to
615	* extract or "all" if all fields are requested.
616	*/
617	protected void addMetadataWithFixedTagName(Document doc, Element metadataList,
618	String metaDatastream, String metadataSet, String metafields)
619	throws SAXException, IOException
620	{
621	// Namespace prefix can be "ex:" or "dls:"
622	String namespacePrefix = "";
623	if(!metadataSet.equals(EX)) {
624	// need to have a period for Greenstone instead of Fedora's colon
625	namespacePrefix = metadataSet.toLowerCase() + PERIOD;
626	}
627
628	Document src = builder.parse(
629	new InputSource(new StringReader(metaDatastream)));
630	NodeList metaTags = src.getElementsByTagName(
631	metadataSet.toLowerCase()+COLON+METADATA);
632	// Looking for tagnames: <ex:metadata> or <dls:metadata>
633
634	for(int i = 0; i < metaTags.getLength(); i++) {
635	Element metatag = (Element)metaTags.item(i);
636
637	// extract the metadata of (name, value) pairs from src DOM
638	// look for <metadata name="name">value</metadata>
639	String name = metatag.hasAttribute(NAME) ?
640	metatag.getAttribute(NAME) : "";
641	// sometimes, there are several metadata for the same name, in this
642	// case, look for a qualifier and append its value to the name to
643	// distinguish it uniquely:
644	if(metatag.hasAttribute(QUALIFIER)) {
645	name = name + HYPHEN + metatag.getAttribute(QUALIFIER);
646	}
647	name = namespacePrefix + name; // prefix with namespace, if any
648	if(metafields.indexOf("all") != -1 \|\| metafields.indexOf(name) != -1) {
649	String value = FedoraCommons.getValue(metatag);
650
651	// create metadata of (name, value) pairs in target DOM (doc)
652	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
653	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
654	attribute.setValue(name);
655	metadata.setAttributeNode(attribute);
656	Text content = doc.createTextNode(value);
657	metadata.appendChild(content);
658
659	metadataList.appendChild(metadata);
660	}
661	}
662	}
663
664	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
665	* response message containing ONLY the Title metadata for the document.
666	* @param docID is a document identifier (docID can either be a <pid>
667	* of an item (document) in the fedora repository, or it can be
668	* "<pid>-sectionNumber".
669	* @return a GS3 DocumentMetadataRetrieve response message containing the
670	* Title metadata for the requested document */
671	public String getTitleMetadata(String docID) {
672	return getTitleMetadata(new String[] { docID });
673	}
674
675	/** Given a document identifier, returns a GS3 DocumentMetadataRetrieve
676	* response message containing ONLY the Title metadata for the documents.
677	* @param docIDs is a list of document identifiers (where docID can either be
678	* a <pid> of an item (document) in the fedora repository, or it can be
679	* "<pid>-sectionNumber".
680	* @return a GS3 DocumentMetadataRetrieve response message containing the
681	* Title metadata for all the requested documents */
682	public String getTitleMetadata(String[] docIDs) {
683	// Must create message of the following form:
684	// <documentNodeList><documentNode nodeID="docID">
685	// <metadataList><metadata name="Title">sometitle</metadata>
686	// </metadataList></documentNode>
687
688	Document doc = builder.newDocument();
689	FedoraGS3RunException ex = null;
690
691	Element docNodeList = doc.createElement(
692	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
693	try{
694	for(int i = 0; i < docIDs.length; i++) {
695	Element docNode = getTitleMetadata(doc, docIDs[i]);
696	docNodeList.appendChild(docNode);
697	}
698	}catch(Exception e) {
699	ex = new FedoraGS3RunException(e);
700	//ex.setSpecifics("EX metadata datastream PID: \|" + docIDs[i] + "\|"); // for debugging PID
701	ex.setSpecifics("EX metadata datastream");
702	}
703
704	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
705	GSXML.REQUEST_TYPE_PROCESS, "DocumentMetadataRetrieve");
706	try{
707	return FedoraCommons.elementToString(responseMsg);
708	} catch(TransformerException e) {
709	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
710	+ " " + e;
711	}
712	}
713
714	/** Method that takes a new DOM document, as well as an identifier of either
715	* a document or document section and returns a documentNode element containing
716	* the title metadata for it:
717	* <documentNode nodeID="docID"><metadataList>
718	* <metadata name="Title">sometitle</metadata>
719	* </metadataList></documentNode>
720	* @return documentNode containing the metadata for the collection or
721	* document given by parameter ID
722	* @param docID denotes the id of a document or a document section, so id
723	* is either a document-pid or it's of the form documentpid-sectionNumber */
724	protected Element getTitleMetadata(Document doc, String docID)
725	throws RemoteException, UnsupportedEncodingException,
726	SAXException, IOException
727	{
728	// Returns a docNode element of the following form:
729	// <documentNode nodeID="docID">
730	// <metadataList><metadata name="Title">sometitle</metadata></metadataList>
731	// </documentNode>
732
733	// <documentNode nodeID="docID">
734	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
735	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
736	attribute.setValue(docID);
737	docNode.setAttributeNode(attribute);
738
739	// <metadataList>
740	Element metaList = doc.createElement(
741	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
742	// <metadata name="Title">
743	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
744	// if we connect it all up (append children), we can immediately add
745	// the name attribute into the metadata element:
746	metaList.appendChild(metadata);
747	docNode.appendChild(metaList);
748	metadata.setAttribute(GSXML.NAME_ATT, TITLE); // immediately add attribute
749
750	String title = "";
751	String sectionID = getSectionIDFromDocID(docID);
752	String docPID = getDocPIDFromDocID(docID);
753
754	// check if title of toplevel document is requested
755	if(sectionID.equals(""))
756	title = this.getDocTitle(docPID);
757	else { // title of document section
758	title = this.getSectionTitle(docPID, sectionID);
759	}
760
761	metadata.appendChild(doc.createTextNode(title));
762
763	return docNode;
764	}
765
766	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
767	* containing the requested portion of the document structure of the documents
768	* indicated by docIDs:
769	* @param docID is the document identifier of the document whose hierarchical
770	* structure is requested. The name of the collection is already included in the
771	* docID for a Fedora DL.
772	* @param structure - strings specifying the required structure of the document.
773	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
774	* @param info - strings specifying the required structural info of the document.
775	* It can be any combination of: siblingPosition, numSiblings, numChildren.
776	*/
777	public String getDocumentStructure(String docID, String[] structure, String[] info) {
778	return getStructure(new String[]{docID}, structure, info);
779	}
780
781
782	/** @return a String representing Greenstone3 DocumentMetadataRetrieve XML
783	* containing the requested portion of the document structure of the documents
784	* indicated by docIDs:
785	* @param docIDs is an array of document identifiers of documents whose
786	* hierarchical structures are requested. The name of the collection is already
787	* included in the docID for a Fedora DL.
788	* @param structure - strings specifying the required structure of each document.
789	* It can be a combination of: ancestors, parent, siblings, children, descendants, entire.
790	* @param info - strings specifying the required structural info of each document.
791	* It can be any combination of: siblingPosition, numSiblings, numChildren.
792	*/
793	public String getDocumentStructure(String[] docIDs, String[] structure, String[] info) {
794	return getStructure(docIDs, structure, info);
795	}
796
797	/**
798	* Returns a greenstone3 DocumentStructureRetrieve XML response message
799	* containing the document structures for the given docIDs.
800	* Similar to FedoraConnection.getTOC(), but instead of fedora formatted XML,
801	* greenstone formatted XML is returned. The requested section of the table
802	* of contents (TOC) for a document is converted into the greenstone3 xml
803	* format that is returned upon DocumentStructureRetrieve requests.
804	* @param docIDs the documentIDs for which the section's structure is returned;
805	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
806	* @param structure - the structure of the sections to return. Can be any combination of:
807	* ancestors, parent, siblings, children, descendants, entire.
808	* @param infos - strings containing any combination of the values: numChildren, numSiblings,
809	* siblingPosition. The requested info gets added as attributes to the returned root element.
810	* @return a greenstone3 DocumentStructureRetrieve XML response message in
811	* String format with the structure of the docIDs requested.
812	*/
813	protected String getStructure(String[] docIDs, String[] structure, String[] infos)
814	{
815	Document doc = builder.newDocument();
816	FedoraGS3RunException ex = null;
817	// <documentNodeList>
818	Element docNodeList = doc.createElement(
819	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
820
821	try{
822	// append the <documentNodes> for the docIDs
823	// to the docNodeList
824	//getStructureElement(docNodeList, docIDs, levels);
825	getStructureElement(docNodeList, docIDs, structure, infos);
826	} catch(Exception e) {
827	ex = new FedoraGS3RunException(e);
828	ex.setSpecifics("(requested portion of) TOC datastream");
829	}
830	// insert our <documentNodeList> into a GS3 response message
831	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
832	GSXML.REQUEST_TYPE_PROCESS, "DocumentStructureRetrieve");
833	try{
834	return FedoraCommons.elementToString(responseMsg);
835	} catch(TransformerException e) {
836	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
837	+ " " + e;
838	}
839	}
840
841
842	/** Given a <documentNodeList> portion of a greenstone3
843	* DocumentStructureRetrieve XML response message, this method will populate
844	* it with the <documentNodes> that represent the structure of the given docIDs.
845	* @param docNodeList is a <documentNodeList> to which <documentNodes> of
846	* the doc structures are appended.
847	* @param docIDs the documentIDs for which the section's structure is returned;
848	* where a docID is either a fedora pid <docPID> or <docPID>-<sectionNumber>.
849	* @param structures - the structure of the sections to return. Can be any combination of:
850	* ancestors, parent, siblings, children, descendants, entire.
851	* @param infos - a string containing any combination of the values: numChildren, numSiblings,
852	* siblingPosition. The requested info gets added as attributes to the returned root element.
853	*/
854	protected void getStructureElement(Element docNodeList, String[] docIDs,
855	String[] structures, String[] infos)
856	throws RemoteException, UnsupportedEncodingException, SAXException,
857	IOException
858	{
859	// Make one string out of requested structure components, and one string from info components
860	String structure = "";
861	String info = "";
862	for(int i = 0; i < structures.length; i++) {
863	structure = structure + structures[i] + "\|";
864	}
865	for(int i = 0; i < infos.length; i++) {
866	info = info + infos[i] + "\|";
867	}
868
869	// process each docID
870	for(int i = 0; i < docIDs.length; i++) {
871	// work out the document's fedora PID and section ID
872	String sectionID = getSectionIDFromDocID(docIDs[i]);
873	String docPID = getDocPIDFromDocID(docIDs[i]);
874	if(sectionID.equals("")) {
875	sectionID = "1";
876	}
877
878	// get the required section, along with children or descendants
879	Element srcDocElement = getSectionStructureXML(docPID, sectionID, structure, info);
880	Document doc = docNodeList.getOwnerDocument();
881
882	// copy-and-convert that structure into a structure format for GS3
883	Element docNode = getStructure(doc, docIDs[i], docPID, srcDocElement);
884
885	if(!info.equals("")) {
886	// <nodeStructureInfo>
887	// <info name="" value="" />
888	// <info name="" value="" />
889	// ...
890	// </nodeStructureInfo>
891	Element nodeStructureInfo = doc.createElement(GSXML.NODE_STRUCTURE_ELEM+"Info");
892	Element root = srcDocElement.getOwnerDocument().getDocumentElement();
893
894	if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_SIBS)) {
895	String numSiblings = root.getAttribute(AbstractBasicDocument.INFO_NUM_SIBS);
896	Element infoEl = doc.createElement(GSXML.INFO_ATT);
897	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_SIBS);
898	infoEl.setAttribute(GSXML.VALUE_ATT, numSiblings);
899	nodeStructureInfo.appendChild(infoEl);
900	}
901
902	if(root.hasAttribute(AbstractBasicDocument.INFO_SIB_POS)) {
903	String siblingPosition = root.getAttribute(AbstractBasicDocument.INFO_SIB_POS);
904	Element infoEl = doc.createElement(GSXML.INFO_ATT);
905	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_SIB_POS);
906	infoEl.setAttribute(GSXML.VALUE_ATT, siblingPosition);
907	nodeStructureInfo.appendChild(infoEl);
908	}
909
910	if(root.hasAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN)) {
911	String numChildren = root.getAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN);
912	Element infoEl = doc.createElement(GSXML.INFO_ATT);
913	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_NUM_CHILDREN);
914	infoEl.setAttribute(GSXML.VALUE_ATT, numChildren);
915	nodeStructureInfo.appendChild(infoEl);
916	}
917
918	if(root.hasAttribute(AbstractBasicDocument.INFO_DOC_TYPE)) {
919	String documentType = root.getAttribute(AbstractBasicDocument.INFO_DOC_TYPE);
920	Element infoEl = doc.createElement(GSXML.INFO_ATT);
921	infoEl.setAttribute(GSXML.NAME_ATT, AbstractBasicDocument.INFO_DOC_TYPE);
922	infoEl.setAttribute(GSXML.VALUE_ATT, documentType);
923	nodeStructureInfo.appendChild(infoEl);
924	}
925
926	docNode.appendChild(nodeStructureInfo);
927	}
928
929	// add it to our list of documentNodes
930	docNodeList.appendChild(docNode);
931	}
932	}
933
934
935	/**
936	* Takes the portion of the XML document outlining the structure of the
937	* document (section)--in the format this is stored in Fedora--and returns
938	* Greenstone 3 DOM XML format for outlining document structure.
939	* @return a <documentNode> element that contains a greenstone3
940	* DocumentStructureRetrieve XML corresponding to the parameter Element section
941	* (which is in fedora XML), for the document indicated by docID.
942	* @param requestingDocID is the identifier of the document for which the
943	* structure was requested. It's this document's children or descendants that
944	* will be returned. Note that this is not always the same as (clear from)
945	* parameter docID.
946	* @param docID is the documentID for which the section's structure is
947	* returned where docID = "docPID-sectionNumber".
948	* @param section - the fedora section XML that is being mirrored in
949	* greenstone3 format.
950	*/
951	protected Element getStructure(Document doc, String requestingDocID,
952	String docID, Element section)
953	{
954	// we want to mirror the section's DOM (given in fedora XML) in
955	// greenstone3's XML for a DocumentStructureRetrieve response.
956
957	// <documentNode nodeID="docID"> - the docNode on which a structure retrieve
958	// is being performed
959	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
960	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
961	attribute.setValue(requestingDocID); //requestingDocID.replace(HYPHEN+SECTION, "")
962	docNode.setAttributeNode(attribute);
963
964	// <nodeStructure>
965	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
966
967	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
968	Element rootNode = createDocNodeFromSubsection(doc, section, docID);
969
970	// fills in the subtree of the rootNode in our nodeStructure element
971	createDocStructure(doc, section, rootNode, docID);
972	//where section represents the root section
973
974	nodeStructure.appendChild(rootNode);
975	docNode.appendChild(nodeStructure);
976	return docNode;
977	}
978
979
980	/** Recursive method that creates a documentStructure mirroring parameter
981	* section, starting from parameter parent down to all descendants
982	* @param section is the XML <Section> in the fedora repository's TOC
983	* for the docPID whose substructure is to be mirrored
984	* @param parent is the XML documentNode in the greenstone repository whose
985	* descendants created by this method will correspond to the descendants of
986	* parameter section.
987	* @param doc is the document containing the parent;
988	* @param docPID is the prefix of all nodeIDs in the parent's structure
989	*/
990	protected void createDocStructure(
991	Document doc, Element section, Element parent, String docPID)
992	{
993	// get the section's children (if any)
994	NodeList children = section.getChildNodes();
995	for(int i = 0; i < children.getLength(); i++) {
996	Node n = children.item(i);
997
998	if(n.getNodeName().equals(SECTION_ELEMENT)) {
999	//then we know it's an element AND that its tagname is "Section"
1000	Element subsection = (Element)n;
1001	Element child = createDocNodeFromSubsection(doc, subsection, docPID);
1002	parent.appendChild(child);
1003
1004	// recursion call on newly found child-element and subsection
1005	createDocStructure(doc, subsection, child, docPID);
1006	}
1007	}
1008	}
1009
1010	/** Given a particular subsection element, this method creates a
1011	* Greenstone3 DocumentNode element that mirrors it.
1012	* @param doc is the document that will contain the created DocumentNode
1013	* @param docID is the prefix of all nodeIDs in the parent's structure
1014	* @param subSection is the XML <Section> in the fedora repository's
1015	* TOC for the docPID which will be mirrored in the greenstone XML
1016	* documentNode that will be returned.
1017	* @return a greenstone <documentNode> that represents the fedora TOC's
1018	* <Section> element passed as parameter subSection. */
1019	protected Element createDocNodeFromSubsection(
1020	Document doc, Element subSection, String docID)
1021	{
1022	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1023	Attr docType = doc.createAttribute(GSXML.DOC_TYPE_ATT);
1024	docType.setValue(GSXML.DOC_TYPE_HIERARCHY);
1025	docNode.setAttributeNode(docType);
1026
1027	Attr nodeID = doc.createAttribute(GSXML.NODE_ID_ATT);
1028	String sectionID = subSection.hasAttribute(ID) ?
1029	subSection.getAttribute(ID) : "";
1030	if(sectionID.equals("1")
1031	&& subSection.getElementsByTagName(SECTION_ELEMENT).getLength() > 0) { // root, non-leaf case
1032	// reset the attribute without the section number (just "docID" may be important for democlient?)
1033	nodeID.setValue(docID + HYPHEN + sectionID); // nodeID.setValue(docID);
1034	} else {
1035	nodeID.setValue(docID + HYPHEN + sectionID);
1036	}
1037	//nodeID.setValue(docID + HYPHEN + sectionID);
1038	docNode.setAttributeNode(nodeID);
1039
1040	Attr nodeType = doc.createAttribute(GSXML.NODE_TYPE_ATT);
1041	if(subSection.hasAttribute(GSXML.NODE_TYPE_ATT)) {
1042	nodeType.setValue(subSection.getAttribute(GSXML.NODE_TYPE_ATT));
1043	}
1044	docNode.setAttributeNode(nodeType);
1045	return docNode;
1046	}
1047
1048
1049	/** Given an identifier that is either a docPID or a concatenation of
1050	* docPID+sectionID, this method works out the fedora assigned docPID and
1051	* sectionID and then calls getContentBody(docPID, sectionID) with those.
1052	* @param docID is expected to be of the form
1053	* "greenstone:<collectionName>-<docPID>-<sectionNumber>" or
1054	* "greenstone:<collectionName>-<docPID>"
1055	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1056	* "greenstone:<collectionName>-1" ("greenstone:<collectionName>-Section1")
1057	* is returned! */
1058	public String getContent(String docID) {
1059	return this.getContent(new String[]{docID});
1060	}
1061
1062	/** Given an identifier that is a concatenation of docID+sectionID, this
1063	* method works out the fedora assigned docPID and sectionID and then calls
1064	* getContentBody(docPID, sectionID) with those.
1065	* @param docIDs is an array of document identifiers of the form
1066	* "greenstone:<collectionName>-<docPID>-<sectionNumber>"
1067	* If it is "greenstone:<collectionName>-<docPID>", then the content for
1068	* "greenstone:<collectionName>-Section1" is returned! */
1069	public String getContent(String[] docIDs) {
1070	Document doc = builder.newDocument();
1071	FedoraGS3RunException ex = null;
1072
1073	//<documentNodeList>
1074	Element docNodeList = doc.createElement(
1075	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
1076
1077	try{
1078	for(int i = 0; i < docIDs.length; i++) {
1079	// get the sectionID and docPID from the docID
1080	String sectionID = this.removePrefix(
1081	getSectionIDFromDocID(docIDs[i]), SECTION);
1082	String docPID = getDocPIDFromDocID(docIDs[i]);
1083	if(sectionID.equals("")) // if no section is specified, get
1084	sectionID = "1"; // get the content for Section id="1"
1085
1086	// Get the contents for the requested section of document docPID
1087	String sectionContent = this.getContentBody(docPID, sectionID);
1088
1089	// set the nodeID attribute
1090	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
1091	Attr nodeId = doc.createAttribute(GSXML.NODE_ID_ATT);
1092
1093	nodeId.setValue(docIDs[i]); // just set the docID which will contain
1094	// the docPID (and sectionID if already present)
1095
1096	docNode.setAttributeNode(nodeId);
1097	// set the text content to what was retrieved
1098	Element nodeContent = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1099	Text textNode = doc.createTextNode(sectionContent.trim());
1100
1101	nodeContent.appendChild(textNode);
1102	docNode.appendChild(nodeContent);
1103	//add the documentNode to the docNodeList
1104	docNodeList.appendChild(docNode);
1105	}
1106	} catch(Exception e) {
1107	ex = new FedoraGS3RunException(e);
1108	ex.setSpecifics("requested doc Section datastream");
1109	}
1110	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
1111	GSXML.REQUEST_TYPE_PROCESS, "DocumentContentRetrieve");
1112	try{
1113	return FedoraCommons.elementToString(responseMsg);
1114	} catch(TransformerException e) {
1115	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1116	+ " " + e;
1117	}
1118	}
1119
1120	/** Gets the contents of a textNode from a section.
1121	* @return the text content of a section.
1122	* @param docPID the pid of the document from which a section's text is to
1123	* be retrieved.
1124	* @param sectionID is the section identifier of the document denoted by
1125	* docPID whose text is to be returned.
1126	*/
1127	protected String getContentBody(String docPID, String sectionID)
1128	throws RemoteException, UnsupportedEncodingException,
1129	SAXException, IOException
1130	{
1131	String section = this.getSection(docPID, sectionID);
1132
1133	// the content is nested inside a <Section> element,
1134	// we extract it from there:
1135	InputSource source = new InputSource(new StringReader(section));
1136	Document doc = builder.parse(source);
1137
1138	// The document Element is the <Section> we want.
1139	// Get its text contents:
1140	section = FedoraCommons.getValue(doc.getDocumentElement());
1141
1142	// we are going to remove all occurrences of "_httpdocimg_/"
1143	// that precede associated filenames, because that's a GS3
1144	// defined macro for resolving relative urls. It won't help
1145	// with documents stored in fedora.
1146	section = section.replaceAll(GS3FilePathMacro+"/", "");
1147	return section;
1148	}
1149
1150	/** Here we create the greenstone's response message element:
1151	* <message&lg;<response><content></response></message>
1152	* @return a greenstone response-message element.
1153	* @param doc - the Document object which should me used to create the
1154	* <message> and <response> elements
1155	* @param content - the element that is to be nested inside <response>
1156	* @param ex - any exception that occurred when trying to create
1157	* the content parameter
1158	* @param responseType - the value for the type attribute of <response>,
1159	* such as "describe", "retrieve", "browse", "query"...
1160	* @param originator - indiates the collectionName or service (like
1161	* DocumentContentRetrieve) from where this response message originates
1162	*/
1163	protected Element createResponseMessage(Document doc, Element content,
1164	Exception ex, String responseType, String originator)
1165	{
1166	Element response = doc.createElement(GSXML.RESPONSE_ELEM);
1167	// from = "FedoraGS3"
1168	Attr attribute = doc.createAttribute(GSXML.FROM_ATT);
1169	attribute.setValue(originator);
1170	response.setAttributeNode(attribute);
1171
1172	// type = "describe" or "process" - whatever's given in requestType:
1173	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1174	attribute.setValue(responseType);
1175	response.setAttributeNode(attribute);
1176
1177	if(content != null)
1178	response.appendChild(content);
1179
1180	// we'll create an error element for RemoteExceptions (web service problems)
1181	// and UnsupportedEncodingExceptions and
1182	if(ex != null) {
1183	Element error = doc.createElement(GSXML.ERROR_ELEM);
1184	error.appendChild(doc.createTextNode(ex.getMessage()));
1185	// now append the error to the <response> element (after
1186	// the content element whatever that was)
1187	response.appendChild(error);
1188	}
1189
1190	Element message = doc.createElement(GSXML.MESSAGE_ELEM);
1191	message.appendChild(response);
1192	doc.appendChild(message);
1193	return message;
1194	}
1195
1196	/** @return a <serviceList> Element as defined by GS3: containing all the
1197	* services (denoted by <service> elements) that are supported by FedoraGS3.
1198	* At present these are: DocumentContentRetrieve, DocumentMetadataRetrieve,
1199	* DocumentStructureRetrieve, TextQuery, FieldQuery, ClassifierBrowse,
1200	* ClassifierBrowseMetadataRetrieve (as indicated by member var serviceNames).
1201	* @param doc - the Document object which should me used to create the
1202	* <serviceList> element */
1203	protected Element createServiceList(Document doc)
1204	{
1205	Element serviceList = doc.createElement(
1206	GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
1207
1208	for(int i = 0; i < serviceNames.length; i++) {
1209	// create the <service name="serviceName[i]" type="servicetype" />
1210	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1211
1212	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1213	attribute.setValue(serviceNames[i]);
1214	service.setAttributeNode(attribute);
1215
1216	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1217	if(serviceNames[i].equals("ClassifierBrowse")) //browseTitlesByLetter
1218	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1219	else if(serviceNames[i].contains("Query")) // search services
1220	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1221	else
1222	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1223	service.setAttributeNode(attribute);
1224
1225	// add the service element to the serviceList element
1226	// <serviceList><service /></serviceList>
1227	serviceList.appendChild(service);
1228	}
1229	return serviceList;
1230	}
1231
1232	/** @return a GS3 response message for a describe services request:
1233	* indicating the list of services supported by the Fedora-Greenstone
1234	* interface. These are DocumentContentRetrieve, DocumentMetadataRetrieve,
1235	* DocumentStructureRetrieve, ClassifierBrowse, TextQuery, FieldQuery,
1236	* ClassifierBrowseMetadataRetrieve - as indicated by member variable
1237	* serviceNames. */
1238	public String getServiceList()
1239	{
1240	Document doc = builder.newDocument();
1241	Element serviceList = createServiceList(doc);
1242	// make <serviceList> the body of the responseMessage:
1243	// <message><response><serviceList></response></message>
1244	Element responseMsg = createResponseMessage(doc, serviceList, null,
1245	GSXML.REQUEST_TYPE_DESCRIBE, "");
1246	try {
1247	return FedoraCommons.elementToString(responseMsg);
1248	}catch(TransformerException e) {
1249	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1250	+ " " + e;
1251	}
1252	}
1253
1254	/** @return a GS3 describe response message listing the collections and
1255	* collection-specific metadata stored in the Fedora-Greenstone repository. */
1256	public String getCollectionList()
1257	{
1258	Document doc = builder.newDocument();
1259	FedoraGS3RunException ex = null; // any RemoteException
1260
1261	// create the <collectionList /> element
1262	Element collectionList = doc.createElement(
1263	GSXML.COLLECTION_ELEM+GSXML.LIST_MODIFIER);
1264	try{
1265	String[] collectionNames = this.getCollectionNames(
1266	this.getCollections()); // this line could throw RemoteException
1267	for(int i = 0; i < collectionNames.length; i++) {
1268	// create the <collection name="somename" /> element
1269	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1270	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1271	attribute.setValue(collectionNames[i]);
1272	collection.setAttributeNode(attribute);
1273
1274	// append the <collection> element as child of <collectionList>
1275	collectionList.appendChild(collection);
1276
1277	//if(collection.hasAttribute(GSXML.NAME_ATT))
1278	//LOG.debug(collection.getAttribute(GSXML.NAME_ATT));
1279	}
1280	} catch(RemoteException e) { // if this happens, perhaps it's because it
1281	// can't find Greenstone collections in fedora repository?
1282	ex = new FedoraGS3RunException(e);
1283	ex.setSpecifics(
1284	"greenstone collections in fedora repository");
1285	}
1286
1287	// make <collectionList> the body of the responseMessage:
1288	// <message><response><collectionList></response></message>
1289	Element responseMsg = createResponseMessage(doc, collectionList, ex,
1290	GSXML.REQUEST_TYPE_DESCRIBE, "");
1291	try{
1292	return FedoraCommons.elementToString(responseMsg);
1293	}catch(TransformerException e) {
1294	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1295	+ " " + e;
1296	}
1297	}
1298
1299	/** @return a GS3 describe response message for a collection in the
1300	* Fedora-Greenstone repository.
1301	* @param collectionName - the name of the collection that is to be described.
1302	* It will be converted to a fedora collection pid, which is of the form
1303	* "greenstone:<collectionName>-collection". */
1304	public String describeCollection(String collectionName)
1305	{
1306	Document doc = builder.newDocument();
1307	FedoraGS3RunException ex = null;
1308
1309	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1310	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1311	attribute.setValue(collectionName);
1312	collection.setAttributeNode(attribute);
1313
1314	//<displayItem assigned="true" lang="en" name="name">
1315	//"some display name"</displayItem>
1316	Element displayItem = doc.createElement(GSXML.DISPLAY_TEXT_ELEM);
1317
1318	attribute = doc.createAttribute(GSXML.LANG_ATT);
1319	attribute.setValue(this.lang);
1320	displayItem.setAttributeNode(attribute);
1321
1322	attribute = doc.createAttribute(GSXML.NAME_ATT);
1323	attribute.setValue(GSXML.DISPLAY_TEXT_NAME);
1324	displayItem.setAttributeNode(attribute);
1325
1326	try{
1327	Text textNode = doc.createTextNode(
1328	this.getCollectionTitle(getCollectionPID(collectionName)));
1329	displayItem.appendChild(textNode);
1330	} catch(Exception e) {
1331	// can't find Greenstone collections in fedora repository or problem
1332	// getting their titles from their metadata datastream?
1333	ex = new FedoraGS3RunException(e);
1334	ex.setSpecifics("greenstone collections or their metadata"
1335	+ "in the fedora repository");
1336	}
1337	// now append the displayItem element as child of the collection element
1338	collection.appendChild(displayItem);
1339	// get the <serviceList> and add it into the collection description.
1340	// Services for all collections in the FedoraGS3 repository are the
1341	// same, offering a ClassifierBrowse to browse titles by starting letter
1342	// and DocRetrieve services: Content, Metadata and Structure.
1343
1344	Element serviceList = createServiceList(doc);
1345	collection.appendChild(serviceList);
1346
1347	Element responseMsg = createResponseMessage(doc, collection, ex,
1348	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1349	try{
1350	return FedoraCommons.elementToString(responseMsg);
1351	}catch(TransformerException e) {
1352	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1353	+ " " + e;
1354	}
1355	}
1356
1357	/** @return a GS3 describe response message for the services of a collection
1358	* in the Fedora-Greenstone repository. So far, these services are the same for
1359	* all fedora collections: they are the services given in member variable
1360	* serviceNames: DocumentContent/Metadata/StructureRetrieve, ClassifierBrowse,
1361	* ClassifierBrowseMetadataRetrieve.
1362	* All collections in this Digital Library (Fedora Repository) share the
1363	* same services, so this method returns the same services as getServiceList();
1364	* @param collectionName - the name of the collection whose services are to
1365	* be described. It will be converted to a fedora collection pid, which is of
1366	* the form "greenstone:<collectionName>-collection". */
1367	public String describeCollectionServices(String collectionName)
1368	{
1369	Document doc = builder.newDocument();
1370
1371	Element collection = doc.createElement(GSXML.COLLECTION_ELEM);
1372	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1373	attribute.setValue(collectionName);
1374	collection.setAttributeNode(attribute);
1375
1376	Element serviceList = createServiceList(doc);
1377	collection.appendChild(serviceList);
1378
1379	Element responseMsg = createResponseMessage(doc, collection, null,
1380	GSXML.REQUEST_TYPE_DESCRIBE, collectionName);
1381	try{
1382	return FedoraCommons.elementToString(responseMsg);
1383	}catch(TransformerException e) {
1384	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1385	+ " " + e;
1386	}
1387	}
1388
1389	/** All collections in this Digital Library (Fedora Repository) share
1390	* the same services, so this method returns the same as
1391	* describeCollectionService(collName, serviceName).
1392	* @return a GS3 describe response message for the requested service
1393	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1394	* return nothing special except their names; browse (and any query)
1395	* return more complex XML responses.
1396	* @param serviceName - the name of the service in the collection which is to
1397	* be described.*/
1398	public String describeService(String serviceName)
1399	{
1400	// For all the retrieve services (incl ClassifierBrowseMetadataRetrieve)
1401	// we return:
1402	// <message><response from="<name>Retrieve" type="describe">
1403	// <service name="<name>Retrieve" type="retrieve" /></response></message>
1404	// But for browse (and any query) service, we return the data necessary
1405	// for displaying it
1406
1407	Document doc = this.builder.newDocument();
1408	Element service = doc.createElement(GSXML.SERVICE_ELEM);
1409	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1410	attribute.setValue(serviceName);
1411	service.setAttributeNode(attribute);
1412
1413	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1414
1415	if(serviceName.toLowerCase().endsWith("retrieve")) {
1416	attribute.setValue(GSXML.SERVICE_TYPE_RETRIEVE);
1417	}
1418	else if(serviceName.toLowerCase().contains("browse")) {
1419	attribute.setValue(GSXML.SERVICE_TYPE_BROWSE);
1420
1421	// we need name and description <displayItem> elements
1422	Element displayItem
1423	= createNameValuePairElement(doc,
1424	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, "Browse");
1425	service.appendChild(displayItem);
1426
1427	displayItem = createNameValuePairElement(doc,
1428	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1429	"Browse pre-defined classification hierarchies");
1430	service.appendChild(displayItem);
1431
1432	// now need a classifierList
1433	Element classifierList = doc.createElement(
1434	GSXML.CLASSIFIER_ELEM+GSXML.LIST_MODIFIER);
1435
1436	int classifierNum = 1;
1437	// append a <classifier content="some letter" name="CL#">
1438	// for each letter of the alphabet:
1439	Element classifier = createClassifierElement(doc, "TitleByLetter",
1440	classifierNum++, "titles by letter", "Browse titles by letter");
1441	// now add this <classifier> to the <classifierList>
1442	classifierList.appendChild(classifier);
1443
1444	// ANY MORE CLASSIFIERS? ADD THEM HERE
1445
1446	service.appendChild(classifierList);
1447	} // ELSE check for whether it is a query service
1448	else if(serviceName.toLowerCase().contains("query")) {
1449	attribute.setValue(GSXML.SERVICE_TYPE_QUERY);
1450	if(serviceName.equals("TextQuery")) {
1451	describeTextQueryService(service);
1452	} else if(serviceName.equals("FieldQuery")) {
1453	describeFieldQueryService(service);
1454	}
1455	}
1456
1457	// don't forget to add the type attribute to the service!
1458	service.setAttributeNode(attribute);
1459
1460	String from = serviceName;
1461
1462	Element responseMsg = createResponseMessage(doc, service, null,
1463	GSXML.REQUEST_TYPE_DESCRIBE, from);
1464	try{
1465	return FedoraCommons.elementToString(responseMsg);
1466	}catch(TransformerException e) {
1467	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1468	+ " " + e;
1469	}
1470	}
1471
1472	/** Appends children to the parameter service Element that make the
1473	* final service Element into a describe response XML for FedoraGS3's
1474	* TextQuery service.
1475	* @param service is the service Element that is being filled out. */
1476	protected void describeTextQueryService(Element service) {
1477	Document doc = service.getOwnerDocument();
1478	// we need name, submit (button) and description <displayItem> elements
1479	Element displayItem = createNameValuePairElement(doc,
1480	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1481	"Text Search");
1482	service.appendChild(displayItem);
1483
1484	displayItem = createNameValuePairElement(doc,
1485	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1486	service.appendChild(displayItem);
1487
1488	displayItem = createNameValuePairElement(doc,
1489	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1490	"Title and full-text search service");
1491	service.appendChild(displayItem);
1492
1493	//create the <paramList>
1494	Element paramList = doc.createElement(
1495	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1496
1497	// we ignore granularity to search at: it will always be
1498	// document and section level
1499	// we ignore casefolding: always on (that is, case is irrelevant)
1500	// we ignore document display order: always ranked
1501
1502	// Constructing the following:
1503	// <param default="100" name="maxDocs" type="integer">
1504	// <displayItem name="name">Maximum hits to return</displayItem>
1505	// </param>
1506	Element param = doc.createElement(GSXML.PARAM_ELEM);
1507
1508	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1509	attribute.setValue(MAXDOCS);
1510	param.setAttributeNode(attribute);
1511
1512	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1513	attribute.setValue("100");
1514	param.setAttributeNode(attribute);
1515
1516	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1517	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1518	param.setAttributeNode(attribute);
1519
1520	displayItem = createNameValuePairElement(doc,
1521	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1522	"Maximum hits to return");
1523	param.appendChild(displayItem);
1524
1525	paramList.appendChild(param);
1526
1527	// Constructing the following:
1528	// <param name="query" type="string">
1529	// <displayItem name="name">Query string</displayItem>
1530	// </param>
1531	param = doc.createElement(GSXML.PARAM_ELEM);
1532
1533	attribute = doc.createAttribute(GSXML.NAME_ATT);
1534	attribute.setValue(QUERY);
1535	param.setAttributeNode(attribute);
1536
1537	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1538	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1539	param.setAttributeNode(attribute);
1540
1541	displayItem = createNameValuePairElement(doc,
1542	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1543	"Query string");
1544	param.appendChild(displayItem);
1545
1546	paramList.appendChild(param);
1547
1548	service.appendChild(paramList);
1549	}
1550
1551	/** Appends children to the parameter service Element that make the
1552	* final service Element into a describe response XML for FedoraGS3's
1553	* FieldQuery service.
1554	* @param service is the service Element that is being filled out. */
1555	protected void describeFieldQueryService(Element service) {
1556	Document doc = service.getOwnerDocument();
1557	// we need name, submit (button) and description <displayItem> elements
1558	Element displayItem = createNameValuePairElement(doc,
1559	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1560	"Form Search");
1561	service.appendChild(displayItem);
1562
1563	displayItem = createNameValuePairElement(doc,
1564	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_SUBMIT, "Search");
1565	service.appendChild(displayItem);
1566
1567	displayItem = createNameValuePairElement(doc,
1568	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_DESCRIPTION,
1569	"Simple fielded search");
1570	service.appendChild(displayItem);
1571
1572	//create the <paramList>
1573	Element paramList = doc.createElement(
1574	GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
1575
1576	// we ignore granularity to search at: it will always be
1577	// document and section level
1578	// we ignore casefolding: always on (that is, case is irrelevant)
1579	// we ignore document display order: always ranked
1580
1581	// Constructing the following:
1582	// <param default="100" name="maxDocs" type="integer">
1583	// <displayItem name="name">Maximum hits to return</displayItem>
1584	// </param>
1585	Element param = doc.createElement(GSXML.PARAM_ELEM);
1586
1587	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
1588	attribute.setValue(MAXDOCS);
1589	param.setAttributeNode(attribute);
1590
1591	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1592	attribute.setValue("100");
1593	param.setAttributeNode(attribute);
1594
1595	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1596	attribute.setValue(GSXML.PARAM_TYPE_INTEGER);
1597	param.setAttributeNode(attribute);
1598
1599	displayItem = createNameValuePairElement(doc,
1600	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1601	"Maximum hits to return");
1602	param.appendChild(displayItem);
1603
1604	paramList.appendChild(param);
1605
1606	// Constructing the following:
1607	// <param name="simpleField" occurs="4" type="multi">
1608	// <displayItem name="name"></displayItem>
1609	//
1610	// <param name="query" type="string">
1611	// <displayItem name="name">Word or phrase </displayItem>
1612	// </param>
1613	//
1614	// <param default="allFields" name="fieldname" type="enum_single">
1615	// <displayItem name="name">in field</displayItem>
1616	//
1617	// <option name="docTitles">
1618	// <displayItem name="name">document titles</displayItem>
1619	// </option>
1620	// <option name="allTitles">
1621	// <displayItem name="name">document and section titles</displayItem>
1622	// </option>
1623	// <option name="fullText">
1624	// <displayItem name="name">full text</displayItem>
1625	// </option>
1626	// <option name="all">
1627	// <displayItem name="name">titles and full text</displayItem>
1628	// </option>
1629	// <option name="">
1630	// <displayItem name="name"></displayItem>
1631	// </option>
1632	// </param>
1633	// </param>
1634	Element rowOfParams = doc.createElement(GSXML.PARAM_ELEM);
1635	attribute = doc.createAttribute(GSXML.NAME_ATT);
1636	attribute.setValue(SIMPLEFIELD_ATT);
1637	rowOfParams.setAttributeNode(attribute);
1638
1639	// we want the row of controls to occur multiple times
1640	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1641	attribute.setValue(GSXML.PARAM_TYPE_MULTI);
1642	rowOfParams.setAttributeNode(attribute);
1643
1644	attribute = doc.createAttribute(OCCURS_ATT);
1645	attribute.setValue("4"); // we want this row to occur 4 times
1646	rowOfParams.setAttributeNode(attribute);
1647
1648	// <param name="query" type="string">
1649	// <displayItem name="name">Word or phrase </displayItem>
1650	// </param>
1651	param = doc.createElement(GSXML.PARAM_ELEM);
1652
1653	attribute = doc.createAttribute(GSXML.NAME_ATT);
1654	attribute.setValue(QUERY);
1655	param.setAttributeNode(attribute);
1656
1657	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1658	attribute.setValue(GSXML.PARAM_TYPE_STRING);
1659	param.setAttributeNode(attribute);
1660
1661	displayItem = createNameValuePairElement(doc,
1662	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1663	"Word or phrase");
1664	param.appendChild(displayItem);
1665	rowOfParams.appendChild(param);
1666
1667	// <param default="allFields" name="fieldName" type="enum_single">
1668	// <displayItem name="name">in field</displayItem>
1669	param = doc.createElement(GSXML.PARAM_ELEM);
1670	attribute = doc.createAttribute(GSXML.NAME_ATT);
1671	attribute.setValue(FIELDNAME_ATT);
1672	param.setAttributeNode(attribute);
1673
1674	attribute = doc.createAttribute(GSXML.TYPE_ATT);
1675	attribute.setValue(GSXML.PARAM_TYPE_ENUM_SINGLE);
1676	param.setAttributeNode(attribute);
1677
1678	attribute = doc.createAttribute(GSXML.DEFAULT_ATT);
1679	attribute.setValue(ALL_FIELDS);
1680	param.setAttributeNode(attribute);
1681
1682	displayItem = createNameValuePairElement(doc,
1683	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1684	"in field");
1685	param.appendChild(displayItem);
1686
1687	String[] searchFieldNames
1688	= {ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT};
1689	String[] searchFieldDisplay = {"all titles and full-text",
1690	"document titles only", "document and section titles",
1691	"full-text only"};
1692
1693	// for each fieldName create an option element and insert
1694	// the option into the enum_multi drop-down param:
1695	// <option name="fieldName">
1696	// <displayItem name="name">fieldName</displayItem>
1697	// </option>
1698	for(int i = 0; i < searchFieldNames.length; i++) {
1699	Element option = doc.createElement(GSXML.PARAM_OPTION_ELEM);
1700	attribute = doc.createAttribute(GSXML.NAME_ATT);
1701	attribute.setValue(searchFieldNames[i]);
1702	option.setAttributeNode(attribute);
1703
1704	displayItem = createNameValuePairElement(doc,
1705	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME,
1706	searchFieldDisplay[i]);
1707	option.appendChild(displayItem);
1708	param.appendChild(option); // add option to the drop-down box
1709	}
1710
1711	rowOfParams.appendChild(param);
1712	paramList.appendChild(rowOfParams);
1713	service.appendChild(paramList);
1714	}
1715
1716	/**
1717	* @return a GS3 describe response message for the requested service
1718	* of the given collection. DocumentContent/Metadata/StructureRetrieve
1719	* return nothing special except their names; browse (and any query)
1720	* return more complex XML responses.
1721	* All collections in this Digital Library (Fedora Repository) share
1722	* the same services, so this method returns the same as
1723	* describeService(serviceName).
1724	* @param collectionName - the name of the collection whose service is to
1725	* be described. It will be converted to a fedora collection pid, which is of
1726	* the form "greenstone:<collectionName>-collection".
1727	* @param serviceName - the name of the service in the collection which is to
1728	* be described. */
1729	public String describeCollectionService(String collectionName,
1730	String serviceName) {
1731	// collectionName can be ignored, because all services are FedoraGS3
1732	// services and are not unique to any particular (greenstone) collection.
1733	return describeService(serviceName);
1734	}
1735
1736	/** This method performs the implemented browse operation: allowing the
1737	* user to browse the titles of documents in the given collection by letter
1738	* and returning the results.
1739	* @param collectionName is the name of the collection whose documents
1740	* starting with the given letter will be returned.
1741	* @param classifierIDs are the ids of the classifiers on which to browse. In
1742	* this case, the classifier indicates whether we browse titles by letter, or
1743	* browse (documents) by collection; and it is of the form <CL(letter)>.
1744	* @param structures - the requested browse substructure. Can be any combination
1745	* of ancestors, parent, siblings, children, descendants.
1746	* @param infos - the requested structural info. Can be numSiblings,
1747	* siblingPosition, numChildren.
1748	* @return a GS3 ClassifierBrowse response message which lists all
1749	* the documents that start with the letter indicated by parameter classifier.
1750	*/
1751	public String browse(String collectionName, String[] classifierIDs,
1752	String[] structures, String[] infos)
1753	{
1754	// Construct one string from the structures and structural info arrays
1755	String structure = "";
1756	String info = "";
1757	for(int i = 0; i < structures.length; i++) {
1758	structure = structure + structures[i] + "\|";
1759	}
1760	for(int i = 0; i < infos.length; i++) {
1761	info = info + infos[i] + "\|";
1762	}
1763
1764	Document doc = builder.newDocument();
1765	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1766
1767	// <classifierNodeList>
1768	Element classifierNodeList = doc.createElement(GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
1769
1770	for(int i = 0; i < classifierIDs.length; i++) {
1771	if(classifierIDs[i].startsWith("CL1")) { // browse by titles
1772	browseTitlesByLetterClassifier(doc, classifierNodeList,
1773	collectionName, classifierIDs[i],
1774	structure, info);
1775	}
1776	}
1777
1778	Element responseMsg = createResponseMessage(doc, classifierNodeList, ex,
1779	GSXML.REQUEST_TYPE_DESCRIBE, /collectionName+/ /"ClassifierBrowse");
1780	try {
1781	return FedoraCommons.elementToString(responseMsg);
1782	} catch(TransformerException e) {
1783	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
1784	+ " " + e;
1785	}
1786	}
1787
1788	/** CL1 browsing classifier: browsing titles by starting letter.
1789	* The browsing structure is retrieved.
1790	* @param doc - the document object that will contain the CL1 browsing structure.
1791	* @param classifierNodeList - the classifiers will be added to this nodeList.
1792	* @param collectionName - name of the collection through which we are browsing CL1.
1793	* @param classifierID - the ID of the (sub)classifier. Can be CL1, CL1.x, where x is
1794	* a letter.
1795	* @param structure - the requested browse substructure. Can be any combination of
1796	* ancestors, parent, siblings, children, descendants. 'siblings' not yet implemented.
1797	* @param info - the requested structural info. Can be numSiblings, siblingPosition,
1798	* numChildren.
1799	* @return the classifierNodeList with the CL1 classifier browse structure.
1800	*/
1801	public Element browseTitlesByLetterClassifier(Document doc, Element classifierNodeList,
1802	String collectionName, String classifierID,
1803	String structure, String info)
1804	{
1805	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1806
1807	if(structure.indexOf("entire") != -1) {
1808	structure = structure + "ancestors\|descendants";
1809	}
1810
1811	// Structure of ancestors and children only at this stage
1812	int firstLevel = classifierID.indexOf('.');
1813	int secondLevel = classifierID.lastIndexOf('.');
1814
1815	// <nodeStructure>
1816	Element nodeStructure = doc.createElement(GSXML.NODE_STRUCTURE_ELEM);
1817
1818	// requested classifier node
1819	Element classNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1820	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1821	attribute.setValue(classifierID);
1822	classNode.setAttributeNode(attribute);
1823	Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1824	typeAttribute.setValue(GSXML.VLIST);
1825	classNode.setAttributeNode(typeAttribute);
1826
1827	if(firstLevel == -1) { // CL1 - toplevel node
1828	Element root = (Element)classNode.cloneNode(true); // clone the node before appending children
1829
1830	classifierNodeList.appendChild(classNode);
1831	classNode.appendChild(nodeStructure);
1832
1833	nodeStructure.appendChild(root);
1834	if(structure.indexOf("descendants") != -1) {
1835	getTitlesByLetterStructure(collectionName, root, classifierID, true, null);
1836	} else if(structure.indexOf("children") != -1) {
1837	getTitlesByLetterStructure(collectionName, root, classifierID, false, null);
1838	}
1839	// nothing to be done for siblings
1840	}
1841	else if(firstLevel == secondLevel) { // CL1.x, where x is a number
1842
1843	if(structure.indexOf("parent") != -1
1844	\|\| structure.indexOf("ancestors") != -1
1845	\|\| structure.indexOf("siblings") != -1) {
1846	String toplevelID = classifierID.substring(0, firstLevel);
1847	Element toplevelNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
1848	attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1849	attribute.setValue(toplevelID);
1850	toplevelNode.setAttributeNode(attribute);
1851	typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1852	typeAttribute.setValue(GSXML.VLIST);
1853	toplevelNode.setAttributeNode(typeAttribute);
1854	Element node = (Element)toplevelNode.cloneNode(true); // clone nodes before appending children
1855
1856	classifierNodeList.appendChild(toplevelNode);
1857	toplevelNode.appendChild(nodeStructure);
1858	nodeStructure.appendChild(node);
1859
1860	if(structure.indexOf("siblings") != -1) { // get the children of the parents too
1861	getTitlesByLetterStructure(collectionName, node, toplevelID, false, classNode);
1862	// pass the requested node (classNode) so that it is attached in the correct
1863	// location among its siblings, and to ensure that it is not recreated.
1864	// getTitlesByLetterStructure() will append classNode to node
1865	} else {
1866	node.appendChild(classNode);
1867	}
1868	} else {
1869	Element node = (Element)classNode.cloneNode(true);
1870	classifierNodeList.appendChild(node);
1871	node.appendChild(nodeStructure);
1872	nodeStructure.appendChild(classNode);
1873	}
1874
1875	int num = Integer.parseInt(classifierID.substring(firstLevel+1)); // get x from CL1.x
1876	char ch = (char)(num - 1 + 'A');
1877	if(structure.indexOf("descendants") != -1) {
1878	getTitlesForLetter(ch, collectionName, classNode, "descendants");
1879	} else if(structure.indexOf("children") != -1) {
1880	getTitlesForLetter(ch, collectionName, classNode, "children");
1881	}
1882	}
1883	else { // ought to be a doc structure retrieve request, not classifierbrowse structure retrieve
1884	LOG.error("ClassifierID: " + classifierID + ". Shouldn't be in browse method");
1885	}
1886
1887	return classifierNodeList;
1888	}
1889
1890	/** Creates a (CL1) subclassifier element for the docs whose titles start with
1891	* the given letter.
1892	* @param ch - the starting letter of the document titles to retrieve.
1893	* @param collectionName - name of the collection through which we are browsing CL1.
1894	* @param classifierNode - the docNodes found will be appended to this node.
1895	* @param depthStructure - can be descendants or children. Specifies what to retrieve:
1896	* gets descendants of any documents found, otherwise gets just the children.
1897	* @return the given classifierNode which will have the child (or descendant) documents
1898	* appended to it.
1899	*/
1900	public Element getTitlesForLetter(char ch, String collectionName,
1901	Element classifierNode, String depthStructure)
1902	{
1903	Document doc = classifierNode.getOwnerDocument();
1904	FedoraGS3RunException ex = null; //any RemoteException or UnsupportedEncodingException
1905
1906
1907	// Retrieve the document structure for each subClassifierID:
1908	// all the documents that begin with its letter.
1909	String letter = String.valueOf(ch);
1910	try {
1911	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1912	if(docPIDs.length == 0) {
1913	return classifierNode; // skip letters that don't have any kids
1914	}
1915
1916	for(int i = 0; i < docPIDs.length; i++) {
1917	// work out the document's fedora PID and section ID
1918	String sectionID = getSectionIDFromDocID(docPIDs[i]);
1919	String docPID = getDocPIDFromDocID(docPIDs[i]);
1920
1921	// get the required section, along with children or descendants
1922	Element section = getSectionStructureXML(docPID, sectionID, depthStructure, "");
1923
1924	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
1925	Element docRootNode = createDocNodeFromSubsection(doc, section, docPID);
1926
1927	// fills in the subtree of the rootNode in our nodeStructure element
1928	createDocStructure(doc, section, docRootNode, docPID); //where section represents the root section
1929	classifierNode.appendChild(docRootNode);
1930	}
1931	} catch(Exception e) {
1932	ex = new FedoraGS3RunException(e);
1933	ex.setSpecifics("requested portion of TOC file or trouble with fielded search ");
1934	}
1935
1936	return classifierNode;
1937	}
1938
1939
1940	/** Creates all the subclassifiers (CL1.x) for CL1, the classifier to browse by the
1941	* starting letter of the alphabet. X is each letter of the alphabet for which there
1942	* are matching document titles.
1943	* @param collectionName - name of the collection through which we are browsing CL1.
1944	* @param classifierNode - the docNodes found will be appended to this node.
1945	* @param classifierID - the ID of parent classifier, i.e. CL1, which is used to create
1946	* the IDs for the subclassifiers (CL.x).
1947	* @param getDescendants - if true, get descendants of any documents found, otherwise
1948	* get just the children.
1949	* @param wantedSibling - the node (already created) whose siblings are requested. We
1950	* need to make sure not to recreate this node when creating its sibling nodes.
1951	* @return the given classifierNode, with the CL.x subclassifiers for the letters of
1952	* the alphabet that are represented in the document titles.
1953	*/
1954	public Element getTitlesByLetterStructure(String collectionName, Element classifierNode,
1955	String classifierID, boolean getDescendants,
1956	Element wantedSibling)
1957	{
1958	String ID = "";
1959	if(wantedSibling != null) { // the pre-created classifier node whose siblings were requested
1960	ID = wantedSibling.getAttribute(GSXML.NODE_ID_ATT);
1961	}
1962
1963	Document doc = classifierNode.getOwnerDocument();
1964	FedoraGS3RunException ex = null; // any RemoteException or UnsupportedEncodingException
1965
1966	// We're going to loop to the end of the alphabet
1967	int count = 1;
1968	for(char ch = 'A'; ch <= 'Z'; ch++, count++) {
1969	// Retrieve the document structure for each subClassifierID:
1970	// all the documents that begin with its letter.
1971	String letter = String.valueOf(ch);
1972	try {
1973	String[] docPIDs = this.browseTitlesByLetter(collectionName, letter);
1974	if(docPIDs.length == 0) {
1975	continue; // skip letters that don't have any kids
1976	}
1977	Element subClassifier = null;
1978	if(wantedSibling != null && ID.equals(classifierID+"."+count)) {
1979	// already have the requested node, don't recreate it
1980	subClassifier = wantedSibling;
1981	} else {
1982	// <classifierNode childType="VList" nodeID="CL1.x">
1983	subClassifier = doc.createElement(GSXML.CLASS_NODE_ELEM);
1984	Attr typeAttribute = doc.createAttribute(GSXML.CHILD_TYPE_ATT);
1985	typeAttribute.setValue(GSXML.VLIST);
1986	subClassifier.setAttributeNode(typeAttribute);
1987	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
1988	attribute.setValue(classifierID+"."+count);
1989	subClassifier.setAttributeNode(attribute);
1990	}
1991	classifierNode.appendChild(subClassifier); // either way, append the subClassifier node
1992
1993	if(getDescendants) { // get the documents
1994
1995	// append the <docNodes> for the docPIDs found as children
1996	// of subclassifier
1997
1998	for(int i = 0; i < docPIDs.length; i++) {
1999	// work out the document's fedora PID and section ID
2000	String sectionID = getSectionIDFromDocID(docPIDs[i]);
2001	String docPID = getDocPIDFromDocID(docPIDs[i]);
2002
2003	// get the required section, along with children or descendants
2004	Element section = getSectionStructureXML(docPID, sectionID, "descendants", "");
2005
2006	// <documentNode nodeID="docID" docType="hierarchy" nodeType="root">
2007	Element rootNode = createDocNodeFromSubsection(doc, section, docPID);
2008
2009	// fills in the subtree of the rootNode in our nodeStructure element
2010	createDocStructure(doc, section, rootNode, docPID); //where section represents the root section
2011	subClassifier.appendChild(rootNode);
2012	}
2013	}
2014	} catch(Exception e) {
2015	ex = new FedoraGS3RunException(e);
2016	ex.setSpecifics("requested portion of TOC file or "
2017	+ "trouble with fielded search ");
2018	}
2019	}
2020	return classifierNode;
2021	}
2022
2023
2024	/** This method performs something equivalent to a greenstone3
2025	* ClassifierBrowseMetadataRetrieve on the classifierNodeIDs
2026	* @param classNodeIDs array of classifierNode IDs for which the metadata
2027	* needs to be returned.
2028	* @param metafields are the classifier metadata fields that are to be returned.
2029	* At present this method ignores them/pretends the requested metafields are
2030	* "all" and always returns the Title meta for the requested classifier nodes
2031	* (because that is all the metadata this Fedora classifier has at present).
2032	* @return a GS3 ClassifierBrowseMetadataRetrieve response message which
2033	* lists the metadata for all the classifierNodes passed as parameter.*/
2034	public String browseMetadataRetrieve(String[] classNodeIDs, String[] metafields)
2035	{
2036	Document doc = this.builder.newDocument();
2037	// <classifierNodeList>
2038	Element classifierNodeList = doc.createElement(
2039	GSXML.CLASS_NODE_ELEM+GSXML.LIST_MODIFIER);
2040
2041	// create <classifierNode><metadataList><metadata>s
2042	// </metadataList></classifierNode> for all letters of the alphabet
2043	for(int i = 0; i < classNodeIDs.length; i++) {
2044	// strip ID of everything before the first '.' (i.e. remove "CL#.")
2045	int index = classNodeIDs[i].indexOf('.');
2046	String subClassifierNumber = classNodeIDs[i].substring(index+1);
2047	index = subClassifierNumber.indexOf('.'); // find next decimal point, if any
2048	if(index != -1) {
2049	subClassifierNumber = subClassifierNumber.substring(0, index);
2050	}
2051	int subClassifierNum = Integer.parseInt(subClassifierNumber);
2052	String classifierName = "";
2053	if(subClassifierNum == 0) { // no document titles started with a letter
2054	classifierName = "A-Z";
2055	} else {
2056	char letter = (char)('A' + subClassifierNum - 1); // A = 1
2057	classifierName = String.valueOf(letter);
2058	}
2059
2060	// <classifierNode nodeID="CL#.subNum">
2061	Element classifierNode = doc.createElement(GSXML.CLASS_NODE_ELEM);
2062	Attr attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2063	attribute.setValue(classNodeIDs[i]);
2064	classifierNode.setAttributeNode(attribute);
2065
2066	// <metadataList>
2067	Element metadataList = doc.createElement(
2068	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2069
2070	// at least one metadata element: that of the title of this
2071	// classifierNode:
2072	// <metadata name="Title">letter</metadata>
2073	Element metadata = this.createNameValuePairElement(doc,
2074	GSXML.METADATA_ELEM, "Title", classifierName);
2075
2076	// now connect up everything
2077	metadataList.appendChild(metadata);
2078	classifierNode.appendChild(metadataList);
2079	classifierNodeList.appendChild(classifierNode);
2080	}
2081
2082	Element responseMsg = createResponseMessage(doc, classifierNodeList, null,
2083	GSXML.REQUEST_TYPE_PROCESS, //collName +
2084	"ClassifierBrowseMetadataRetrieve");
2085	try{
2086	return FedoraCommons.elementToString(responseMsg);
2087	}catch(TransformerException e) {
2088	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2089	+ " " + e;
2090	}
2091	}
2092
2093	/** @return a newly created element of the following format:
2094	* <classifier content="somecontent" name="CL+num">
2095	* <displayItem name="name">someClassifierName</displayItem>
2096	* <displayItem name="description">Browse by classifier name</displayItem>
2097	* </classifier>
2098	* @param doc - the document used to create the element
2099	* @param content - value of the content attribute
2100	* @param classifierNum - the number suffixed to the CL, together forming
2101	* the classifier Node's ID
2102	* @param displayNameVal is the bodytext of a named displayItem element
2103	* @param displayDescrVal is the bodytext of a displayItem element with
2104	* description */
2105	protected Element createClassifierElement(Document doc, String content,
2106	int classifierNum, String displayNameVal, String displayDescrVal)
2107	{
2108	final String CL = "CL";
2109	Element classifier = doc.createElement(GSXML.CLASSIFIER_ELEM);
2110	// content attribute
2111	Attr att = doc.createAttribute(GSXML.CLASSIFIER_CONTENT_ATT);
2112	att.setValue(content);
2113	classifier.setAttributeNode(att);
2114	// name attribute
2115	att = doc.createAttribute(GSXML.NAME_ATT);
2116	att.setValue(CL + classifierNum);
2117	classifier.setAttributeNode(att);
2118
2119	// now create the displayItem children for classifier:
2120	// <displayItem name="name">#letter</displayItem>
2121	// <displayItem name="description">Browse titles starting with #letter</displayItem>
2122	Element displayItem = createNameValuePairElement(doc,
2123	GSXML.DISPLAY_TEXT_ELEM, GSXML.DISPLAY_TEXT_NAME, displayNameVal);
2124	classifier.appendChild(displayItem);
2125	displayItem = createNameValuePairElement(doc, GSXML.DISPLAY_TEXT_ELEM,
2126	GSXML.DISPLAY_TEXT_DESCRIPTION, displayDescrVal);
2127	classifier.appendChild(displayItem);
2128
2129	return classifier;
2130	}
2131
2132
2133	/** @return a newly created element of the following format:
2134	* <elementName name="somename">"some display value"</elementName>
2135	* @param doc - the document used to create the element
2136	* @param elementName - the tag name
2137	* @param name - value of attribute name
2138	* @param value - the body text of the element */
2139	protected Element createNameValuePairElement(Document doc, String elementName,
2140	String name, String value) {
2141	// <elementName name="somename">"some display value"</elementName>
2142	Element element = doc.createElement(elementName);
2143	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2144	attribute.setValue(name);
2145	element.setAttributeNode(attribute);
2146
2147	element.appendChild(doc.createTextNode(value));
2148	return element;
2149	}
2150
2151	/**
2152	* @param collection is the collection to search in
2153	* @param query is the query term to search for. It won't specify the
2154	* indexed field to search in, which will mean that GSearch will
2155	* search all default indexed fields.
2156	* @param maxDocs is the maximum number of results to return (which
2157	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2158	*/
2159	public String[] textQuery(String collection, String query,
2160	int maxDocs)
2161	throws Exception
2162	{
2163	// no need to search there is no query or query is empty spaces
2164	if(query.trim().equals(""))
2165	return new String[]{};
2166
2167	// QUERY value won't specify indexed field to search, Fedora
2168	// Gsearch will take that as meaning all default indexed fields.
2169	// Params to search() method below: string of fielded query terms;
2170	// hitpageStart, hitpageEnd, snippetsMax (leave that 0)
2171	query = query + " " + "PID" + COLON + GREENSTONE;
2172
2173	String searchResult = this.fedoraGSearch.search(query, 1, maxDocs, 0);
2174	// now we have the XML returned by FedoraGSearch, get the pids
2175	// of the documents returned (if any)
2176	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2177	collection, searchResult);
2178	return pids;
2179	}
2180
2181	/**
2182	* This method performs a fieldquery, searching for x number of phrases
2183	* in each of the 4 indexed fields.
2184	* @param collection is the collection to search in
2185	* @param nameValParamsMap is a Map of several(key, value) entries,
2186	* 4 of which we're concerned with here:
2187	* - the keys are ALL_FIELDS, DOC_TITLES, ALL_TITLES, FULLTEXT
2188	* - the values are a comma separated list of terms (phrases or single
2189	* words) to search that field in. There may be more than 1 or
2190	* there may be none (in which case there may be N empty values or
2191	* spaces separated by commas).
2192	* @param maxDocs is the maximum number of results to return (which
2193	* at present we consider equivalent to FedoraGSearch's hitpageSize).
2194	* */
2195	public String[] fieldQuery(String collection, Map nameValParamsMap,
2196	int maxDocs)
2197	throws Exception
2198	{
2199	// we're going to maintain a list of UNIQUE pids that were returned
2200	// in search results. Hence we use Set:
2201	java.util.Set set = new java.util.HashSet();
2202
2203	// (1) Use Fedora's search to search document titles, if they were
2204	// specified:
2205	String[] docTitlepids = {};
2206
2207	String docTitleTerms = (String)nameValParamsMap.get(DOC_TITLES);
2208	if(docTitleTerms != null) { // no doc titles may have been specified
2209	String[] phrases = docTitleTerms.split(COMMA);
2210
2211	// search the individual phrases first:
2212	for(int i = 0; i < phrases.length; i++) {
2213	if(phrases.equals("") \|\| phrases.equals(" "))
2214	continue; //skip when there are no terms
2215	docTitlepids = this.searchDocumentTitles(
2216	collection, phrases[i], false);
2217	for(int j = 0; j < docTitlepids.length; j++)
2218	set.add(docTitlepids[j]);
2219	}
2220	}
2221	// (2) use FedoraGSearch to search doc AND section titles, and
2222	// fulltext (in case these were specified in nameValParamsMap):
2223	String searchResult = this.fedoraGSearch.search(
2224	nameValParamsMap, 1, maxDocs);
2225
2226	String[] pids = this.fedoraGSearch.getPIDsFromSearchResult(
2227	collection, searchResult);
2228
2229	for(int i = 0; i < pids.length; i++)
2230	set.add(pids[i]);
2231
2232	pids = null;
2233	pids = new String[set.size()];
2234	set.toArray(pids); // unique pids
2235	return pids;
2236	}
2237
2238	/** @return a String representing Greenstone3 XML for a query process
2239	* response returning the results for the query denoted by parameter
2240	* nameValParamsMap.
2241	* @param nameValParamsMap is a Hashmap of name and value pairs for all the
2242	* query field data values. The names match the field names that
2243	* describeCollectionService() would have returned for the query service.
2244	* @param collection is the name of the collection
2245	* @param service is the name of the query service
2246	* This method is only ever called when any of the services in the digital
2247	* library described themselves as type=query. Therefore any digital
2248	* libraries that have no query services, can just return emtpy message
2249	* strings (or even "") since this method will never be called on them
2250	* anyway. */
2251	public String query(String collection, String service,
2252	Map nameValParamsMap)
2253	{
2254	FedoraGS3RunException ex = null;
2255	// (1) obtain the requested number of maximum result documents
2256	int maxDocs = 100;
2257	try{
2258	maxDocs = Integer.parseInt((String)nameValParamsMap.get(MAXDOCS));
2259	} catch(NumberFormatException e) {
2260	maxDocs = 100;
2261	}
2262
2263	String pids[] = {};
2264	// (2) for Textquery, we simply search ALL_FIELDS using FedoraGSearch
2265	if(service.endsWith("TextQuery")) {
2266	try {
2267	// get the Query field:
2268	String query = (String)nameValParamsMap.get(QUERY);
2269	pids = textQuery(collection, query, maxDocs);
2270	}
2271	catch(Exception e) {
2272	LOG.error("Error in TextQuery processing: " + e);
2273	ex = new FedoraGS3RunException(
2274	"When trying to use FedoraGenericSearch for a TextQuery", e);
2275
2276	}
2277	} else { // (3) FieldQuery
2278	// first get the comma-separated lists
2279	String listOfFieldNames = (String)nameValParamsMap.get(FIELDNAME_ATT);
2280	String listOfSearchTerms = (String)nameValParamsMap.get(QUERY);
2281	// both are comma separated lists, so split both on 'comma'
2282	String[] fieldNames = listOfFieldNames.split(COMMA);
2283	String[] searchTerms = listOfSearchTerms.split(COMMA);
2284
2285	// In the fieldNames and searchTerms lists of nameValParamsMap,
2286	// each searchTerm element was matched with its correspondingly
2287	// indexed fieldName.
2288	// A new map is going to reorganise this, by putting all terms
2289	// for a particular fieldName together in a comma separated list
2290	// and associating that with the fieldName. I.e. (key, value) ->
2291	// (fieldName, comma-separated list of all terms in that field)
2292	Map map = new HashMap();
2293	for(int i = 0; i < searchTerms.length; i++) {
2294	// there may be fewer searchTerms than fieldNames (since some
2295	// fieldNames may have been left empty), so loop on searchTerms
2296	if(map.containsKey(fieldNames[i])) { // fieldName is already
2297	// in the list, so append comma with new value
2298	String termsList = (String)map.get(fieldNames[i]);
2299	termsList = termsList + COMMA + searchTerms[i];
2300	map.put(fieldNames[i], termsList);
2301	} else { // this is the first time this fieldName occurred
2302	// just put the fieldName with searchTerm as-is
2303	map.put(fieldNames[i], searchTerms[i]);
2304	}
2305	}
2306
2307	try {
2308	// For fieldquery, we search on all the fieldNames specified
2309	// - if DOC_TITLES is specified then we use Fedora's search
2310	// - for all other fieldNames specified, we use FedoraGSearch
2311	pids = fieldQuery(collection, map, maxDocs);
2312	}
2313	catch(Exception e) {
2314	LOG.error("Error in FieldQuery processing: " + e);
2315	ex = new FedoraGS3RunException(
2316	"When trying to use FedoraGenericSearch for a FieldQuery", e);
2317	}
2318	}
2319
2320	// Build Greenstone XML Query response message for from
2321	// the pids (which should be document identifiers)
2322	Document doc = builder.newDocument();
2323	// <metadataList><metadata name="numDocsMatched" value="n" />
2324	// </metadataList>
2325	Element metadataList = doc.createElement(
2326	GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
2327	Element metadata = doc.createElement(GSXML.METADATA_ELEM);
2328
2329	Attr attribute = doc.createAttribute(GSXML.NAME_ATT);
2330	attribute.setValue(NUM_DOCS_MATCHED);
2331	metadata.setAttributeNode(attribute);
2332
2333	attribute = doc.createAttribute(GSXML.VALUE_ATT);
2334	attribute.setValue(Integer.toString(pids.length));
2335	metadata.setAttributeNode(attribute);
2336
2337	metadataList.appendChild(metadata);
2338
2339	// <documentNodeList>
2340	// <documentNode nodeID="HASHac0a04dd14571c60d7fbfd.4.2"
2341	// docType='hierarchy' nodeType="leaf" />
2342	// ...
2343	// ...
2344	// </documentNodeList>
2345	Element docNodeList = doc.createElement(
2346	GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
2347	// for each
2348	for(int i = 0; i < pids.length; i++) {
2349	Element docNode = doc.createElement(GSXML.DOC_NODE_ELEM);
2350	attribute = doc.createAttribute(GSXML.NODE_ID_ATT);
2351	attribute.setValue(pids[i]);
2352	docNode.setAttributeNode(attribute);
2353
2354	attribute = doc.createAttribute(GSXML.DOC_TYPE_ATT);
2355	attribute.setValue("hierarchy");
2356	docNode.setAttributeNode(attribute);
2357
2358	attribute = doc.createAttribute(GSXML.NODE_TYPE_ATT);
2359	attribute.setValue("root");
2360	docNode.setAttributeNode(attribute);
2361	docNodeList.appendChild(docNode);
2362	}
2363
2364	Element responseMsg = createResponseMessage(doc, docNodeList, ex,
2365	GSXML.REQUEST_TYPE_PROCESS, service);
2366	try{
2367	return FedoraCommons.elementToString(responseMsg);
2368	}catch(TransformerException e) {
2369	return FedoraGS3RunException.xmlToStringConversionFailureResponseMsg
2370	+ " " + e;
2371	}
2372	}
2373
2374
2375	// FOR NOW, add the new method that converts URLs to document identifiers(PIDs)
2376	/** Given a URL that represents a fedoraPID, will look up the object.
2377	* If it exists, it will return the contents of the DC:Title of its datastream.
2378	* If it doesn't exist, it will return the URL as-is.
2379	* @param URL: the URL that (after modification) represents a fedoraPID to look up.
2380	* @param collection: the name of collection in which to search for the URL
2381	* representing a fedoraPID.
2382	* @return the string (representing a fedoraPID) stored in the DC:Title of the
2383	* URL-fedoraPID. If the URL-fedoraPID is not an object in the given collection,
2384	* then the parameter URL is returned.
2385	*/
2386	public String getPIDforURL(String url, String collection) {
2387	FedoraGS3RunException ex = null; // any RemoteException
2388
2389	// (1) convert url to the fedorapid
2390	// / -> _ and : -> -
2391	String fedoraPID = url.replaceAll("/", "_");
2392	fedoraPID = fedoraPID.replaceAll(":", "-");
2393	// prefix "greenstone-http:<colname>-" to the fedoraPID
2394	fedoraPID = GREENSTONE+_HTTP+COLON+collection+HYPHEN+fedoraPID;
2395	//LOG.error("### fedoraPID: " + fedoraPID);
2396
2397	// (2) Look up the datastream for the fedorapid
2398	String dcTitle = "";
2399	try {
2400	dcTitle = getDCTitle(fedoraPID);
2401	} catch(Exception e) {
2402	LOG.error("Error retrieving dcTitle for PID " + fedoraPID + ": " + e);
2403	ex = new FedoraGS3RunException("When trying to retrieve dc:title for URL: " + url, e);
2404	}
2405	//String dc = this.getDC(fedoraPID);
2406	//LOG.error("### document ID (in dcTitle) found is: " + dcTitle);
2407
2408	// (3) if fedorapid exists, extract the dc:title content.
2409	// if it doesn't exist, return url
2410	if(dcTitle.equals("")) {
2411	return url;
2412	} else {
2413	// It represents a fedoraPID of its own, so prefix fedora namespace and return it.
2414	//return GREENSTONE+COLON+collection+HYPHEN+dcTitle; // NO. Handled in g2f-buildcol.pl
2415	return dcTitle+"-1";
2416	}
2417	}
2418
2419	public static void main(String args[]) {
2420	try{
2421	// testing default constructor
2422	//FedoraGS3Connection con = new FedoraGS3Connection();
2423
2424	// testing constructor that takes properties file to show initial
2425	// fedora server values
2426	java.io.File propertyFilename
2427	= new java.io.File("fedoraGS3.properties");
2428	FedoraGS3Connection con = new FedoraGS3Connection(propertyFilename);
2429
2430	// DESCRIBE: serviceList, collectionList
2431	System.out.println("serviceList:\n" + con.getServiceList());
2432
2433	System.out.println("collectionList:\n" + con.getCollectionList());
2434
2435	String[] colPIDs = con.getCollections();
2436	String[] collectionNames = con.getCollectionNames(con.getCollections());
2437
2438
2439	for(int i = 0; i < collectionNames.length; i++) {
2440	System.out.println("Describing collections:\n");
2441	System.out.println(con.describeCollection(collectionNames[i]));
2442	System.out.println("Describing collection services:\n"
2443	+ con.describeCollectionServices(collectionNames[i]));
2444	}
2445
2446	String[] serviceNames = con.getServiceNames();
2447	for(int i = 0; i < serviceNames.length; i++) {
2448	System.out.println("Describing " + serviceNames[i] + ":\n"
2449	+ con.describeCollectionService("demo", serviceNames[i]));
2450	}
2451
2452
2453	// TRYING OUT SPECIAL top-level document metadata retrieval (DLS, DC)
2454	// along with EX of the top-level document:
2455	System.out.println("\nGET META for greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae:");
2456	System.out.println(con.getDocumentMetadata(new String[]{"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae"}, new String[]{"all"}));
2457
2458
2459	String[] docIDs = con.getCollectionDocs(colPIDs[0]);
2460	System.out.println("\nGET CONTENT:");
2461	for(int i = 0; i < docIDs.length; i++) {
2462	System.out.println(con.getContent(docIDs[i]));
2463	}
2464
2465	System.out.println("\nGET META:");
2466	for(int i = 0; i < docIDs.length; i++) {
2467	System.out.println(con.getDocumentMetadata(docIDs[i], new String[]{"all"}));
2468	}
2469
2470	String[] getTitlesFor = {
2471	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae",
2472	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b",
2473	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1",
2474	"greenstone:gs2mgdemo-HASH01d667303fe98545f03c14ae-1.7",
2475	"greenstone:gs2mgdemo-HASHa568bac1d8d7bd12a0938b-1.5.1"
2476	};
2477
2478	// first let's display the regular meta for top-level docs and
2479	// their sections
2480	for(int i = 0; i < getTitlesFor.length; i++) {
2481	System.out.println(con.getDocumentMetadata(getTitlesFor[i], new String[]{"all"}));
2482	}
2483
2484	System.out.println("\nTitles are:");
2485	System.out.println(con.getTitleMetadata(getTitlesFor));
2486
2487	System.out.println("\nGET STRUCTURE:");
2488	for(int i = 0; i < docIDs.length; i++) {
2489	System.out.println("Descendents and numChildren:\n"
2490	+ con.getDocumentStructure(docIDs[i], new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2491	System.out.println("Parent and numSiblings:\n"
2492	+ con.getDocumentStructure(docIDs[i], new String[] {"parent"}, new String[] {AbstractBasicDocument.INFO_NUM_SIBS}));
2493	}
2494
2495	// TEST ERROR CASES:
2496	System.out.println("\nTESTING ERROR CASES");
2497	System.out.println(con.getContent("greenstone:demo-pinky"));
2498	String[] errorCases = { "greenstone:demo-HASH23d1019b589e2ef6a680e3-1.5.1.5",
2499	"greenstone:demo-pinky" };
2500	System.out.println(con.getContent(errorCases));
2501	System.out.println(con.getDocumentMetadata(errorCases, new String[]{"all"}));
2502	System.out.println(con.getDocumentStructure(errorCases, new String[] {"descendants"}, new String[] {AbstractBasicDocument.INFO_NUM_CHILDREN}));
2503
2504	System.out.println("\nCLASSIFIER BROWSE");
2505	System.out.println(con.browse("gs2mgdemo", //"ClassifierBrowse",
2506	new String[]{"CL1"}, new String[] {""}, new String[] {""}));
2507
2508	System.out.println("\nCLASSIFIER BROWSE METADATA RETRIEVE");
2509	String[] classNodeIDs = new String[26];
2510	for(int i = 0; i < classNodeIDs.length; i++) {
2511	int subClassifierNum = i + 1;
2512	classNodeIDs[i] = "CL1." + subClassifierNum;
2513	}
2514	System.out.println(con.browseMetadataRetrieve(//"gs2mgdemo",
2515	classNodeIDs, new String[]{"all"}));
2516
2517	System.out.println("Testing query services");
2518	System.out.println("TEXT QUERY:");
2519	Map formControlValsMap = new HashMap();
2520	formControlValsMap.put(MAXDOCS, "100");
2521	formControlValsMap.put(QUERY, "snails");
2522	String searchResponse
2523	= con.query("gs2mgdemo", "TextQuery", formControlValsMap);
2524	System.out.println(searchResponse);
2525
2526	System.out.println("FIELD QUERY:");
2527	formControlValsMap.clear();
2528	formControlValsMap.put(MAXDOCS, "100");
2529	formControlValsMap.put(QUERY, "interview,Gender equality,cyclone");
2530	formControlValsMap.put(FIELDNAME_ATT,
2531	"allFields,docTitles,allFields,allFields");
2532	searchResponse
2533	= con.query("gs2mgdemo", "FieldQuery", formControlValsMap);
2534	System.out.println(searchResponse);
2535
2536	System.exit(0);
2537	}catch(Exception e) {
2538	JOptionPane.showMessageDialog(
2539	null, e, "Error", JOptionPane.ERROR_MESSAGE);
2540	//System.err.println("ERROR: " + e);
2541	e.printStackTrace();
2542	}
2543	}
2544	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: