Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 31776

Last change on this file since 31776 was 31776, checked in by ak19, 7 years ago
Kathy described a problem on the mailing list about the AutoLoadConverters msg appearing before XML content when connecting with a client-GLI to a remote GS3, which breaks parsing of the XML. (I found it was present in my GS3 installation from 4 May 2016.) I've narrowed it down to running client-gli with the debug flag turned on, which seemed related to the same problem I'd seen when running gli -debug after the change to SafeProcess in Plugins.java, which I fixed by removing lines preceding XML content before parsing the XML. For client-gli, SafeProcess isn't used which is also why the problem with client-gli is much older, but I'm now using a common and existing solution for both: doing what Plugins.java used to do before the change to SafeProcess, which is call XMLTools.readXMLStream(), which would parse out content before XML. The RemoteGreenstoneServer should only call this method if it actually has some XML content it's dealing with. Could have solved this in RemoteGreenstoneServerAction.java's GetScriptOptions, but am solving it in RemoteGreenstoneServer's sendCommandToServerInternal, since there may be many Actions returning XML, not just GetScriptOptions.
Property svn:keywords set to `Author Date Id Revision`
File size: 33.9 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15	import java.io.StringWriter; // for elementToString()
16
17	// SAX
18	import org.xml.sax.XMLReader;
19	import org.xml.sax.SAXException;
20	import org.xml.sax.SAXParseException;
21	import org.xml.sax.helpers.DefaultHandler;
22	import org.xml.sax.InputSource;
23
24	// JAXP
25	import javax.xml.parsers.DocumentBuilder;
26	import javax.xml.parsers.DocumentBuilderFactory;
27	import javax.xml.parsers.FactoryConfigurationError;
28	import javax.xml.parsers.ParserConfigurationException;
29	import javax.xml.parsers.SAXParser;
30	import javax.xml.parsers.SAXParserFactory;
31	// for elementToString():
32	import javax.xml.transform.OutputKeys;
33	import javax.xml.transform.Transformer;
34	import javax.xml.transform.TransformerFactory;
35	import javax.xml.transform.dom.DOMSource;
36	import javax.xml.transform.stream.StreamResult;
37
38
39	/** This class is a static class containing useful XML functions */
40	public class XMLTools
41	{
42	/** extracts the text out of a node */
43	public static Node getNodeTextNode(Element param)
44	{
45	param.normalize();
46	Node n = param.getFirstChild();
47	while (n != null && n.getNodeType() != Node.TEXT_NODE)
48	{
49	n = n.getNextSibling();
50	}
51	return n;
52	}
53
54	/** extracts the text out of a node */
55	public static String getNodeText(Element param)
56	{
57	Node text_node = getNodeTextNode(param);
58	if (text_node == null)
59	{
60	return "";
61	}
62	return text_node.getNodeValue();
63	}
64
65	public static void setNodeText(Element elem, String text)
66	{
67	Node old_text_node = getNodeTextNode(elem);
68	if (old_text_node != null)
69	{
70	elem.removeChild(old_text_node);
71	}
72	Text t = elem.getOwnerDocument().createTextNode(text);
73	elem.appendChild(t);
74	}
75
76	/** returns the (first) child element with the given name */
77	public static Node getChildByTagName(Node n, String name)
78	{
79
80	Node child = n.getFirstChild();
81	while (child != null)
82	{
83	if (child.getNodeName().equals(name))
84	{
85	return child;
86	}
87	child = child.getNextSibling();
88	}
89	return null; //not found
90	}
91
92	/**
93	* returns the (nth) child element with the given name index numbers start
94	* at 0
95	*/
96	public static Node getChildByTagNameIndexed(Node n, String name, int index)
97	{
98	if (index == -1)
99	{
100	return getChildByTagName(n, name);
101	}
102	int count = 0;
103	Node child = n.getFirstChild();
104	while (child != null)
105	{
106	if (child.getNodeName().equals(name))
107	{
108	if (count == index)
109	{
110	return child;
111	}
112	else
113	{
114	count++;
115	}
116	}
117	child = child.getNextSibling();
118	}
119	return null; //not found
120	}
121
122	/**
123	* returns the element parent/node_name[@attribute_name='attribute_value']
124	*/
125	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126	{
127
128	NodeList children = parent.getChildNodes();
129	for (int i = 0; i < children.getLength(); i++)
130	{
131	Node child = children.item(i);
132	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
133	if (child.getNodeName().equals(node_name))
134	{
135	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136	return (Element) child;
137	}
138	}
139	// not found
140	return null;
141	}
142
143	/**
144	* returns a list of elements
145	* parent/node_name[@attribute_name='attribute_value']
146	*/
147	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148	{
149	ArrayList elements = new ArrayList();
150	NodeList children = parent.getChildNodes();
151	for (int i = 0; i < children.getLength(); i++)
152	{
153	//System.out.println("getNamedElementList");
154	Node child = children.item(i);
155	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
156	if (child.getNodeName().equals(node_name))
157	{
158	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159	elements.add((Element) child);
160	}
161	}
162	// not found
163	if (elements.size() == 0)
164	{
165	elements = null;
166	}
167	return elements;
168	}
169
170	public static void copyAllChildren(Element to, Element from)
171	{
172
173	Document to_doc = to.getOwnerDocument();
174	Node child = from.getFirstChild();
175	while (child != null)
176	{
177	to.appendChild(to_doc.importNode(child, true));
178	child = child.getNextSibling();
179	}
180	}
181
182	/** duplicates all elements in list elements and appends to toElement */
183	public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184	int num_elems = elements.getLength();
185	if (num_elems < 1)
186	{
187	return;
188	}
189	for (int i = 0; i < num_elems; i++)
190	{
191	Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192	toElement.appendChild(to_element);
193	}
194
195	}
196	/** Duplicates an element */
197	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198	{
199	return duplicateElementNS(owner, element, null, with_attributes);
200	}
201
202	/** Duplicates an element */
203	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204	{
205	Element duplicate;
206	if (namespace_uri == null)
207	{
208	duplicate = owner.createElement(element.getTagName());
209	}
210	else
211	{
212	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213	}
214	// Copy element attributes
215	if (with_attributes)
216	{
217	NamedNodeMap attributes = element.getAttributes();
218	for (int i = 0; i < attributes.getLength(); i++)
219	{
220	Node attribute = attributes.item(i);
221	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222	}
223	}
224
225	// Copy element children
226	NodeList children = element.getChildNodes();
227	for (int i = 0; i < children.getLength(); i++)
228	{
229	Node child = children.item(i);
230	duplicate.appendChild(owner.importNode(child, true));
231	}
232
233	return duplicate;
234	}
235
236	/** Remove all of the child nodes from a certain node. */
237	static final public void clear(Node node)
238	{
239	while (node.hasChildNodes())
240	{
241	node.removeChild(node.getFirstChild());
242	}
243	}
244
245	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246	{
247	ArrayList child_elements = new ArrayList();
248
249	NodeList children_nodelist = parent_element.getChildNodes();
250	for (int i = 0; i < children_nodelist.getLength(); i++)
251	{
252	Node child_node = children_nodelist.item(i);
253	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254	{
255	child_elements.add(child_node);
256	}
257	}
258
259	return child_elements;
260	}
261
262	static public String getElementTextValue(Element element)
263	{
264	// Find the first text node child
265	NodeList children_nodelist = element.getChildNodes();
266	for (int i = 0; i < children_nodelist.getLength(); i++)
267	{
268	Node child_node = children_nodelist.item(i);
269	if (child_node.getNodeType() == Node.TEXT_NODE)
270	{
271	return child_node.getNodeValue();
272	}
273	}
274
275	// None found
276	return "";
277	}
278
279	/**
280	* Method to retrieve the value of a given node.
281	*
282	* @param element
283	* The <strong>Element</strong> whose value we wish to find. Soon
284	* to be deprecated!
285	*/
286	static final public String getValue(Node element)
287	{
288	if (element == null)
289	{
290	return "";
291	}
292	// If we've been given a subject node first retrieve its value node.
293	if (element.getNodeName().equals("Subject"))
294	{
295	element = getNodeFromNamed(element, "Value");
296	}
297	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298	if (element != null && element.hasChildNodes())
299	{
300	StringBuffer text_buffer = new StringBuffer();
301	NodeList text_nodes = element.getChildNodes();
302	for (int i = 0; i < text_nodes.getLength(); i++)
303	{
304	Node possible_text = text_nodes.item(i);
305	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306	{
307	text_buffer.append(possible_text.getNodeValue());
308	}
309	}
310	return text_buffer.toString();
311	}
312	return "";
313	}
314
315	/**
316	* Method to retrieve from the node given, a certain child node with the
317	* specified name.
318	*
319	* @param parent
320	* The <strong>Node</strong> whose children should be searched.
321	* @param name
322	* The required nodes name as a <strong>String</strong>.
323	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
324	* otherwise. Soon to be deprecated!
325	*/
326	static final public Node getNodeFromNamed(Node parent, String name)
327	{
328	Node child = null;
329	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330	{
331	if (i.getNodeName().equals(name))
332	{
333	child = i;
334	}
335	}
336	return child;
337	}
338
339	static final public String WELLFORMED = "well-formed !";
340	static final public String NOTWELLFORMED = "not well-formed";
341	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:gslib='http://www.greenstone.org/skinning' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342	static final private String FOOTER = "</collectionConfig>";
343
344
345	public static Document getDOM(String xml_str)
346	{
347	Document doc = null;
348	try {
349
350	DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351	InputSource is = new InputSource();
352	is.setCharacterStream(new StringReader(xml_str));
353	doc = db.parse(is);
354
355	} catch (Exception e) {
356	e.printStackTrace();
357	}
358	return doc;
359	}
360
361	public static String parse(String xml_str)
362	{
363	String validation_msg = WELLFORMED;
364	xml_str = HEADER + xml_str + FOOTER;
365	try
366	{
367	SAXParserFactory factory = SAXParserFactory.newInstance();
368	factory.setNamespaceAware(true);
369	//factory.setValidating (true);
370	SAXParser parser = factory.newSAXParser();
371	InputSource iSource = new InputSource(new StringReader(xml_str));
372	// parser.parse (iSource, new DefaultHandler ());
373
374	org.xml.sax.XMLReader reader = parser.getXMLReader();
375	reader.setContentHandler(new DefaultHandler());
376	reader.setErrorHandler(new DefaultHandler());
377	reader.parse(iSource);
378	}
379	catch (FactoryConfigurationError e)
380	{
381	validation_msg = "unable to get a document builder factory";
382	}
383	catch (ParserConfigurationException e)
384	{
385	validation_msg = "unable to configure parser";
386	}
387	catch (SAXParseException e)
388	{
389	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390	}
391	catch (SAXException e)
392	{
393	validation_msg += " Fatal error: " + e.toString();
394	}
395	catch (IOException e)
396	{
397	validation_msg = "Unable to read the input, i/o error";
398	}
399
400	return validation_msg;
401	}
402
403	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404	public static String parseDOM(String xml_str)
405	{
406	String validation_msg = WELLFORMED;
407
408	try
409	{
410	SAXParserFactory factory = SAXParserFactory.newInstance();
411	factory.setNamespaceAware(true);
412	//factory.setValidating (true);
413	SAXParser parser = factory.newSAXParser();
414	InputSource iSource = new InputSource(new StringReader(xml_str));
415	// parser.parse (iSource, new DefaultHandler ());
416
417	org.xml.sax.XMLReader reader = parser.getXMLReader();
418	reader.setContentHandler(new DefaultHandler());
419	reader.setErrorHandler(new DefaultHandler());
420	reader.parse(iSource);
421	}
422	catch (FactoryConfigurationError e)
423	{
424	validation_msg = "unable to get a document builder factory";
425	}
426	catch (ParserConfigurationException e)
427	{
428	validation_msg = "unable to configure parser";
429	}
430	catch (SAXParseException e)
431	{
432	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433	}
434	catch (SAXException e)
435	{
436	validation_msg += " " + e.toString();
437	}
438	catch (IOException e)
439	{
440	validation_msg = "Unable to read the input, i/o error";
441	}
442
443	return validation_msg;
444	}
445
446	public static String parse(File xml_file)
447	{
448	String validation_msg = WELLFORMED;
449
450	try
451	{
452	SAXParserFactory factory = SAXParserFactory.newInstance();
453	factory.setNamespaceAware(true);
454	//factory.setValidating (true);
455	SAXParser parser = factory.newSAXParser();
456	FileReader r = new FileReader(xml_file);
457	InputSource iSource = new InputSource(r);
458	XMLReader reader = parser.getXMLReader();
459	reader.setContentHandler(new DefaultHandler());
460	reader.setErrorHandler(new DefaultHandler());
461	reader.parse(iSource);
462	}
463	catch (FactoryConfigurationError e)
464	{
465	validation_msg = "unable to get a document builder factory";
466	}
467	catch (ParserConfigurationException e)
468	{
469	validation_msg = "unable to configure parser";
470	}
471	catch (SAXParseException e)
472	{
473	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474	}
475	catch (SAXException e)
476	{
477	validation_msg += " Fatal error: " + e.toString();
478	}
479	catch (IOException e)
480	{
481	validation_msg = "Unable to read the input, i/o error";
482	}
483
484	return validation_msg;
485	}
486
487	/** Returns a string of the location. */
488	private static String getLocationString(SAXParseException ex)
489	{
490	StringBuffer str = new StringBuffer();
491
492	String systemId = ex.getSystemId();
493	if (systemId != null)
494	{
495	int index = systemId.lastIndexOf('/');
496	if (index != -1)
497	systemId = systemId.substring(index + 1);
498	str.append(systemId);
499	}
500	str.append("(line ");
501	str.append(ex.getLineNumber() - 1);
502	str.append(", column ");
503	str.append(ex.getColumnNumber());
504	str.append("): ");
505
506	return str.toString();
507
508	} // getLocationString(SAXParseException):String
509
510	/** Parse an XML document from a given file path */
511	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512	{
513	if (use_class_loader == true)
514	{
515	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516	if (is != null)
517	{
518	return parseXML(is);
519	}
520	}
521
522	// Try the file outside the classes directory
523	return parseXMLFile(new File(xml_file_path));
524	}
525
526	/** Parse an XML document from a given file */
527	static public Document parseXMLFile(File xml_file)
528	{
529	// No file? No point trying!
530	if (xml_file.exists() == false)
531	{
532	// System.err.println("@@@ file " + xml_file + " does not exist.");
533	return null;
534	}
535
536	try
537	{
538	return parseXML(new FileInputStream(xml_file));
539	}
540	catch (Exception exception)
541	{
542	DebugStream.printStackTrace(exception);
543	return null;
544	}
545	}
546
547	/** Parse an XML document from a given input stream */
548	static public Document parseXML(InputStream xml_input_stream)
549	{
550	Document document = null;
551
552	try
553	{
554	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555	document = parseXML(isr);
556	isr.close();
557	xml_input_stream.close();
558	}
559	catch (Exception exception)
560	{
561	DebugStream.printStackTrace(exception);
562	}
563
564	return document;
565	}
566
567	/** Parse an XML document from a given reader */
568	static public Document parseXML(Reader xml_reader)
569	{
570	Document document = null;
571
572	// If debugging, the following will store the XML contents to be parsed,
573	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
574	String xmlContents = "";
575
576	try
577	{
578	Reader reader = null;
579
580	// (1) By default, GLI will remove any contents preceeding (and invalidating)
581	// the XML and present these lines separately to the user
582	if (!DebugStream.isDebuggingEnabled())
583	{
584	try
585	{
586	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
587	}
588	catch (Exception e)
589	{
590	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
591	e.printStackTrace();
592	}
593	}
594
595	// (2) If we are running GLI in debug mode:
596	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
597	// idea of where things went wrong. This will print the "XML" contents to either
598	// system.out (if debugging is off) or to the DebugStream otherwise.
599	// We need to read the XML twice to know the line where things went wrong, so
600	// do the additional reading only if we're debugging
601	else
602	{
603	StringBuffer buf = new StringBuffer();
604	char[] buffer = new char[500];
605	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
606	while (numCharsRead != -1)
607	{
608	buf.append(buffer, 0, numCharsRead);
609	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
610	}
611	xmlContents = buf.toString();
612	xml_reader.close(); // closing the old Reader
613	xml_reader = null;
614	buffer = null;
615	buf = null;
616	// we need a Reader to parse the same contents as the Reader that was just closed
617	reader = new BufferedReader(new StringReader(xmlContents));
618	//System.err.println("xmlContents:\n" + xmlContents);
619	}
620
621	// (2) The actual XML parsing
622	InputSource isc = new InputSource(reader);
623	DOMParser parser = new DOMParser();
624	parser.setFeature("http://xml.org/sax/features/validation", false);
625	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
626	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
627	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
628	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
629	parser.setEntityResolver(new GLIEntityResolver());
630	parser.parse(isc);
631	document = parser.getDocument();
632
633	}
634	catch (SAXParseException e)
635	{
636	showXMLParseFailureLine(e, xmlContents);
637	}
638	catch (SAXException exception)
639	{
640	System.err.println("SAX exception: " + exception.getMessage());
641	if (DebugStream.isDebuggingEnabled())
642	{
643	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
644	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
645	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
646	System.exit(-1);
647	}
648	// else, not running in debug mode, so don't exit after exception
649	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
650	DebugStream.printStackTrace(exception);
651	}
652	catch (Exception exception)
653	{
654	DebugStream.printStackTrace(exception);
655	}
656
657	return document;
658	}
659
660	/**
661	* Displays the line (string) where the SAXParseException occurred, given a
662	* String of the entire xml that was being parsed and the SAXParseException
663	* object that was caught. The messages are printed to DebugStream, so run
664	* GLI/FLI with -debug to view this output.
665	*
666	* @param xmlContents
667	* is the entire xml that was being parsed when the exception
668	* occurred
669	* @param e
670	* is the SAXParseException object that was thrown upon parsing
671	* the xmlContents.
672	*/
673	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674	{
675
676	// There should be no characters at all that preceed the <?xml>... bit.
677	// The first check is for starting spaces:
678	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
679	{
680	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682	return; // nothing more to do, first error identified
683	}
684
685	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
686	String line = "";
687	int linenumber = e.getLineNumber();
688	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689	if (DebugStream.isDebuggingEnabled())
690	{
691	if (linenumber != -1)
692	{
693	String[] lines = xmlContents.split("\n");
694	if (lines.length > 0)
695	{
696	DebugStream.println(" (number of lines: " + lines.length + ")");
697	if (lines.length >= linenumber)
698	{
699	line = lines[linenumber - 1];
700	}
701	else
702	{ // error is past the last line
703	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704	}
705	}
706	else
707	{
708	DebugStream.print("\n");
709	}
710	lines = null;
711
712	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
713	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715	// Uncomment if you want to print out the entire contents of the XML doc:
716	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
717	// + xmlContents + "\n************END\n");
718	}
719	else
720	{ // no particular line number, print out all the xml so debugger can inspect it
721	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
722	}
723	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725	System.exit(-1);
726	}
727	else
728	{ // not running in debug mode
729	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730	}
731	}
732
733	static public StringBuffer readXMLStream(InputStream input_stream)
734	{
735	StringBuffer xml = new StringBuffer("");
736	try {
737	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
738	xml = XMLTools.readXMLStream(new InputStreamReader(input_stream, "UTF-8"));
739	} catch (UnsupportedEncodingException error) {
740	System.err.println("Failed when trying to parse XML stream");
741	error.printStackTrace();
742	}
743
744	return xml;
745	}
746
747	static public StringBuffer readXMLStream(String s) {
748	return XMLTools.readXMLStream(new StringReader(s));
749	}
750
751
752	static public StringBuffer readXMLStream(Reader reader)
753	{
754	StringBuffer xml = new StringBuffer("");
755
756	try
757	{
758	BufferedReader buffered_in = new BufferedReader(reader);
759
760	String line = "";
761	boolean xml_content = false;
762	while ((line = buffered_in.readLine()) != null)
763	{
764	if (xml_content)
765	{
766	xml.append(line);
767	xml.append("\n");
768	}
769	else if (line.trim().startsWith("<?xml"))
770	{
771	xml_content = true;
772	xml.append(line);
773	xml.append("\n");
774	}
775	else
776	{
777	System.err.println(line);
778	}
779	}
780	buffered_in = null;
781	}
782	catch (Exception error)
783	{
784	System.err.println("Failed when trying to parse XML stream");
785	error.printStackTrace();
786	}
787
788	return xml;
789	}
790
791	/**
792	* Removes characters that are invalid in XML (see
793	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
794	*/
795	static public String removeInvalidCharacters(String text)
796	{
797	char[] safe_characters = new char[text.length()];
798	int j = 0;
799
800	char[] raw_characters = new char[text.length()];
801	text.getChars(0, text.length(), raw_characters, 0);
802	for (int i = 0; i < raw_characters.length; i++)
803	{
804	char character = raw_characters[i];
805	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
806	{
807	safe_characters[j] = character;
808	j++;
809	}
810	}
811
812	return new String(safe_characters, 0, j);
813	}
814
815	static public void setElementTextValue(Element element, String text)
816	{
817	// Remove all text node children
818	NodeList children_nodelist = element.getChildNodes();
819	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
820	{
821	Node child_node = children_nodelist.item(i);
822	if (child_node.getNodeType() == Node.TEXT_NODE)
823	{
824	element.removeChild(child_node);
825	}
826	}
827
828	// Add a new text node
829	if (text != null)
830	{
831	element.appendChild(element.getOwnerDocument().createTextNode(text));
832	}
833	}
834
835	/**
836	* Set the #text node value of some element.
837	*
838	* @param element
839	* the Element whose value we wish to set
840	* @param value
841	* the new value for the element as a String Soon to be
842	* deprecated!
843	*/
844	static final public void setValue(Element element, String value)
845	{
846	// Remove any existing child node(s)
847	clear(element);
848	// Add new text node.
849	if (value != null)
850	{
851	element.appendChild(element.getOwnerDocument().createTextNode(value));
852	}
853	}
854
855	static public void indentXML(Element elem, int depth)
856	{
857	Document doc = elem.getOwnerDocument();
858
859	String startIndentString = "\n";
860	for (int i = 0; i < depth; i++)
861	{
862	startIndentString += "\t";
863	}
864	Node startTextNode = doc.createTextNode(startIndentString);
865
866	String endIndentString = "\n";
867	for (int i = 0; i < depth - 1; i++)
868	{
869	endIndentString += "\t";
870	}
871	Node endTextNode = doc.createTextNode(endIndentString);
872
873	boolean found = false;
874	Node child = elem.getFirstChild();
875	while (child != null)
876	{
877	// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
878	if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
879	{
880	Node spaceTextNode = child;
881	child = child.getNextSibling();
882	elem.removeChild(spaceTextNode);
883
884	if(child == null) break;
885	}
886
887	// now process normal element nodes as intended
888	if (child.getNodeType() == Node.ELEMENT_NODE)
889	{
890	found = true;
891	break;
892	}
893	child = child.getNextSibling();
894	}
895
896	if (found)
897	{
898	elem.appendChild(endTextNode);
899	}
900
901	child = elem.getFirstChild();
902	while (child != null)
903	{
904	// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
905	// because the first while loop above would break out when it found an element node and wouldn't have got rid
906	// of all the empty text nodes yet.
907	// This time, beware not to delete the special end and start empty textnodes just added, since
908	// they've been created and inserted specifically.
909	if(child != endTextNode && child != startTextNode
910	&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
911	{
912	Node spaceTextNode = child;
913	child = child.getNextSibling();
914	elem.removeChild(spaceTextNode);
915
916	if(child == null) break;
917	}
918
919	// go back to processing normal element nodes as intended
920	if (child.getNodeType() == Node.ELEMENT_NODE)
921	{
922	elem.insertBefore(startTextNode.cloneNode(false), child);
923	indentXML((Element) child, depth + 1);
924	}
925	child = child.getNextSibling();
926	}
927	}
928
929	/**
930	* Write an XML document to a given file with the text node of the specified
931	* element unescaped
932	*/
933	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
934	{
935	indentXML(document.getDocumentElement(), 1);
936	try
937	{
938	OutputStream os = new FileOutputStream(xml_file);
939	// Create an output format for our document.
940	OutputFormat f = new OutputFormat(document);
941	f.setEncoding("UTF-8");
942	f.setIndenting(true);
943	f.setLineWidth(0); // Why isn't this working!
944	f.setPreserveSpace(true);
945	if (nonEscapingTagNames != null)
946	{
947	f.setNonEscapingElements(nonEscapingTagNames);
948	}
949	// Create the necessary writer stream for serialization.
950	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
951	Writer w = new BufferedWriter(osw);
952	// Generate a new serializer from the above.
953	XMLSerializer s = new XMLSerializer(w, f);
954	s.asDOMSerializer();
955	// Finally serialize the document to file.
956	s.serialize(document);
957	// And close.
958	os.close();
959	}
960	catch (Exception exception)
961	{
962	DebugStream.printStackTrace(exception);
963	}
964	}
965
966	/** Write an XML document to a given file */
967	static public void writeXMLFile(File xml_file, Document document)
968	{
969	writeXMLFile(xml_file, document, null);
970	}
971
972	public static void printXMLNode(Node e)
973	{
974	printXMLNode(e, 0);
975	}
976
977	public static void printXMLNode(Node e, int depth)
978	{ //recursive method call using DOM API...
979
980	for (int i = 0; i < depth; i++)
981	System.out.print(' ');
982
983	if (e.getNodeType() == Node.TEXT_NODE)
984	{
985	//System.out.println("text") ;
986	if (e.getNodeValue() != "")
987	{
988	System.out.println(e.getNodeValue());
989	}
990	return;
991	}
992
993	System.out.print('<');
994	System.out.print(e.getNodeName());
995	NamedNodeMap attrs = e.getAttributes();
996	if (attrs != null)
997	{
998	for (int i = 0; i < attrs.getLength(); i++)
999	{
1000	Node attr = attrs.item(i);
1001	System.out.print(' ');
1002	System.out.print(attr.getNodeName());
1003	System.out.print("=\"");
1004	System.out.print(attr.getNodeValue());
1005	System.out.print('"');
1006	}
1007	}
1008	NodeList children = e.getChildNodes();
1009
1010	if (children == null \|\| children.getLength() == 0)
1011	System.out.println("/>");
1012	else
1013	{
1014
1015	System.out.println('>');
1016
1017	int len = children.getLength();
1018	for (int i = 0; i < len; i++)
1019	{
1020	printXMLNode(children.item(i), depth + 1);
1021	}
1022
1023	for (int i = 0; i < depth; i++)
1024	System.out.print(' ');
1025
1026	System.out.println("</" + e.getNodeName() + ">");
1027	}
1028
1029	}
1030
1031	public static String xmlNodeToString(Node e)
1032	{
1033	StringBuffer sb = new StringBuffer("");
1034	xmlNodeToString(sb, e, true, "\t", 2);
1035	return sb.toString();
1036	}
1037
1038	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1039	{
1040
1041	if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1042	{
1043	if (e.getNodeValue() != "")
1044	{
1045	String text = e.getNodeValue();
1046	sb.append("<![CDATA[");
1047	sb.append(text);
1048	sb.append("]]>");
1049	}
1050	return;
1051	}
1052
1053	if (e.getNodeType() == Node.TEXT_NODE)
1054	{
1055	if (e.getNodeValue() != "")
1056	{
1057	String text = e.getNodeValue();
1058	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
1059	for (Character c : text.toCharArray())
1060	{
1061	if (c.equals('\n'))
1062	{
1063	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1064	break;
1065	}
1066
1067	if (!Character.isWhitespace(c))
1068	{
1069	break;
1070	}
1071	}
1072	sb.append(text);
1073	}
1074	return;
1075	}
1076
1077	if (e.getNodeType() == Node.COMMENT_NODE)
1078	{
1079	if (e.getNodeValue() != "")
1080	{
1081	sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1082	}
1083	return;
1084	}
1085
1086	if (indent)
1087	{
1088	for (int i = 0; i < depth; i++)
1089	{
1090	sb.append(indentString);
1091	}
1092	}
1093
1094	sb.append('<');
1095	sb.append(e.getNodeName());
1096	NamedNodeMap attrs = e.getAttributes();
1097	if (attrs != null)
1098	{
1099	for (int i = 0; i < attrs.getLength(); i++)
1100	{
1101	Node attr = attrs.item(i);
1102	sb.append(' ');
1103	sb.append(attr.getNodeName());
1104	sb.append("=\"");
1105	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1106	sb.append('"');
1107	}
1108	}
1109	NodeList children = e.getChildNodes();
1110
1111	boolean hasElements = false;
1112	boolean indentSwapped = false;
1113	for (int i = 0; i < children.getLength(); i++)
1114	{
1115	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1116	{
1117	hasElements = true;
1118	}
1119	if ((children.item(i).getNodeType() == Node.TEXT_NODE \|\| children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1120	{
1121	if (children.item(i).getNodeValue().trim().length() > 0)
1122	{
1123	indentSwapped = true;
1124	indent = false;
1125	}
1126	}
1127	}
1128
1129	if (children == null \|\| children.getLength() == 0)
1130	{
1131	sb.append("/>");
1132
1133	if (indent)
1134	{
1135	sb.append("\n");
1136	}
1137	}
1138	else
1139	{
1140	sb.append(">");
1141	if (hasElements && indent)
1142	{
1143	sb.append("\n");
1144	}
1145
1146	int len = children.getLength();
1147	for (int i = 0; i < len; i++)
1148	{
1149	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1150	}
1151
1152	if (indent)
1153	{
1154	for (int i = 0; i < depth; i++)
1155	{
1156	sb.append(indentString);
1157	}
1158	}
1159
1160	sb.append("</" + e.getNodeName() + ">");
1161
1162	if ((hasElements && indent) \|\| indentSwapped)
1163	{
1164	sb.append("\n");
1165	}
1166	}
1167	}
1168
1169	public static String xmlNodeToStringWithoutIndenting(Node e)
1170	{
1171	StringBuffer sb = new StringBuffer("");
1172	xmlNodeToStringWithoutNewline(sb, e, -1);
1173	return sb.toString();
1174	}
1175
1176	public static String xmlNodeToStringWithoutNewline(Node e)
1177	{
1178	StringBuffer sb = new StringBuffer("");
1179	xmlNodeToStringWithoutNewline(sb, e, 0);
1180	return sb.toString();
1181	}
1182
1183	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1184	{
1185
1186	for (int i = 0; i < depth; i++)
1187	{
1188	sb.append(' ');
1189	}
1190
1191	if (e.getNodeType() == Node.TEXT_NODE)
1192	{
1193	if (e.getNodeValue() != "")
1194	{
1195	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1196	}
1197	return;
1198	}
1199
1200	if (e.getNodeType() == Node.COMMENT_NODE)
1201	{
1202	if (e.getNodeValue() != "")
1203	{
1204	sb.append("<!--" + e.getNodeValue() + "-->");
1205	}
1206	return;
1207	}
1208
1209	sb.append('<');
1210	sb.append(e.getNodeName());
1211	NamedNodeMap attrs = e.getAttributes();
1212	if (attrs != null)
1213	{
1214	for (int i = 0; i < attrs.getLength(); i++)
1215	{
1216	Node attr = attrs.item(i);
1217	sb.append(' ');
1218	sb.append(attr.getNodeName());
1219	sb.append("=\"");
1220	sb.append(attr.getNodeValue());
1221	sb.append('"');
1222	}
1223	}
1224	NodeList children = e.getChildNodes();
1225
1226	if (children == null \|\| children.getLength() == 0)
1227	sb.append("/>");
1228	else
1229	{
1230
1231	sb.append(">");
1232
1233	int len = children.getLength();
1234	for (int i = 0; i < len; i++)
1235	{
1236	if (depth >= 0)
1237	{
1238	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1239	}
1240	else
1241	{
1242	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1243	}
1244	}
1245
1246	for (int i = 0; i < depth; i++)
1247	sb.append(' ');
1248
1249	sb.append("</" + e.getNodeName() + ">");
1250	}
1251	}
1252
1253
1254
1255	// This method will convert an Element to a String too, like xmlNodeToString() above.
1256	// But for a document root element (doc.getDocumentElement()), this method will additionally
1257	// return its processing instruction line at the start (<?xml ... ?>).
1258	// This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1259	public static String elementToString(Element e, boolean indent)
1260	{
1261	String str = "";
1262	try
1263	{
1264	TransformerFactory tf = TransformerFactory.newInstance();
1265	Transformer trans = tf.newTransformer();
1266	StringWriter sw = new StringWriter();
1267	if (indent)
1268	{
1269	trans.setOutputProperty(OutputKeys.INDENT, "yes");
1270	}
1271	else
1272	{
1273	trans.setOutputProperty(OutputKeys.INDENT, "no");
1274	}
1275	trans.transform(new DOMSource(e), new StreamResult(sw));
1276	str = sw.toString();
1277	}
1278	catch (Exception ex)
1279	{
1280	str += "Exception: couldn't write " + e + " to log";
1281	}
1282	finally
1283	{
1284	return str;
1285	}
1286	}
1287	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: