Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 26018

Last change on this file since 26018 was 26018, checked in by sjm84, 12 years ago
Changing the default format statement to include some templates for document display
Property svn:keywords set to `Author Date Id Revision`
File size: 30.8 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15
16	// SAX
17	import org.xml.sax.XMLReader;
18	import org.xml.sax.SAXException;
19	import org.xml.sax.SAXParseException;
20	import org.xml.sax.helpers.DefaultHandler;
21	import org.xml.sax.InputSource;
22
23	// JAXP
24	import javax.xml.parsers.FactoryConfigurationError;
25	import javax.xml.parsers.ParserConfigurationException;
26	import javax.xml.parsers.SAXParser;
27	import javax.xml.parsers.SAXParserFactory;
28
29	/** This class is a static class containing useful XML functions */
30	public class XMLTools
31	{
32	/** extracts the text out of a node */
33	public static Node getNodeTextNode(Element param)
34	{
35	param.normalize();
36	Node n = param.getFirstChild();
37	while (n != null && n.getNodeType() != Node.TEXT_NODE)
38	{
39	n = n.getNextSibling();
40	}
41	return n;
42	}
43
44	/** extracts the text out of a node */
45	public static String getNodeText(Element param)
46	{
47	Node text_node = getNodeTextNode(param);
48	if (text_node == null)
49	{
50	return "";
51	}
52	return text_node.getNodeValue();
53	}
54
55	public static void setNodeText(Element elem, String text)
56	{
57	Node old_text_node = getNodeTextNode(elem);
58	if (old_text_node != null)
59	{
60	elem.removeChild(old_text_node);
61	}
62	Text t = elem.getOwnerDocument().createTextNode(text);
63	elem.appendChild(t);
64	}
65
66	/** returns the (first) child element with the given name */
67	public static Node getChildByTagName(Node n, String name)
68	{
69
70	Node child = n.getFirstChild();
71	while (child != null)
72	{
73	if (child.getNodeName().equals(name))
74	{
75	return child;
76	}
77	child = child.getNextSibling();
78	}
79	return null; //not found
80	}
81
82	/**
83	* returns the (nth) child element with the given name index numbers start
84	* at 0
85	*/
86	public static Node getChildByTagNameIndexed(Node n, String name, int index)
87	{
88	if (index == -1)
89	{
90	return getChildByTagName(n, name);
91	}
92	int count = 0;
93	Node child = n.getFirstChild();
94	while (child != null)
95	{
96	if (child.getNodeName().equals(name))
97	{
98	if (count == index)
99	{
100	return child;
101	}
102	else
103	{
104	count++;
105	}
106	}
107	child = child.getNextSibling();
108	}
109	return null; //not found
110	}
111
112	/**
113	* returns the element parent/node_name[@attribute_name='attribute_value']
114	*/
115	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
116	{
117
118	NodeList children = parent.getChildNodes();
119	for (int i = 0; i < children.getLength(); i++)
120	{
121	Node child = children.item(i);
122	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
123	if (child.getNodeName().equals(node_name))
124	{
125	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
126	return (Element) child;
127	}
128	}
129	// not found
130	return null;
131	}
132
133	/**
134	* returns a list of elements
135	* parent/node_name[@attribute_name='attribute_value']
136	*/
137	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
138	{
139	ArrayList elements = new ArrayList();
140	NodeList children = parent.getChildNodes();
141	for (int i = 0; i < children.getLength(); i++)
142	{
143	//System.out.println("getNamedElementList");
144	Node child = children.item(i);
145	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
146	if (child.getNodeName().equals(node_name))
147	{
148	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
149	elements.add((Element) child);
150	}
151	}
152	// not found
153	if (elements.size() == 0)
154	{
155	elements = null;
156	}
157	return elements;
158	}
159
160	public static void copyAllChildren(Element to, Element from)
161	{
162
163	Document to_doc = to.getOwnerDocument();
164	Node child = from.getFirstChild();
165	while (child != null)
166	{
167	to.appendChild(to_doc.importNode(child, true));
168	child = child.getNextSibling();
169	}
170	}
171
172	/** Duplicates an element */
173	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
174	{
175	return duplicateElementNS(owner, element, null, with_attributes);
176	}
177
178	/** Duplicates an element */
179	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
180	{
181	Element duplicate;
182	if (namespace_uri == null)
183	{
184	duplicate = owner.createElement(element.getTagName());
185	}
186	else
187	{
188	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
189	}
190	// Copy element attributes
191	if (with_attributes)
192	{
193	NamedNodeMap attributes = element.getAttributes();
194	for (int i = 0; i < attributes.getLength(); i++)
195	{
196	Node attribute = attributes.item(i);
197	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
198	}
199	}
200
201	// Copy element children
202	NodeList children = element.getChildNodes();
203	for (int i = 0; i < children.getLength(); i++)
204	{
205	Node child = children.item(i);
206	duplicate.appendChild(owner.importNode(child, true));
207	}
208
209	return duplicate;
210	}
211
212	/** Remove all of the child nodes from a certain node. */
213	static final public void clear(Node node)
214	{
215	while (node.hasChildNodes())
216	{
217	node.removeChild(node.getFirstChild());
218	}
219	}
220
221	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
222	{
223	ArrayList child_elements = new ArrayList();
224
225	NodeList children_nodelist = parent_element.getChildNodes();
226	for (int i = 0; i < children_nodelist.getLength(); i++)
227	{
228	Node child_node = children_nodelist.item(i);
229	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
230	{
231	child_elements.add(child_node);
232	}
233	}
234
235	return child_elements;
236	}
237
238	static public String getElementTextValue(Element element)
239	{
240	// Find the first text node child
241	NodeList children_nodelist = element.getChildNodes();
242	for (int i = 0; i < children_nodelist.getLength(); i++)
243	{
244	Node child_node = children_nodelist.item(i);
245	if (child_node.getNodeType() == Node.TEXT_NODE)
246	{
247	return child_node.getNodeValue();
248	}
249	}
250
251	// None found
252	return "";
253	}
254
255	/**
256	* Method to retrieve the value of a given node.
257	*
258	* @param element
259	* The <strong>Element</strong> whose value we wish to find. Soon
260	* to be deprecated!
261	*/
262	static final public String getValue(Node element)
263	{
264	if (element == null)
265	{
266	return "";
267	}
268	// If we've been given a subject node first retrieve its value node.
269	if (element.getNodeName().equals("Subject"))
270	{
271	element = getNodeFromNamed(element, "Value");
272	}
273	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
274	if (element != null && element.hasChildNodes())
275	{
276	StringBuffer text_buffer = new StringBuffer();
277	NodeList text_nodes = element.getChildNodes();
278	for (int i = 0; i < text_nodes.getLength(); i++)
279	{
280	Node possible_text = text_nodes.item(i);
281	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
282	{
283	text_buffer.append(possible_text.getNodeValue());
284	}
285	}
286	return text_buffer.toString();
287	}
288	return "";
289	}
290
291	/**
292	* Method to retrieve from the node given, a certain child node with the
293	* specified name.
294	*
295	* @param parent
296	* The <strong>Node</strong> whose children should be searched.
297	* @param name
298	* The required nodes name as a <strong>String</strong>.
299	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
300	* otherwise. Soon to be deprecated!
301	*/
302	static final public Node getNodeFromNamed(Node parent, String name)
303	{
304	Node child = null;
305	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
306	{
307	if (i.getNodeName().equals(name))
308	{
309	child = i;
310	}
311	}
312	return child;
313	}
314
/** Message the parse methods start from; returned unchanged when no exception was raised. */
static final public String WELLFORMED = "well-formed !";
/** Prefix of the message the parse methods return after a SAXParseException. */
static final public String NOTWELLFORMED = "not well-formed";
/** XML declaration plus an opening collectionConfig element (declaring the gsf and xsl prefixes) that parse(String) puts in front of its input. */
static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
/** Closing tag matching HEADER that parse(String) appends to its input. */
static final private String FOOTER = "</collectionConfig>";
319
320	public static String parse(String xml_str)
321	{
322	String validation_msg = WELLFORMED;
323	xml_str = HEADER + xml_str + FOOTER;
324	try
325	{
326	SAXParserFactory factory = SAXParserFactory.newInstance();
327	factory.setNamespaceAware(true);
328	//factory.setValidating (true);
329	SAXParser parser = factory.newSAXParser();
330	InputSource iSource = new InputSource(new StringReader(xml_str));
331	// parser.parse (iSource, new DefaultHandler ());
332
333	org.xml.sax.XMLReader reader = parser.getXMLReader();
334	reader.setContentHandler(new DefaultHandler());
335	reader.setErrorHandler(new DefaultHandler());
336	reader.parse(iSource);
337	}
338	catch (FactoryConfigurationError e)
339	{
340	validation_msg = "unable to get a document builder factory";
341	}
342	catch (ParserConfigurationException e)
343	{
344	validation_msg = "unable to configure parser";
345	}
346	catch (SAXParseException e)
347	{
348	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
349	}
350	catch (SAXException e)
351	{
352	validation_msg += " Fatal error: " + e.toString();
353	}
354	catch (IOException e)
355	{
356	validation_msg = "Unable to read the input, i/o error";
357	}
358
359	return validation_msg;
360	}
361
362	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
363	public static String parseDOM(String xml_str)
364	{
365	String validation_msg = WELLFORMED;
366
367	try
368	{
369	SAXParserFactory factory = SAXParserFactory.newInstance();
370	factory.setNamespaceAware(true);
371	//factory.setValidating (true);
372	SAXParser parser = factory.newSAXParser();
373	InputSource iSource = new InputSource(new StringReader(xml_str));
374	// parser.parse (iSource, new DefaultHandler ());
375
376	org.xml.sax.XMLReader reader = parser.getXMLReader();
377	reader.setContentHandler(new DefaultHandler());
378	reader.setErrorHandler(new DefaultHandler());
379	reader.parse(iSource);
380	}
381	catch (FactoryConfigurationError e)
382	{
383	validation_msg = "unable to get a document builder factory";
384	}
385	catch (ParserConfigurationException e)
386	{
387	validation_msg = "unable to configure parser";
388	}
389	catch (SAXParseException e)
390	{
391	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
392	}
393	catch (SAXException e)
394	{
395	validation_msg += " " + e.toString();
396	}
397	catch (IOException e)
398	{
399	validation_msg = "Unable to read the input, i/o error";
400	}
401
402	return validation_msg;
403	}
404
405	public static String parse(File xml_file)
406	{
407	String validation_msg = WELLFORMED;
408
409	try
410	{
411	SAXParserFactory factory = SAXParserFactory.newInstance();
412	factory.setNamespaceAware(true);
413	//factory.setValidating (true);
414	SAXParser parser = factory.newSAXParser();
415	FileReader r = new FileReader(xml_file);
416	InputSource iSource = new InputSource(r);
417	XMLReader reader = parser.getXMLReader();
418	reader.setContentHandler(new DefaultHandler());
419	reader.setErrorHandler(new DefaultHandler());
420	reader.parse(iSource);
421	}
422	catch (FactoryConfigurationError e)
423	{
424	validation_msg = "unable to get a document builder factory";
425	}
426	catch (ParserConfigurationException e)
427	{
428	validation_msg = "unable to configure parser";
429	}
430	catch (SAXParseException e)
431	{
432	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433	}
434	catch (SAXException e)
435	{
436	validation_msg += " Fatal error: " + e.toString();
437	}
438	catch (IOException e)
439	{
440	validation_msg = "Unable to read the input, i/o error";
441	}
442
443	return validation_msg;
444	}
445
446	/** Returns a string of the location. */
447	private static String getLocationString(SAXParseException ex)
448	{
449	StringBuffer str = new StringBuffer();
450
451	String systemId = ex.getSystemId();
452	if (systemId != null)
453	{
454	int index = systemId.lastIndexOf('/');
455	if (index != -1)
456	systemId = systemId.substring(index + 1);
457	str.append(systemId);
458	}
459	str.append("(line ");
460	str.append(ex.getLineNumber() - 1);
461	str.append(", column ");
462	str.append(ex.getColumnNumber());
463	str.append("): ");
464
465	return str.toString();
466
467	} // getLocationString(SAXParseException):String
468
469	/** Parse an XML document from a given file path */
470	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
471	{
472	if (use_class_loader == true)
473	{
474	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
475	if (is != null)
476	{
477	return parseXML(is);
478	}
479	}
480
481	// Try the file outside the classes directory
482	return parseXMLFile(new File(xml_file_path));
483	}
484
485	/** Parse an XML document from a given file */
486	static public Document parseXMLFile(File xml_file)
487	{
488	// No file? No point trying!
489	if (xml_file.exists() == false)
490	{
491	return null;
492	}
493
494	try
495	{
496	return parseXML(new FileInputStream(xml_file));
497	}
498	catch (Exception exception)
499	{
500	DebugStream.printStackTrace(exception);
501	return null;
502	}
503	}
504
505	/** Parse an XML document from a given input stream */
506	static public Document parseXML(InputStream xml_input_stream)
507	{
508	Document document = null;
509
510	try
511	{
512	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
513	document = parseXML(isr);
514	isr.close();
515	xml_input_stream.close();
516	}
517	catch (Exception exception)
518	{
519	DebugStream.printStackTrace(exception);
520	}
521
522	return document;
523	}
524
/**
 * Parse an XML document from a given reader.
 * <p>
 * Outside debug mode the reader is wrapped in a
 * RemoveContentBeforeRootElementXMLReader before parsing. In debug mode the
 * whole input is first read into memory (and the supplied reader is closed)
 * so that the text can be shown if parsing fails; in that mode a SAX
 * failure ends the program through System.exit(-1).
 *
 * @param xml_reader
 *            the reader supplying the XML text
 * @return the parsed document, or null if an exception was raised
 */
static public Document parseXML(Reader xml_reader)
{
	Document document = null;

	// If debugging, the following will store the XML contents to be parsed,
	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
	String xmlContents = "";

	try
	{
		Reader reader = null;

		// (1) By default, GLI will remove any contents preceding (and invalidating)
		// the XML and present these lines separately to the user
		if (!DebugStream.isDebuggingEnabled())
		{
			try
			{
				reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
			}
			catch (Exception e)
			{
				// NOTE(review): reader stays null on this path, so the InputSource
				// below is built around a null reader - confirm that is intended.
				System.err.println("Exception while wrapping the reader in parseXML(Reader)");
				e.printStackTrace();
			}
		}

		// (2) If we are running GLI in debug mode:
		// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
		// idea of where things went wrong. This will print the "XML" contents to either
		// system.out (if debugging is off) or to the DebugStream otherwise.
		// We need to read the XML twice to know the line where things went wrong, so
		// do the additional reading only if we're debugging
		else
		{
			// Drain the supplied reader into a string, 500 characters at a time.
			StringBuffer buf = new StringBuffer();
			char[] buffer = new char[500];
			int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
			while (numCharsRead != -1)
			{
				buf.append(buffer, 0, numCharsRead);
				numCharsRead = xml_reader.read(buffer, 0, buffer.length);
			}
			xmlContents = buf.toString();
			xml_reader.close(); // closing the old Reader
			xml_reader = null;
			buffer = null;
			buf = null;
			// we need a Reader to parse the same contents as the Reader that was just closed
			reader = new BufferedReader(new StringReader(xmlContents));
			//System.err.println("xmlContents:\n" + xmlContents);
		}

		// (3) The actual XML parsing, with validation and external DTD loading switched off
		InputSource isc = new InputSource(reader);
		DOMParser parser = new DOMParser();
		parser.setFeature("http://xml.org/sax/features/validation", false);
		parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
		// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
		parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
		parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
		parser.parse(isc);
		document = parser.getDocument();

	}
	catch (SAXParseException e)
	{
		// xmlContents is only filled in debug mode; otherwise it is still the empty string.
		showXMLParseFailureLine(e, xmlContents);
	}
	catch (SAXException exception)
	{
		System.err.println("SAX exception: " + exception.getMessage());
		if (DebugStream.isDebuggingEnabled())
		{
			DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
			// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
			DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
			System.exit(-1);
		}
		// else, not running in debug mode, so don't exit after exception
		System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
		DebugStream.printStackTrace(exception);
	}
	catch (Exception exception)
	{
		// Anything else (including i/o failures) is only logged; null is returned.
		DebugStream.printStackTrace(exception);
	}

	return document;
}
616
617	/**
618	* Displays the line (string) where the SAXParseException occurred, given a
619	* String of the entire xml that was being parsed and the SAXParseException
620	* object that was caught. The messages are printed to DebugStream, so run
621	* GLI/FLI with -debug to view this output.
622	*
623	* @param xmlContents
624	* is the entire xml that was being parsed when the exception
625	* occurred
626	* @param e
627	* is the SAXParseException object that was thrown upon parsing
628	* the xmlContents.
629	*/
630	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
631	{
632
633	// There should be no characters at all that preceed the <?xml>... bit.
634	// The first check is for starting spaces:
635	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
636	{
637	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
638	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
639	return; // nothing more to do, first error identified
640	}
641
642	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
643	String line = "";
644	int linenumber = e.getLineNumber();
645	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
646	if (DebugStream.isDebuggingEnabled())
647	{
648	if (linenumber != -1)
649	{
650	String[] lines = xmlContents.split("\n");
651	if (lines.length > 0)
652	{
653	DebugStream.println(" (number of lines: " + lines.length + ")");
654	if (lines.length >= linenumber)
655	{
656	line = lines[linenumber - 1];
657	}
658	else
659	{ // error is past the last line
660	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
661	}
662	}
663	else
664	{
665	DebugStream.print("\n");
666	}
667	lines = null;
668
669	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
670	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
671
672	// Uncomment if you want to print out the entire contents of the XML doc:
673	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
674	// + xmlContents + "\n************END\n");
675	}
676	else
677	{ // no particular line number, print out all the xml so debugger can inspect it
678	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
679	}
680	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
681	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
682	System.exit(-1);
683	}
684	else
685	{ // not running in debug mode
686	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
687	}
688	}
689
690	static public StringBuffer readXMLStream(InputStream input_stream)
691	{
692	StringBuffer xml = new StringBuffer("");
693
694	try
695	{
696	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
697	BufferedReader buffered_in = new BufferedReader(isr);
698
699	String line = "";
700	boolean xml_content = false;
701	while ((line = buffered_in.readLine()) != null)
702	{
703	if (xml_content)
704	{
705	xml.append(line);
706	xml.append("\n");
707	}
708	else if (line.trim().startsWith("<?xml"))
709	{
710	xml_content = true;
711	xml.append(line);
712	xml.append("\n");
713	}
714	else
715	{
716	System.err.println(line);
717	}
718	}
719	buffered_in = null;
720	}
721	catch (Exception error)
722	{
723	System.err.println("Failed when trying to parse XML stream");
724	error.printStackTrace();
725	}
726
727	return xml;
728	}
729
730	/**
731	* Removes characters that are invalid in XML (see
732	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
733	*/
734	static public String removeInvalidCharacters(String text)
735	{
736	char[] safe_characters = new char[text.length()];
737	int j = 0;
738
739	char[] raw_characters = new char[text.length()];
740	text.getChars(0, text.length(), raw_characters, 0);
741	for (int i = 0; i < raw_characters.length; i++)
742	{
743	char character = raw_characters[i];
744	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
745	{
746	safe_characters[j] = character;
747	j++;
748	}
749	}
750
751	return new String(safe_characters, 0, j);
752	}
753
754	static public void setElementTextValue(Element element, String text)
755	{
756	// Remove all text node children
757	NodeList children_nodelist = element.getChildNodes();
758	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
759	{
760	Node child_node = children_nodelist.item(i);
761	if (child_node.getNodeType() == Node.TEXT_NODE)
762	{
763	element.removeChild(child_node);
764	}
765	}
766
767	// Add a new text node
768	if (text != null)
769	{
770	element.appendChild(element.getOwnerDocument().createTextNode(text));
771	}
772	}
773
774	/**
775	* Set the #text node value of some element.
776	*
777	* @param element
778	* the Element whose value we wish to set
779	* @param value
780	* the new value for the element as a String Soon to be
781	* deprecated!
782	*/
783	static final public void setValue(Element element, String value)
784	{
785	// Remove any existing child node(s)
786	clear(element);
787	// Add new text node.
788	if (value != null)
789	{
790	element.appendChild(element.getOwnerDocument().createTextNode(value));
791	}
792	}
793
/**
 * Re-indents an element in place by replacing whitespace-only text children
 * with newline-plus-tab text nodes, recursing into child elements with
 * depth + 1.
 *
 * @param elem
 *            the element whose children are to be indented
 * @param depth
 *            number of tabs placed before each child element; the closing
 *            tag of elem gets depth - 1 tabs
 */
static public void indentXML(Element elem, int depth)
{
	Document doc = elem.getOwnerDocument();

	// Text placed before each child element: a newline followed by depth tabs.
	String startIndentString = "\n";
	for (int i = 0; i < depth; i++)
	{
		startIndentString += "\t";
	}
	// Template only: clones of this node are inserted, never the node itself.
	Node startTextNode = doc.createTextNode(startIndentString);

	// Text placed before the closing tag: a newline followed by depth - 1 tabs.
	String endIndentString = "\n";
	for (int i = 0; i < depth - 1; i++)
	{
		endIndentString += "\t";
	}
	Node endTextNode = doc.createTextNode(endIndentString);

	// First pass: strip whitespace-only text nodes until the first child element is met.
	boolean found = false;
	Node child = elem.getFirstChild();
	while (child != null)
	{
		// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
		if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
		{
			// Step to the next sibling before removing, as removal detaches the node.
			Node spaceTextNode = child;
			child = child.getNextSibling();
			elem.removeChild(spaceTextNode);

			if(child == null) break;
		}

		// now process normal element nodes as intended
		if (child.getNodeType() == Node.ELEMENT_NODE)
		{
			found = true;
			break;
		}
		child = child.getNextSibling();
	}

	// Only elements that contain child elements get their closing tag on its own line.
	if (found)
	{
		elem.appendChild(endTextNode);
	}

	// Second pass: strip the remaining whitespace and indent every child element.
	child = elem.getFirstChild();
	while (child != null)
	{
		// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
		// because the first while loop above would break out when it found an element node and wouldn't have got rid
		// of all the empty text nodes yet.
		// This time, beware not to delete the special end and start empty textnodes just added, since
		// they've been created and inserted specifically.
		if(child != endTextNode && child != startTextNode
		&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
		{
			Node spaceTextNode = child;
			child = child.getNextSibling();
			elem.removeChild(spaceTextNode);

			if(child == null) break;
		}

		// go back to processing normal element nodes as intended
		if (child.getNodeType() == Node.ELEMENT_NODE)
		{
			// The inserted clone sits before child, so the traversal never revisits it.
			elem.insertBefore(startTextNode.cloneNode(false), child);
			indentXML((Element) child, depth + 1);
		}
		child = child.getNextSibling();
	}
}
867
868	/**
869	* Write an XML document to a given file with the text node of the specified
870	* element unescaped
871	*/
872	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
873	{
874	indentXML(document.getDocumentElement(), 1);
875	try
876	{
877	OutputStream os = new FileOutputStream(xml_file);
878	// Create an output format for our document.
879	OutputFormat f = new OutputFormat(document);
880	f.setEncoding("UTF-8");
881	f.setIndenting(true);
882	f.setLineWidth(0); // Why isn't this working!
883	f.setPreserveSpace(true);
884	if (nonEscapingTagNames != null)
885	{
886	f.setNonEscapingElements(nonEscapingTagNames);
887	}
888	// Create the necessary writer stream for serialization.
889	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
890	Writer w = new BufferedWriter(osw);
891	// Generate a new serializer from the above.
892	XMLSerializer s = new XMLSerializer(w, f);
893	s.asDOMSerializer();
894	// Finally serialize the document to file.
895	s.serialize(document);
896	// And close.
897	os.close();
898	}
899	catch (Exception exception)
900	{
901	DebugStream.printStackTrace(exception);
902	}
903	}
904
905	/** Write an XML document to a given file */
906	static public void writeXMLFile(File xml_file, Document document)
907	{
908	writeXMLFile(xml_file, document, null);
909	}
910
911	public static void printXMLNode(Node e)
912	{
913	printXMLNode(e, 0);
914	}
915
916	public static void printXMLNode(Node e, int depth)
917	{ //recursive method call using DOM API...
918
919	for (int i = 0; i < depth; i++)
920	System.out.print(' ');
921
922	if (e.getNodeType() == Node.TEXT_NODE)
923	{
924	//System.out.println("text") ;
925	if (e.getNodeValue() != "")
926	{
927	System.out.println(e.getNodeValue());
928	}
929	return;
930	}
931
932	System.out.print('<');
933	System.out.print(e.getNodeName());
934	NamedNodeMap attrs = e.getAttributes();
935	if (attrs != null)
936	{
937	for (int i = 0; i < attrs.getLength(); i++)
938	{
939	Node attr = attrs.item(i);
940	System.out.print(' ');
941	System.out.print(attr.getNodeName());
942	System.out.print("=\"");
943	System.out.print(attr.getNodeValue());
944	System.out.print('"');
945	}
946	}
947	NodeList children = e.getChildNodes();
948
949	if (children == null \|\| children.getLength() == 0)
950	System.out.println("/>");
951	else
952	{
953
954	System.out.println('>');
955
956	int len = children.getLength();
957	for (int i = 0; i < len; i++)
958	{
959	printXMLNode(children.item(i), depth + 1);
960	}
961
962	for (int i = 0; i < depth; i++)
963	System.out.print(' ');
964
965	System.out.println("</" + e.getNodeName() + ">");
966	}
967
968	}
969
970	public static String xmlNodeToString(Node e)
971	{
972	StringBuffer sb = new StringBuffer("");
973	xmlNodeToString(sb, e, true, "\t", 2);
974	return sb.toString();
975	}
976
977	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
978	{
979	if (e.getNodeType() == Node.TEXT_NODE)
980	{
981	if (e.getNodeValue() != "")
982	{
983	String text = e.getNodeValue();
984	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
985	for (Character c : text.toCharArray())
986	{
987	if (c.equals('\n'))
988	{
989	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
990	break;
991	}
992
993	if (!Character.isWhitespace(c))
994	{
995	break;
996	}
997	}
998	sb.append(text);
999	}
1000	return;
1001	}
1002
1003	if (e.getNodeType() == Node.COMMENT_NODE)
1004	{
1005	if (e.getNodeValue() != "")
1006	{
1007	sb.append("<!--\n" + e.getNodeValue() + "\n-->\n");
1008	}
1009	return;
1010	}
1011
1012	if (indent)
1013	{
1014	for (int i = 0; i < depth; i++)
1015	{
1016	sb.append(indentString);
1017	}
1018	}
1019
1020	sb.append('<');
1021	sb.append(e.getNodeName());
1022	NamedNodeMap attrs = e.getAttributes();
1023	if (attrs != null)
1024	{
1025	for (int i = 0; i < attrs.getLength(); i++)
1026	{
1027	Node attr = attrs.item(i);
1028	sb.append(' ');
1029	sb.append(attr.getNodeName());
1030	sb.append("=\"");
1031	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1032	sb.append('"');
1033	}
1034	}
1035	NodeList children = e.getChildNodes();
1036
1037	boolean hasElements = false;
1038	boolean indentSwapped = false;
1039	for (int i = 0; i < children.getLength(); i++)
1040	{
1041	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1042	{
1043	hasElements = true;
1044	}
1045	if (children.item(i).getNodeType() == Node.TEXT_NODE && indent)
1046	{
1047	if (children.item(i).getNodeValue().trim().length() > 0)
1048	{
1049	indentSwapped = true;
1050	indent = false;
1051	}
1052	}
1053	}
1054
1055	if (children == null \|\| children.getLength() == 0)
1056	{
1057	sb.append("/>");
1058
1059	if (indent)
1060	{
1061	sb.append("\n");
1062	}
1063	}
1064	else
1065	{
1066	sb.append(">");
1067	if (hasElements && indent)
1068	{
1069	sb.append("\n");
1070	}
1071
1072	int len = children.getLength();
1073	for (int i = 0; i < len; i++)
1074	{
1075	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1076	}
1077
1078	if (indent)
1079	{
1080	for (int i = 0; i < depth; i++)
1081	{
1082	sb.append(indentString);
1083	}
1084	}
1085
1086	sb.append("</" + e.getNodeName() + ">");
1087
1088	if ((hasElements && indent) \|\| indentSwapped)
1089	{
1090	sb.append("\n");
1091	}
1092	}
1093	}
1094
1095	public static String xmlNodeToStringWithoutIndenting(Node e)
1096	{
1097	StringBuffer sb = new StringBuffer("");
1098	xmlNodeToStringWithoutNewline(sb, e, -1);
1099	return sb.toString();
1100	}
1101
1102	public static String xmlNodeToStringWithoutNewline(Node e)
1103	{
1104	StringBuffer sb = new StringBuffer("");
1105	xmlNodeToStringWithoutNewline(sb, e, 0);
1106	return sb.toString();
1107	}
1108
1109	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1110	{
1111
1112	for (int i = 0; i < depth; i++)
1113	{
1114	sb.append(' ');
1115	}
1116
1117	if (e.getNodeType() == Node.TEXT_NODE)
1118	{
1119	if (e.getNodeValue() != "")
1120	{
1121	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1122	}
1123	return;
1124	}
1125
1126	if (e.getNodeType() == Node.COMMENT_NODE)
1127	{
1128	if (e.getNodeValue() != "")
1129	{
1130	sb.append("<!--" + e.getNodeValue() + "-->");
1131	}
1132	return;
1133	}
1134
1135	sb.append('<');
1136	sb.append(e.getNodeName());
1137	NamedNodeMap attrs = e.getAttributes();
1138	if (attrs != null)
1139	{
1140	for (int i = 0; i < attrs.getLength(); i++)
1141	{
1142	Node attr = attrs.item(i);
1143	sb.append(' ');
1144	sb.append(attr.getNodeName());
1145	sb.append("=\"");
1146	sb.append(attr.getNodeValue());
1147	sb.append('"');
1148	}
1149	}
1150	NodeList children = e.getChildNodes();
1151
1152	if (children == null \|\| children.getLength() == 0)
1153	sb.append("/>");
1154	else
1155	{
1156
1157	sb.append(">");
1158
1159	int len = children.getLength();
1160	for (int i = 0; i < len; i++)
1161	{
1162	if (depth >= 0)
1163	{
1164	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1165	}
1166	else
1167	{
1168	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1169	}
1170	}
1171
1172	for (int i = 0; i < depth; i++)
1173	sb.append(' ');
1174
1175	sb.append("</" + e.getNodeName() + ">");
1176	}
1177	}
1178	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: